diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index aad07740d..1c14100dc 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -1,6 +1,6 @@ -import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { BucketDataSource, ParameterIndexLookupCreator, SqlSyncRules } from '@powersync/service-sync-rules'; -import { GetIntanceOptions, storage } from '@powersync/service-core'; +import { GetIntanceOptions, maxLsn, CreateWriterOptions, storage } from '@powersync/service-core'; import { BaseObserver, ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { v4 as uuid } from 'uuid'; @@ -13,6 +13,9 @@ import { SyncRuleDocument } from './implementation/models.js'; import { MongoPersistedSyncRulesContent } from './implementation/MongoPersistedSyncRulesContent.js'; import { MongoSyncBucketStorage, MongoSyncBucketStorageOptions } from './implementation/MongoSyncBucketStorage.js'; import { generateSlotName } from '../utils/util.js'; +import { BucketDefinitionMapping } from './implementation/BucketDefinitionMapping.js'; +import { MongoBucketDataWriter } from './storage-index.js'; +import { MergedSyncRules } from './implementation/MergedSyncRules.js'; export class MongoBucketStorage extends BaseObserver @@ -50,7 +53,14 @@ export class MongoBucketStorage if ((typeof id as any) == 'bigint') { id = Number(id); } - const storage = new MongoSyncBucketStorage(this, id, syncRules, slot_name, undefined, this.internalOptions); + const storage = new MongoSyncBucketStorage( + this, + id, + syncRules as MongoPersistedSyncRulesContent, + slot_name, + undefined, + this.internalOptions + ); if (!options?.skipLifecycleHooks) { this.iterateListeners((cb) => cb.syncStorageCreated?.(storage)); } @@ -64,6 +74,48 @@ export class MongoBucketStorage return storage; } + async createCombinedWriter( + storages: storage.SyncRulesBucketStorage[], + options: CreateWriterOptions + ): Promise { + const mongoStorages = storages as MongoSyncBucketStorage[]; + const mappings = mongoStorages.map((s) => s.sync_rules.mapping); + const mergedMappings = BucketDefinitionMapping.merged(mappings); + const mergedProcessor = MergedSyncRules.merge(mongoStorages.map((s) => s.getParsedSyncRules(options))); + + const writer = new MongoBucketDataWriter({ + db: this.db, + mapping: mergedMappings, + markRecordUnavailable: options.markRecordUnavailable, + rowProcessor: mergedProcessor, + skipExistingRows: options.skipExistingRows ?? false, + slotName: '', + storeCurrentData: options.storeCurrentData, + logger: options.logger + }); + + for (let storage of mongoStorages) { + const doc = await this.db.sync_rules.findOne( + { + _id: storage.group_id + }, + { projection: { last_checkpoint_lsn: 1, no_checkpoint_before: 1, keepalive_op: 1, snapshot_lsn: 1 } } + ); + const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null; + const parsedSyncRules = storage.getParsedSyncRules(options); + const batch = writer.forSyncRules({ + syncRules: parsedSyncRules, + + lastCheckpointLsn: checkpoint_lsn, + resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), + keepaliveOp: doc?.keepalive_op ? 
BigInt(doc.keepalive_op) : null + }); + storage.iterateListeners((cb) => cb.batchStarted?.(batch)); + } + + return writer; + } + async getSystemIdentifier(): Promise { const { setName: id } = await this.db.db.command({ hello: 1 @@ -184,7 +236,17 @@ export class MongoBucketStorage { state: storage.SyncRuleState.PROCESSING }, - { $set: { state: storage.SyncRuleState.STOP } } + { $set: { state: storage.SyncRuleState.STOP } }, + { + session: this.session + } + ); + + const activeSyncRules = await this.db.sync_rules.findOne( + { + state: storage.SyncRuleState.ACTIVE + }, + { session: this.session } ); const id_doc = await this.db.op_id_sequence.findOneAndUpdate( @@ -198,20 +260,73 @@ export class MongoBucketStorage }, { upsert: true, - returnDocument: 'after' + returnDocument: 'after', + session: this.session } ); const id = Number(id_doc!.op_id); const slot_name = generateSlotName(this.slot_name_prefix, id); + const syncRules = SqlSyncRules.fromYaml(options.content, { + // No schema-based validation at this point + schema: undefined, + defaultSchema: 'not_applicable', // Not needed for validation + throwOnError: false + }); + let bucketDefinitionMapping: Record = {}; + let parameterDefinitionMapping: Record = {}; + let bucketDefinitionId = (id << 16) + 1; + let parameterDefinitionId = (id << 17) + 1; + + let existingMapping: BucketDefinitionMapping; + if (activeSyncRules != null) { + existingMapping = BucketDefinitionMapping.fromSyncRules(activeSyncRules); + } else { + existingMapping = new BucketDefinitionMapping({}, {}); + } + + syncRules.config.hydrate({ + hydrationState: { + getBucketSourceScope(source: BucketDataSource) { + const existingId = existingMapping.equivalentBucketSourceId(source); + if (existingId != null) { + bucketDefinitionMapping[source.uniqueName] = existingId; + } else { + bucketDefinitionMapping[source.uniqueName] = bucketDefinitionId; + bucketDefinitionId += 1; + } + return { + // N/A + bucketPrefix: '', + source + }; + }, + getParameterIndexLookupScope(source: ParameterIndexLookupCreator) { + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + const existingId = existingMapping.equivalentParameterLookupId(source); + if (existingId != null) { + parameterDefinitionMapping[key] = existingId; + } else { + parameterDefinitionMapping[key] = parameterDefinitionId; + parameterDefinitionId += 1; + } + // N/A + return source.defaultLookupScope; + } + } + }); + const doc: SyncRuleDocument = { _id: id, content: options.content, - last_checkpoint: null, + last_checkpoint: activeSyncRules?.last_checkpoint ?? null, last_checkpoint_lsn: null, no_checkpoint_before: null, - keepalive_op: null, + // HACK: copy the op from the active sync rules, if any. + // This specifically helps for the case of the new sync rules not replicating anything new. + // FIXME: Make sure this is properly sound and tested. + keepalive_op: activeSyncRules?.last_checkpoint ? 
String(activeSyncRules.last_checkpoint) : null, snapshot_done: false, snapshot_lsn: undefined, state: storage.SyncRuleState.PROCESSING, @@ -219,13 +334,17 @@ export class MongoBucketStorage last_checkpoint_ts: null, last_fatal_error: null, last_fatal_error_ts: null, - last_keepalive_ts: null + last_keepalive_ts: null, + rule_mapping: { + definitions: bucketDefinitionMapping, + parameter_lookups: parameterDefinitionMapping + } }; - await this.db.sync_rules.insertOne(doc); + await this.db.sync_rules.insertOne(doc, { session: this.session }); await this.db.notifyCheckpoint(); rules = new MongoPersistedSyncRulesContent(this.db, doc); if (options.lock) { - const lock = await rules.lock(); + await rules.lock(this.session); } }); @@ -275,6 +394,8 @@ export class MongoBucketStorage .find({ state: { $in: [storage.SyncRuleState.PROCESSING, storage.SyncRuleState.ACTIVE] } }) + // Prioritize "ACTIVE" first + .sort({ state: 1, _id: 1 }) .toArray(); return docs.map((doc) => { diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts new file mode 100644 index 000000000..fc48cfbbd --- /dev/null +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -0,0 +1,61 @@ +import { ServiceAssertionError } from '@powersync/lib-services-framework'; +import { BucketDataSource, ParameterIndexLookupCreator, SqlSyncRules } from '@powersync/service-sync-rules'; +import { SyncRuleDocument } from './models.js'; + +export class BucketDefinitionMapping { + static fromSyncRules(doc: Pick): BucketDefinitionMapping { + return new BucketDefinitionMapping(doc.rule_mapping.definitions, doc.rule_mapping.parameter_lookups); + } + + static merged(mappings: BucketDefinitionMapping[]): BucketDefinitionMapping { + return mappings.reduce((acc, curr) => acc.mergeWith(curr), new BucketDefinitionMapping()); + } + + constructor( + private definitions: Record = {}, + private parameterLookupMapping: Record = {} + ) {} + + hasBucketSourceId(id: number) { + return Object.values(this.definitions).includes(id); + } + + hasParameterLookupId(id: number) { + return Object.values(this.parameterLookupMapping).includes(id); + } + + bucketSourceId(source: BucketDataSource): number { + const defId = this.definitions[source.uniqueName]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for bucket source ${source.uniqueName}`); + } + return defId; + } + + parameterLookupId(source: ParameterIndexLookupCreator): number { + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + const defId = this.parameterLookupMapping[key]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for parameter lookup source ${key}`); + } + return defId; + } + + equivalentBucketSourceId(source: BucketDataSource): number | null { + // FIXME: Do an actual comparison, instead of just using the unique name + return this.definitions[source.uniqueName] ?? null; + } + + equivalentParameterLookupId(source: ParameterIndexLookupCreator): number | null { + // FIXME: Do an actual comparison, instead of just using the scope + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + return this.parameterLookupMapping[key] ?? 
null; + } + + mergeWith(other: BucketDefinitionMapping): BucketDefinitionMapping { + return new BucketDefinitionMapping( + { ...this.definitions, ...other.definitions }, + { ...this.parameterLookupMapping, ...other.parameterLookupMapping } + ); + } +} diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts new file mode 100644 index 000000000..06607deb7 --- /dev/null +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -0,0 +1,240 @@ +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { SourceTable } from '@powersync/service-core'; +import { + BucketDataSource, + CompatibilityContext, + EvaluatedParameters, + EvaluatedParametersResult, + EvaluatedRow, + EvaluateRowOptions, + EvaluationError, + EvaluationResult, + hydrateEvaluateParameterRow, + hydrateEvaluateRow, + isEvaluatedParameters, + isEvaluatedRow, + isEvaluationError, + ParameterIndexLookupCreator, + RowProcessor, + SourceTableInterface, + SqlEventDescriptor, + SqliteInputValue, + SqliteRow, + SqliteValue, + SyncConfig, + TableDataSources, + TablePattern +} from '@powersync/service-sync-rules'; +import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; + +type EvaluateRowFn = (options: EvaluateRowOptions) => EvaluationResult[]; +type EvaluateParameterRowFn = (sourceTable: SourceTableInterface, row: SqliteRow) => EvaluatedParametersResult[]; + +interface ResolvedDataSource { + source: BucketDataSource; + evaluate: EvaluateRowFn; + id: number; +} +interface ResolvedParameterLookupSource { + source: ParameterIndexLookupCreator; + id: number; + evaluate: EvaluateParameterRowFn; +} + +/** + * This is like HydratedSyncRules, but merges multiple sources together, and only implements the methods + * required for replication. + * + * This should be moved to a re-usable location, possibly merged with HydratedSyncRules logic. 
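+ *
+ * Each bucket data source and parameter lookup is resolved at most once, keyed by its id from the
+ * BucketDefinitionMapping, so definitions shared between sync rule versions are not evaluated twice
+ * for the same row.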
+ */ +export class MergedSyncRules implements RowProcessor { + static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { + return new MergedSyncRules(sources); + } + + private resolvedDataSources: Map; + private resolvedParameterLookupSources: Map; + + // keyed by TablePattern.key + private tableDataSources: Map = new Map(); + + private allSyncRules: SyncConfig[]; + + // all table patterns + private sourcePatterns: TablePattern[]; + // sourcePatterns, non-wildcard, keyed by patternKey() + private indexedPatterns: Map = new Map(); + // all wildcard patterns + private wildcardPatterns: TablePattern[] = []; + + eventDescriptors: SqlEventDescriptor[] = []; + compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; + + constructor(sources: MongoPersistedSyncRules[]) { + let resolvedDataSources = new Map(); + let resolvedParameterLookupSources = new Map(); + let sourcePatternMap = new Map(); + + this.allSyncRules = []; + for (let source of sources) { + const syncRules = source.sync_rules.config; + const mapping = source.mapping; + const hydrationState = source.hydrationState; + const dataSources = syncRules.bucketDataSources; + const bucketParameterLookupSources = syncRules.bucketParameterLookupSources; + this.allSyncRules.push(syncRules); + for (let source of dataSources) { + const id = mapping.bucketSourceId(source); + if (resolvedDataSources.has(id)) { + continue; + } + const evaluate = hydrateEvaluateRow(hydrationState, source); + resolvedDataSources.set(id, { source, evaluate, id }); + + for (let pattern of source.getSourceTables()) { + if (!this.tableDataSources.has(pattern.key)) { + this.tableDataSources.set(pattern.key, { bucketDataSources: [], parameterIndexLookupCreators: [] }); + } + this.tableDataSources.get(pattern.key)!.bucketDataSources.push(source); + } + } + + for (let source of bucketParameterLookupSources) { + const id = mapping.parameterLookupId(source); + if (resolvedParameterLookupSources.has(id)) { + continue; + } + + const withScope = hydrateEvaluateParameterRow(hydrationState, source); + resolvedParameterLookupSources.set(id, { source, id, evaluate: withScope }); + + for (let pattern of source.getSourceTables()) { + if (!this.tableDataSources.has(pattern.key)) { + this.tableDataSources.set(pattern.key, { bucketDataSources: [], parameterIndexLookupCreators: [] }); + } + this.tableDataSources.get(pattern.key)!.parameterIndexLookupCreators.push(source); + } + } + + for (let pattern of syncRules.getSourceTables()) { + const key = pattern.key; + if (!sourcePatternMap.has(key)) { + sourcePatternMap.set(key, pattern); + } + } + } + + for (let value of this.tableDataSources.values()) { + // Make the arrays unique / remove duplicates: + value.bucketDataSources = Array.from(new Set(value.bucketDataSources)); + value.parameterIndexLookupCreators = Array.from(new Set(value.parameterIndexLookupCreators)); + } + + this.resolvedDataSources = resolvedDataSources; + this.resolvedParameterLookupSources = resolvedParameterLookupSources; + this.sourcePatterns = Array.from(sourcePatternMap.values()); + + for (let pattern of this.sourcePatterns) { + if (pattern.isWildcard) { + this.wildcardPatterns.push(pattern); + } else { + const key = patternKey(pattern); + if (!this.indexedPatterns.has(key)) { + this.indexedPatterns.set(key, []); + } + this.indexedPatterns.get(key)!.push(pattern); + } + } + } + + /** + * + * @param pattern The source database table definition, _not_ the individually derived SourceTables. 
+ * @returns + */ + getMatchingSources(pattern: TablePattern): TableDataSources { + return this.tableDataSources.get(pattern.key) ?? { bucketDataSources: [], parameterIndexLookupCreators: [] }; + } + + getSourceTables(): TablePattern[] { + return this.sourcePatterns; + } + + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + // Equivalent to: + // return this.sourcePatterns.filter((pattern) => pattern.matches(table)); + const tables = this.indexedPatterns.get(patternKey(table)) ?? []; + if (this.wildcardPatterns.length === 0) { + // Fast path - no wildcards + return tables; + } else { + const matchedPatterns = this.wildcardPatterns.filter((pattern) => pattern.matches(table)); + return [...tables, ...matchedPatterns]; + } + } + + applyRowContext( + source: SqliteRow + ): SqliteRow { + // FIXME: This may be different per sync rules - need to handle that + return this.allSyncRules[this.allSyncRules.length - 1].applyRowContext(source); + } + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { + // Important: We only get matching sources here, not all sources. This can help for two things: + // 1. For performance: Skip any not-matching sources. + // 2. For re-replication: We may take a snapshot when adding a new source, with a new SourceTable. + // In that case, we don't want to re-evaluate all existing sources, only the new one. + + const table = options.sourceTable; + // FIXME: Fix API to not require this type assertion + if (!(table instanceof SourceTable)) { + throw new ReplicationAssertionError(`Expected SourceTable instance`); + } + const bucketDataSources: ResolvedDataSource[] = []; + for (let sourceId of table.bucketDataSourceIds) { + const ds = this.resolvedDataSources.get(sourceId); + if (ds) { + bucketDataSources.push(ds); + } + } + + const rawResults: EvaluationResult[] = bucketDataSources.flatMap((dataSource) => dataSource.evaluate(options)); + const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; + const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; + + return { results, errors }; + } + + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { + // FIXME: Fix API to not require this type assertion + if (!(table instanceof SourceTable)) { + throw new ReplicationAssertionError(`Expected SourceTable instance`); + } + let parameterIndexLookupCreators: ResolvedParameterLookupSource[] = []; + for (let sourceId of table.parameterLookupSourceIds) { + const ds = this.resolvedParameterLookupSources.get(sourceId); + if (ds) { + parameterIndexLookupCreators.push(ds); + } + } + const rawResults: EvaluatedParametersResult[] = parameterIndexLookupCreators.flatMap((creator) => + creator.evaluate(table, row) + ); + const results = rawResults.filter(isEvaluatedParameters) as EvaluatedParameters[]; + const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; + return { results, errors }; + } +} + +/** + * Key for a pattern or source table. + * + * Does not support wildcard patterns. 
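+ *
+ * The key is the JSON-encoded [connectionTag, schema, name] tuple, so it can be used directly as a
+ * string Map key for both TablePattern and SourceTableInterface values.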
+ */ +function patternKey(pattern: TablePattern | SourceTableInterface): string { + return JSON.stringify([pattern.connectionTag, pattern.schema, pattern.name]); +} diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts similarity index 67% rename from modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts rename to modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts index 6fab32240..9a8294df1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts @@ -1,5 +1,5 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { HydratedSyncRules, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules'; +import { RowProcessor, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { @@ -13,20 +13,33 @@ import { ServiceError } from '@powersync/lib-services-framework'; import { + BatchedCustomWriteCheckpointOptions, BucketStorageMarkRecordUnavailable, deserializeBson, InternalOpId, isCompleteRow, + maxLsn, + ResolveTableToDropsOptions, SaveOperationTag, + SourceTable, storage, SyncRuleState, utils } from '@powersync/service-core'; import * as timers from 'node:timers/promises'; import { idPrefixFilter, mongoTableId } from '../../utils/util.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js'; +import { + CurrentBucket, + CurrentDataDocument, + RecordedLookup, + SourceKey, + SourceTableDocument, + SyncRuleDocument +} from './models.js'; import { MongoIdSequence } from './MongoIdSequence.js'; +import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js'; import { cacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; import { PersistedBatch } from './PersistedBatch.js'; @@ -45,15 +58,14 @@ const replicationMutex = new utils.Mutex(); export const EMPTY_DATA = new bson.Binary(bson.serialize({})); -export interface MongoBucketBatchOptions { +export interface MongoWriterOptions { db: PowerSyncMongo; - syncRules: HydratedSyncRules; - groupId: number; slotName: string; - lastCheckpointLsn: string | null; - keepaliveOp: InternalOpId | null; - resumeFromLsn: string | null; storeCurrentData: boolean; + + rowProcessor: RowProcessor; + mapping: BucketDefinitionMapping; + /** * Set to true for initial replication. 
*/ @@ -64,86 +76,428 @@ export interface MongoBucketBatchOptions { logger?: Logger; } -export class MongoBucketBatch - extends BaseObserver - implements storage.BucketStorageBatch -{ - private logger: Logger; +interface MongoBucketBatchOptions { + db: PowerSyncMongo; + syncRules: MongoPersistedSyncRules; + lastCheckpointLsn: string | null; + keepaliveOp: InternalOpId | null; + resumeFromLsn: string | null; + logger: Logger; + writer: MongoBucketDataWriter; +} + +export interface ForSyncRulesOptions { + syncRules: MongoPersistedSyncRules; + + lastCheckpointLsn: string | null; + resumeFromLsn: string | null; + keepaliveOp: InternalOpId | null; +} + +export class MongoBucketDataWriter implements storage.BucketDataWriter { + private batch: OperationBatch | null = null; + public readonly rowProcessor: RowProcessor; + write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; private readonly client: mongo.MongoClient; public readonly db: PowerSyncMongo; public readonly session: mongo.ClientSession; - private readonly sync_rules: HydratedSyncRules; - - private readonly group_id: number; - + private readonly logger: Logger; private readonly slot_name: string; private readonly storeCurrentData: boolean; private readonly skipExistingRows: boolean; - private batch: OperationBatch | null = null; - private write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; - private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; - private clearedError = false; - - /** - * Last LSN received associated with a checkpoint. - * - * This could be either: - * 1. A commit LSN. - * 2. A keepalive message LSN. - */ - private last_checkpoint_lsn: string | null = null; - - private persisted_op: InternalOpId | null = null; - - /** - * Last written op, if any. This may not reflect a consistent checkpoint. - */ - public last_flushed_op: InternalOpId | null = null; - - /** - * lastCheckpointLsn is the last consistent commit. - * - * While that is generally a "safe" point to resume from, there are cases where we may want to resume from a different point: - * 1. After an initial snapshot, we don't have a consistent commit yet, but need to resume from the snapshot LSN. - * 2. If "no_checkpoint_before_lsn" is set far in advance, it may take a while to reach that point. We - * may want to resume at incremental points before that. - * - * This is set when creating the batch, but may not be updated afterwards. - */ - public resumeFromLsn: string | null = null; + private readonly mapping: BucketDefinitionMapping; - private needsActivation = true; + private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; + public subWriters: MongoBucketBatch[] = []; - constructor(options: MongoBucketBatchOptions) { - super(); - this.logger = options.logger ?? defaultLogger; - this.client = options.db.client; + constructor(options: MongoWriterOptions) { this.db = options.db; - this.group_id = options.groupId; - this.last_checkpoint_lsn = options.lastCheckpointLsn; - this.resumeFromLsn = options.resumeFromLsn; + this.client = this.db.client; this.session = this.client.startSession(); this.slot_name = options.slotName; - this.sync_rules = options.syncRules; + this.mapping = options.mapping; + this.rowProcessor = options.rowProcessor; this.storeCurrentData = options.storeCurrentData; this.skipExistingRows = options.skipExistingRows; + this.logger = options.logger ?? 
defaultLogger; this.markRecordUnavailable = options.markRecordUnavailable; - this.batch = new OperationBatch(); + } - this.persisted_op = options.keepaliveOp ?? null; + forSyncRules(options: ForSyncRulesOptions): MongoBucketBatch { + const batch = new MongoBucketBatch({ + db: this.db, + syncRules: options.syncRules, + lastCheckpointLsn: options.lastCheckpointLsn, + keepaliveOp: options.keepaliveOp, + resumeFromLsn: options.resumeFromLsn, + logger: this.logger, + writer: this + }); + this.subWriters.push(batch); + return batch; } - addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void { - this.write_checkpoint_batch.push({ - ...checkpoint, - sync_rules_id: this.group_id + async [Symbol.asyncDispose](): Promise { + await this.session.endSession(); + for (let batch of this.subWriters) { + await batch[Symbol.asyncDispose](); + } + } + + get resumeFromLsn(): string | null { + // FIXME: check the logic here when there are multiple batches + let lsn: string | null = null; + for (let sub of this.subWriters) { + // TODO: should this be min instead? + lsn = maxLsn(lsn, sub.resumeFromLsn); + } + return lsn; + } + + async keepalive(lsn: string): Promise { + let didAny = false; + for (let batch of this.subWriters) { + const didBatchKeepalive = await batch.keepalive(lsn); + didAny ||= didBatchKeepalive; + } + return didAny; + } + + async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + let didCommit = false; + for (let batch of this.subWriters) { + const didWriterCommit = await batch.commit(lsn, options); + didCommit ||= didWriterCommit; + } + return didCommit; + } + + async setResumeLsn(lsn: string): Promise { + for (let batch of this.subWriters) { + await batch.setResumeLsn(lsn); + } + } + + private findMatchingSubWriters(tables: SourceTableDocument[]) { + return this.subWriters.filter((subWriter) => { + return tables.some((table) => subWriter.hasTable(table)); }); } - get lastCheckpointLsn() { - return this.last_checkpoint_lsn; + async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) { + const session = this.session; + const ids = tables.map((table) => mongoTableId(table.id)); + + await this.withTransaction(async () => { + await this.db.source_tables.updateMany( + { _id: { $in: ids } }, + { + $set: { + snapshot_done: true + }, + $unset: { + snapshot_status: 1 + } + }, + { session } + ); + + const updatedTables = await this.db.source_tables.find({ _id: { $in: ids } }, { session }).toArray(); + + if (no_checkpoint_before_lsn != null) { + const affectedSubWriters = this.findMatchingSubWriters(updatedTables); + + await this.db.sync_rules.updateOne( + { + _id: { $in: affectedSubWriters.map((w) => w.group_id) } + }, + { + $set: { + last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn + } + }, + { session: this.session } + ); + } + }); + return tables.map((table) => { + const copy = table.clone(); + copy.snapshotComplete = true; + return copy; + }); + } + + async markTableSnapshotRequired(table: SourceTable): Promise { + const doc = await this.db.source_tables.findOne({ _id: mongoTableId(table.id) }); + if (doc == null) { + return; + } + + const subWriters = this.findMatchingSubWriters([doc]); + + await this.db.sync_rules.updateOne( + { + _id: { $in: subWriters.map((w) => w.group_id) } + }, + { + $set: { + snapshot_done: false + } + }, + { session: this.session } + ); + } + + async markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise { + await 
this.db.sync_rules.updateOne( + { + _id: { $in: this.subWriters.map((w) => w.group_id) }, + snapshot_done: { $ne: true } + }, + { + $set: { + snapshot_done: true, + last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn + } + }, + { session: this.session } + ); + } + + async getTable(ref: SourceTable): Promise { + const doc = await this.db.source_tables.findOne({ _id: mongoTableId(ref.id) }); + if (doc == null) { + return null; + } + const sourceTable = new storage.SourceTable({ + id: doc._id, + objectId: doc.relation_id, + schema: doc.schema_name, + connectionTag: ref.connectionTag, + name: doc.table_name, + replicaIdColumns: ref.replicaIdColumns, + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [], + pattern: ref.pattern + }); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; + + sourceTable.syncData = doc.bucket_data_source_ids.length > 0; + sourceTable.syncParameters = doc.parameter_lookup_source_ids.length > 0; + // FIXME: implement sourceTable.syncEvent + return sourceTable; + } + + async resolveTables(options: storage.ResolveTablesOptions): Promise { + const sources = this.rowProcessor.getMatchingSources(options.pattern); + const bucketDataSourceIds = sources.bucketDataSources.map((source) => this.mapping.bucketSourceId(source)); + const parameterLookupSourceIds = sources.parameterIndexLookupCreators.map((source) => + this.mapping.parameterLookupId(source) + ); + + const { connection_id, connection_tag, entity_descriptor } = options; + + const { schema, name, objectId, replicaIdColumns } = entity_descriptor; + + const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ + name: column.name, + type: column.type, + type_oid: column.typeId + })); + let result: SourceTable[] = []; + + let currentTableIds: bson.ObjectId[] = []; + await this.db.client.withSession(async (session) => { + const col = this.db.source_tables; + let filter: mongo.Filter = { + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: normalizedReplicaIdColumns + }; + if (objectId != null) { + filter.relation_id = objectId; + } + let docs = await col.find(filter, { session }).toArray(); + let matchingDocs: SourceTableDocument[] = []; + + let coveredBucketDataSourceIds = new Set(); + let coveredParameterLookupSourceIds = new Set(); + + // Use _all_ docs that match the basic table definition, not only ones that match data sources. 
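+ // A doc only counts as "matching" if it covers at least one of the requested source ids.
+ // Requested ids not covered by any existing doc are collected as pending below and get a new
+ // source_tables document with snapshot_done: false, so the new sources can be snapshotted
+ // independently of tables that are already replicating.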
+ currentTableIds = docs.map((doc) => doc._id); + for (let doc of docs) { + const matchingBucketDataSourceIds = doc.bucket_data_source_ids.filter((id) => bucketDataSourceIds.includes(id)); + const matchingParameterLookupSourceIds = doc.parameter_lookup_source_ids.filter((id) => + parameterLookupSourceIds.includes(id) + ); + if (matchingBucketDataSourceIds.length == 0 && matchingParameterLookupSourceIds.length == 0) { + // Not relevant + continue; + } + matchingDocs.push(doc); + for (let id of matchingBucketDataSourceIds) { + coveredBucketDataSourceIds.add(id); + } + for (let id of matchingParameterLookupSourceIds) { + coveredParameterLookupSourceIds.add(id); + } + } + + const pendingBucketDataSourceIds = bucketDataSourceIds.filter((id) => !coveredBucketDataSourceIds.has(id)); + const pendingParameterLookupSourceIds = parameterLookupSourceIds.filter( + (id) => !coveredParameterLookupSourceIds.has(id) + ); + if (pendingBucketDataSourceIds.length > 0 || pendingParameterLookupSourceIds.length > 0) { + const id = options.idGenerator ? (options.idGenerator() as bson.ObjectId) : new bson.ObjectId(); + const doc: SourceTableDocument = { + _id: id, + connection_id: connection_id, + relation_id: objectId, + schema_name: schema, + table_name: name, + replica_id_columns: null, + replica_id_columns2: normalizedReplicaIdColumns, + snapshot_done: false, + snapshot_status: undefined, + bucket_data_source_ids: pendingBucketDataSourceIds, + parameter_lookup_source_ids: pendingParameterLookupSourceIds + }; + currentTableIds.push(doc._id); + + await col.insertOne(doc, { session }); + matchingDocs.push(doc); + } + + const sourceTables = matchingDocs.map((doc) => { + const sourceTable = new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: objectId, + schema: schema, + name: name, + replicaIdColumns: replicaIdColumns, + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [], + pattern: options.pattern + }); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? 
null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; + + sourceTable.syncData = doc.bucket_data_source_ids.length > 0; + sourceTable.syncParameters = doc.parameter_lookup_source_ids.length > 0; + // FIXME: implement sourceTable.syncEvent + return sourceTable; + }); + + // Detect tables that are either renamed, or have different replica_id_columns + + result = sourceTables; + }); + return result; + } + + async resolveTablesToDrop(options: ResolveTableToDropsOptions): Promise { + const { connection_id, connection_tag, entity_descriptor } = options; + const { schema, name, objectId, replicaIdColumns } = entity_descriptor; + const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ + name: column.name, + type: column.type, + type_oid: column.typeId + })); + const col = this.db.source_tables; + let filter: mongo.Filter = { + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: normalizedReplicaIdColumns + }; + if (objectId != null) { + filter.relation_id = objectId; + } + + let filters: mongo.Filter[] = []; + // Case 1: name matches, but replica_id_columns2 differs + filters.push({ + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: { $ne: normalizedReplicaIdColumns } + }); + if (objectId != null) { + // Case 2: relation_id differs + filters.push({ + connection_id: connection_id, + schema_name: schema, + table_name: name, + relation_id: { $ne: objectId } + }); + // Case 3: relation_id matches, but name differs + filters.push({ + $nor: [ + { + connection_id: connection_id, + schema_name: schema, + table_name: name + } + ], + relation_id: objectId + }); + } + + const truncate = await col + .find({ + $or: filters, + connection_id: connection_id + }) + .toArray(); + const dropTables = truncate.map( + (doc) => + new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: doc.relation_id, + schema: doc.schema_name, + name: doc.table_name, + replicaIdColumns: + doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] + }) + ); + return dropTables; + } + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. + */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void { + for (let writer of this.subWriters) { + writer.addCustomWriteCheckpoint(checkpoint); + } } async flush(options?: storage.BatchBucketFlushOptions): Promise { @@ -185,8 +539,10 @@ export class MongoBucketBatch throw new ReplicationAssertionError('Unexpected last_op == null'); } - this.persisted_op = last_op; - this.last_flushed_op = last_op; + for (let batch of this.subWriters) { + batch.persisted_op = last_op; + batch.last_flushed_op = last_op; + } return { flushed_op: last_op }; } @@ -197,6 +553,7 @@ export class MongoBucketBatch options?: storage.BucketBatchCommitOptions ): Promise { let sizes: Map | undefined = undefined; + if (this.storeCurrentData && !this.skipExistingRows) { // We skip this step if we don't store current_data, since the sizes will // always be small in that case. @@ -212,7 +569,7 @@ export class MongoBucketBatch // the order of processing, which then becomes really tricky to manage. 
// This now takes 2+ queries, but doesn't have any issues with order of operations. const sizeLookups: SourceKey[] = batch.batch.map((r) => { - return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; + return { g: 0, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); sizes = new Map(); @@ -255,7 +612,7 @@ export class MongoBucketBatch continue; } const lookups: SourceKey[] = b.map((r) => { - return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; + return { g: 0, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); let current_data_lookup = new Map(); // With skipExistingRows, we only need to know whether or not the row exists. @@ -270,8 +627,9 @@ export class MongoBucketBatch current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc); } - let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, { - logger: this.logger + let persistedBatch: PersistedBatch | null = new PersistedBatch(transactionSize, { + logger: this.logger, + mapping: this.mapping }); for (let op of b) { @@ -312,7 +670,9 @@ export class MongoBucketBatch } if (didFlush) { - await this.clearError(); + for (let batch of this.subWriters) { + await batch.clearError(); + } } return resumeBatch?.hasData() ? resumeBatch : null; @@ -332,10 +692,10 @@ export class MongoBucketBatch let existing_buckets: CurrentBucket[] = []; let new_buckets: CurrentBucket[] = []; - let existing_lookups: bson.Binary[] = []; - let new_lookups: bson.Binary[] = []; + let existing_lookups: RecordedLookup[] = []; + let new_lookups: RecordedLookup[] = []; - const before_key: SourceKey = { g: this.group_id, t: mongoTableId(record.sourceTable.id), k: beforeId }; + const before_key: SourceKey = { g: 0, t: mongoTableId(record.sourceTable.id), k: beforeId }; if (this.skipExistingRows) { if (record.tag == SaveOperationTag.INSERT) { @@ -467,7 +827,7 @@ export class MongoBucketBatch if (afterId && after && utils.isCompleteRow(this.storeCurrentData, after)) { // Insert or update if (sourceTable.syncData) { - const { results: evaluated, errors: syncErrors } = this.sync_rules.evaluateRowWithErrors({ + const { results: evaluated, errors: syncErrors } = this.rowProcessor.evaluateRowWithErrors({ record: after, sourceTable }); @@ -497,7 +857,9 @@ export class MongoBucketBatch before_buckets: existing_buckets }); new_buckets = evaluated.map((e) => { + const sourceDefinitionId = this.mapping.bucketSourceId(e.source); return { + def: sourceDefinitionId, bucket: e.bucket, table: e.table, id: e.id @@ -507,7 +869,7 @@ export class MongoBucketBatch if (sourceTable.syncParameters) { // Parameters - const { results: paramEvaluated, errors: paramErrors } = this.sync_rules.evaluateParameterRowWithErrors( + const { results: paramEvaluated, errors: paramErrors } = this.rowProcessor.evaluateParameterRowWithErrors( sourceTable, after ); @@ -536,7 +898,9 @@ export class MongoBucketBatch existing_lookups }); new_lookups = paramEvaluated.map((p) => { - return storage.serializeLookup(p.lookup); + const l = storage.serializeLookup(p.lookup); + const d = this.mapping.parameterLookupId(p.lookup.source); + return { l, d }; }); } } @@ -546,7 +910,7 @@ export class MongoBucketBatch // 5. TOAST: Update current data and bucket list. 
if (afterId) { // Insert or update - const after_key: SourceKey = { g: this.group_id, t: mongoTableId(sourceTable.id), k: afterId }; + const after_key: SourceKey = { g: 0, t: mongoTableId(sourceTable.id), k: afterId }; batch.upsertCurrentData(after_key, { data: afterData, buckets: new_buckets, @@ -570,7 +934,7 @@ export class MongoBucketBatch return result; } - private async withTransaction(cb: () => Promise) { + async withTransaction(cb: () => Promise) { await replicationMutex.exclusiveLock(async () => { await this.session.withTransaction( async () => { @@ -591,7 +955,7 @@ export class MongoBucketBatch }); } - private async withReplicationTransaction( + async withReplicationTransaction( description: string, callback: (session: mongo.ClientSession, opSeq: MongoIdSequence) => Promise ): Promise { @@ -646,40 +1010,317 @@ export class MongoBucketBatch } ); - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - last_keepalive_ts: new Date() - } - }, - { session } - ); + // FIXME: Do we need this? + // await this.db.sync_rules.updateOne( + // { + // _id: this.group_id + // }, + // { + // $set: { + // last_keepalive_ts: new Date() + // } + // }, + // { session } + // ); // We don't notify checkpoint here - we don't make any checkpoint updates directly }); } - async [Symbol.asyncDispose]() { - await this.session.endSession(); - super.clearListeners(); - } - - private lastWaitingLogThottled = 0; + async save(record: storage.SaveOptions): Promise { + const { after, before, sourceTable, tag } = record; + for (const event of this.getTableEvents(sourceTable)) { + for (let batch of this.subWriters) { + batch.iterateListeners((cb) => + cb.replicationEvent?.({ + batch: batch, + table: sourceTable, + data: { + op: tag, + after: after && utils.isCompleteRow(this.storeCurrentData, after) ? after : undefined, + before: before && utils.isCompleteRow(this.storeCurrentData, before) ? before : undefined + }, + event + }) + ); + } + } - async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { - const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options }; + /** + * Return if the table is just an event table + */ + if (!sourceTable.syncData && !sourceTable.syncParameters) { + return null; + } - await this.flush(options); + this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); - const now = new Date(); + this.batch ??= new OperationBatch(); + this.batch.push(new RecordOperation(record)); - // Mark relevant write checkpoints as "processed". - // This makes it easier to identify write checkpoints that are "valid" in order. - await this.db.write_checkpoints.updateMany( - { - processed_at_lsn: null, + if (this.batch.shouldFlush()) { + const r = await this.flush(); + // HACK: Give other streams a chance to also flush + await timers.setTimeout(5); + return r; + } + return null; + } + + /** + * Drop is equivalent to TRUNCATE, plus removing our record of the table. 
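+ *
+ * Table data is removed via truncate() and flushed first; the source_tables records are then
+ * deleted in a separate transaction.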
+ */ + async drop(sourceTables: storage.SourceTable[]): Promise { + await this.truncate(sourceTables); + const result = await this.flush(); + + await this.withTransaction(async () => { + for (let table of sourceTables) { + await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) }); + } + }); + return result; + } + + async truncate(sourceTables: storage.SourceTable[]): Promise { + await this.flush(); + + let last_op: InternalOpId | null = null; + for (let table of sourceTables) { + last_op = await this.truncateSingle(table); + } + + if (last_op) { + for (let batch of this.subWriters) { + batch.persisted_op = last_op; + } + return { + flushed_op: last_op + }; + } else { + return null; + } + } + + async truncateSingle(sourceTable: storage.SourceTable): Promise { + let last_op: InternalOpId | null = null; + + // To avoid too large transactions, we limit the amount of data we delete per transaction. + // Since we don't use the record data here, we don't have explicit size limits per batch. + const BATCH_LIMIT = 2000; + + let lastBatchCount = BATCH_LIMIT; + while (lastBatchCount == BATCH_LIMIT) { + await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { + const current_data_filter: mongo.Filter = { + _id: idPrefixFilter({ g: 0, t: mongoTableId(sourceTable.id) }, ['k']), + // Skip soft-deleted data + pending_delete: { $exists: false } + }; + + const cursor = this.db.current_data.find(current_data_filter, { + projection: { + _id: 1, + buckets: 1, + lookups: 1 + }, + limit: BATCH_LIMIT, + session: session + }); + const batch = await cursor.toArray(); + const persistedBatch = new PersistedBatch(0, { logger: this.logger, mapping: this.mapping }); + + for (let value of batch) { + persistedBatch.saveBucketData({ + op_seq: opSeq, + before_buckets: value.buckets, + evaluated: [], + table: sourceTable, + sourceKey: value._id.k + }); + persistedBatch.saveParameterData({ + op_seq: opSeq, + existing_lookups: value.lookups, + evaluated: [], + sourceTable: sourceTable, + sourceKey: value._id.k + }); + + // Since this is not from streaming replication, we can do a hard delete + persistedBatch.hardDeleteCurrentData(value._id); + } + await persistedBatch.flush(this.db, session); + lastBatchCount = batch.length; + + last_op = opSeq.last(); + }); + } + + return last_op!; + } + + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const copy = table.clone(); + const snapshotStatus = { + totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, + replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, + lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? null + }; + copy.snapshotStatus = snapshotStatus; + + await this.withTransaction(async () => { + await this.db.source_tables.updateOne( + { _id: mongoTableId(table.id) }, + { + $set: { + snapshot_status: { + last_key: snapshotStatus.lastKey == null ? 
null : new bson.Binary(snapshotStatus.lastKey), + total_estimated_count: snapshotStatus.totalEstimatedCount, + replicated_count: snapshotStatus.replicatedCount + } + } + }, + { session: this.session } + ); + }); + + return copy; + } + + /** + * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable} + */ + protected getTableEvents(table: storage.SourceTable): SqlEventDescriptor[] { + return this.rowProcessor.eventDescriptors.filter((evt) => + [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table)) + ); + } +} + +export class MongoBucketBatch + extends BaseObserver + implements storage.BucketStorageBatch +{ + private logger: Logger; + + public readonly db: PowerSyncMongo; + public readonly session: mongo.ClientSession; + + public readonly group_id: number; + + private clearedError = false; + + /** + * Last LSN received associated with a checkpoint. + * + * This could be either: + * 1. A commit LSN. + * 2. A keepalive message LSN. + */ + private last_checkpoint_lsn: string | null = null; + + persisted_op: InternalOpId | null = null; + + /** + * Last written op, if any. This may not reflect a consistent checkpoint. + */ + public last_flushed_op: InternalOpId | null = null; + + /** + * lastCheckpointLsn is the last consistent commit. + * + * While that is generally a "safe" point to resume from, there are cases where we may want to resume from a different point: + * 1. After an initial snapshot, we don't have a consistent commit yet, but need to resume from the snapshot LSN. + * 2. If "no_checkpoint_before_lsn" is set far in advance, it may take a while to reach that point. We + * may want to resume at incremental points before that. + * + * This is set when creating the batch, but may not be updated afterwards. + */ + public readonly resumeFromLsn: string | null = null; + + private needsActivation = true; + + private readonly writer: MongoBucketDataWriter; + + public readonly mapping: BucketDefinitionMapping; + + constructor(options: MongoBucketBatchOptions) { + super(); + this.logger = options.logger ?? defaultLogger; + this.db = options.db; + this.group_id = options.syncRules.id; + this.last_checkpoint_lsn = options.lastCheckpointLsn; + this.resumeFromLsn = options.resumeFromLsn; + this.writer = options.writer; + this.session = this.writer.session; + this.mapping = options.syncRules.mapping; + + this.persisted_op = options.keepaliveOp ?? 
null; + } + + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + return await this.writer.updateTableProgress(table, progress); + } + + hasTable(sourceTable: SourceTableDocument): boolean { + return ( + sourceTable.bucket_data_source_ids.some((id) => this.mapping.hasBucketSourceId(id)) || + sourceTable.parameter_lookup_source_ids.some((id) => this.mapping.hasParameterLookupId(id)) + ); + } + + save(record: storage.SaveOptions): Promise { + return this.writer.save(record); + } + truncate(sourceTables: storage.SourceTable[]): Promise { + return this.writer.truncate(sourceTables); + } + drop(sourceTables: storage.SourceTable[]): Promise { + return this.writer.truncate(sourceTables); + } + flush(options?: storage.BatchBucketFlushOptions): Promise { + return this.writer.flush(options); + } + + addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void { + this.writer.write_checkpoint_batch.push({ + ...checkpoint, + sync_rules_id: this.group_id + }); + } + + get lastCheckpointLsn() { + return this.last_checkpoint_lsn; + } + + async [Symbol.asyncDispose]() { + await this.dispose(); + } + + async dispose(): Promise { + await this.session.endSession(); + super.clearListeners(); + } + + private lastWaitingLogThottled = 0; + + async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options }; + + await this.writer.flush(options); + + const now = new Date(); + + // Mark relevant write checkpoints as "processed". + // This makes it easier to identify write checkpoints that are "valid" in order. + await this.db.write_checkpoints.updateMany( + { + processed_at_lsn: null, 'lsns.1': { $lte: lsn } }, { @@ -925,165 +1566,6 @@ export class MongoBucketBatch ); } - async save(record: storage.SaveOptions): Promise { - const { after, before, sourceTable, tag } = record; - for (const event of this.getTableEvents(sourceTable)) { - this.iterateListeners((cb) => - cb.replicationEvent?.({ - batch: this, - table: sourceTable, - data: { - op: tag, - after: after && utils.isCompleteRow(this.storeCurrentData, after) ? after : undefined, - before: before && utils.isCompleteRow(this.storeCurrentData, before) ? before : undefined - }, - event - }) - ); - } - - /** - * Return if the table is just an event table - */ - if (!sourceTable.syncData && !sourceTable.syncParameters) { - return null; - } - - this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); - - this.batch ??= new OperationBatch(); - this.batch.push(new RecordOperation(record)); - - if (this.batch.shouldFlush()) { - const r = await this.flush(); - // HACK: Give other streams a chance to also flush - await timers.setTimeout(5); - return r; - } - return null; - } - - /** - * Drop is equivalent to TRUNCATE, plus removing our record of the table. 
- */ - async drop(sourceTables: storage.SourceTable[]): Promise { - await this.truncate(sourceTables); - const result = await this.flush(); - - await this.withTransaction(async () => { - for (let table of sourceTables) { - await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) }); - } - }); - return result; - } - - async truncate(sourceTables: storage.SourceTable[]): Promise { - await this.flush(); - - let last_op: InternalOpId | null = null; - for (let table of sourceTables) { - last_op = await this.truncateSingle(table); - } - - if (last_op) { - this.persisted_op = last_op; - return { - flushed_op: last_op - }; - } else { - return null; - } - } - - async truncateSingle(sourceTable: storage.SourceTable): Promise { - let last_op: InternalOpId | null = null; - - // To avoid too large transactions, we limit the amount of data we delete per transaction. - // Since we don't use the record data here, we don't have explicit size limits per batch. - const BATCH_LIMIT = 2000; - - let lastBatchCount = BATCH_LIMIT; - while (lastBatchCount == BATCH_LIMIT) { - await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { - const current_data_filter: mongo.Filter = { - _id: idPrefixFilter({ g: this.group_id, t: mongoTableId(sourceTable.id) }, ['k']), - // Skip soft-deleted data - pending_delete: { $exists: false } - }; - - const cursor = this.db.current_data.find(current_data_filter, { - projection: { - _id: 1, - buckets: 1, - lookups: 1 - }, - limit: BATCH_LIMIT, - session: session - }); - const batch = await cursor.toArray(); - const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger }); - - for (let value of batch) { - persistedBatch.saveBucketData({ - op_seq: opSeq, - before_buckets: value.buckets, - evaluated: [], - table: sourceTable, - sourceKey: value._id.k - }); - persistedBatch.saveParameterData({ - op_seq: opSeq, - existing_lookups: value.lookups, - evaluated: [], - sourceTable: sourceTable, - sourceKey: value._id.k - }); - - // Since this is not from streaming replication, we can do a hard delete - persistedBatch.hardDeleteCurrentData(value._id); - } - await persistedBatch.flush(this.db, session); - lastBatchCount = batch.length; - - last_op = opSeq.last(); - }); - } - - return last_op!; - } - - async updateTableProgress( - table: storage.SourceTable, - progress: Partial - ): Promise { - const copy = table.clone(); - const snapshotStatus = { - totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, - replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, - lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? null - }; - copy.snapshotStatus = snapshotStatus; - - await this.withTransaction(async () => { - await this.db.source_tables.updateOne( - { _id: mongoTableId(table.id) }, - { - $set: { - snapshot_status: { - last_key: snapshotStatus.lastKey == null ? 
null : new bson.Binary(snapshotStatus.lastKey), - total_estimated_count: snapshotStatus.totalEstimatedCount, - replicated_count: snapshotStatus.replicatedCount - } - } - }, - { session: this.session } - ); - }); - - return copy; - } - async markAllSnapshotDone(no_checkpoint_before_lsn: string) { await this.db.sync_rules.updateOne( { @@ -1103,62 +1585,14 @@ export class MongoBucketBatch } async markTableSnapshotRequired(table: storage.SourceTable): Promise { - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - snapshot_done: false - } - }, - { session: this.session } - ); + await this.writer.markTableSnapshotRequired(table); } async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) { - const session = this.session; - const ids = tables.map((table) => mongoTableId(table.id)); - - await this.withTransaction(async () => { - await this.db.source_tables.updateMany( - { _id: { $in: ids } }, - { - $set: { - snapshot_done: true - }, - $unset: { - snapshot_status: 1 - } - }, - { session } - ); - - if (no_checkpoint_before_lsn != null) { - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - last_keepalive_ts: new Date() - }, - $max: { - no_checkpoint_before: no_checkpoint_before_lsn - } - }, - { session: this.session } - ); - } - }); - return tables.map((table) => { - const copy = table.clone(); - copy.snapshotComplete = true; - return copy; - }); + return this.writer.markTableSnapshotDone(tables, no_checkpoint_before_lsn); } - protected async clearError(): Promise { + async clearError(): Promise { // No need to clear an error more than once per batch, since an error would always result in restarting the batch. if (this.clearedError) { return; @@ -1177,15 +1611,6 @@ export class MongoBucketBatch ); this.clearedError = true; } - - /** - * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable} - */ - protected getTableEvents(table: storage.SourceTable): SqlEventDescriptor[] { - return this.sync_rules.eventDescriptors.filter((evt) => - [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table)) - ); - } } export function currentBucketKey(b: CurrentBucket) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts index abcb15845..5ebcb4020 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts @@ -3,6 +3,7 @@ import { addPartialChecksums, bson, BucketChecksum, + BucketChecksumRequest, ChecksumCache, ChecksumMap, FetchPartialBucketChecksum, @@ -13,6 +14,7 @@ import { PartialOrFullChecksum } from '@powersync/service-core'; import { PowerSyncMongo } from './db.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; /** * Checksum calculation options, primarily for tests. @@ -47,6 +49,7 @@ export class MongoChecksums { constructor( private db: PowerSyncMongo, private group_id: number, + private mapping: BucketDefinitionMapping, private options?: MongoChecksumOptions ) {} @@ -68,7 +71,7 @@ export class MongoChecksums { * Calculate checksums, utilizing the cache for partial checkums, and querying the remainder from * the database (bucket_state + bucket_data). 
*/ - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.cache.getChecksumMap(checkpoint, buckets); } @@ -92,10 +95,12 @@ export class MongoChecksums { const preFilters: any[] = []; for (let request of batch) { + const sourceId = this.mapping.bucketSourceId(request.source); + if (request.start == null) { preFilters.push({ _id: { - g: this.group_id, + g: sourceId, b: request.bucket }, 'compacted_state.op_id': { $exists: true, $lte: request.end } @@ -206,15 +211,16 @@ export class MongoChecksums { while (requests.size > 0) { const filters: any[] = []; for (let request of requests.values()) { + const sourceId = this.mapping.bucketSourceId(request.source); filters.push({ _id: { $gt: { - g: this.group_id, + g: sourceId, b: request.bucket, o: request.start ?? new bson.MinKey() }, $lte: { - g: this.group_id, + g: sourceId, b: request.bucket, o: request.end } @@ -291,7 +297,8 @@ export class MongoChecksums { requests.set(bucket, { bucket, start: doc.last_op, - end: req!.end + end: req!.end, + source: req!.source }); } else { // All done for this bucket diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts index efdcdc61c..0f61708bf 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts @@ -2,6 +2,7 @@ import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongod import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework'; import { addChecksums, + BucketChecksumRequest, InternalOpId, isPartialChecksum, PopulateChecksumCacheResults, @@ -13,8 +14,11 @@ import { PowerSyncMongo } from './db.js'; import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js'; import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js'; import { cacheKey } from './OperationBatch.js'; +import { BucketDataSource, SqlSyncRules } from '@powersync/service-sync-rules'; interface CurrentBucketState { + def: number; + /** Bucket name */ bucket: string; @@ -62,7 +66,6 @@ const DEFAULT_CLEAR_BATCH_LIMIT = 5000; const DEFAULT_MOVE_BATCH_LIMIT = 2000; const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000; const DEFAULT_MIN_BUCKET_CHANGES = 10; -const DEFAULT_MIN_CHANGE_RATIO = 0.1; /** This default is primarily for tests. */ const DEFAULT_MEMORY_LIMIT_MB = 64; @@ -76,24 +79,20 @@ export class MongoCompactor { private moveBatchQueryLimit: number; private clearBatchLimit: number; private minBucketChanges: number; - private minChangeRatio: number; private maxOpId: bigint; private buckets: string[] | undefined; private signal?: AbortSignal; - private group_id: number; constructor( private storage: MongoSyncBucketStorage, private db: PowerSyncMongo, options?: MongoCompactOptions ) { - this.group_id = storage.group_id; this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024; this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT; this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT; this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT; this.minBucketChanges = options?.minBucketChanges ?? DEFAULT_MIN_BUCKET_CHANGES; - this.minChangeRatio = options?.minChangeRatio ?? 
DEFAULT_MIN_CHANGE_RATIO; this.maxOpId = options?.maxOpId ?? 0n; this.buckets = options?.compactBuckets; this.signal = options?.signal; @@ -106,39 +105,54 @@ export class MongoCompactor { */ async compact() { if (this.buckets) { - for (let bucket of this.buckets) { - // We can make this more efficient later on by iterating - // through the buckets in a single query. - // That makes batching more tricky, so we leave for later. - await this.compactSingleBucket(bucket); - } + throw new Error('Compacting specific buckets is not implemented currently'); + // for (let bucket of this.buckets) { + // // We can make this more efficient later on by iterating + // // through the buckets in a single query. + // // That makes batching more tricky, so we leave for later. + // await this.compactSingleBucket(bucket); + // } } else { await this.compactDirtyBuckets(); } } private async compactDirtyBuckets() { - for await (let buckets of this.dirtyBucketBatches({ - minBucketChanges: this.minBucketChanges, - minChangeRatio: this.minChangeRatio - })) { - if (this.signal?.aborted) { + const persistedSyncRules = this.storage.sync_rules.parsed({ defaultSchema: 'n/a' }); + const hydrated = persistedSyncRules.hydratedSyncRules(); + const sqlSyncRules = hydrated.definition; + + while (!this.signal?.aborted) { + // Process all buckets with 10 or more changes since last time. + // We exclude the last 100 compacted buckets, to avoid repeatedly re-compacting the same buckets over and over + // if they are modified while compacting. + const TRACK_RECENTLY_COMPACTED_NUMBER = 100; + + let recentlyCompacted: string[] = []; + const buckets = await this.dirtyBucketBatch({ + sqlSyncRules, + minBucketChanges: this.minBucketChanges, + exclude: recentlyCompacted + }); + if (buckets.length == 0) { + // All done break; } - if (buckets.length == 0) { - continue; + for (let { bucket, def } of buckets) { + await this.compactSingleBucket(def, bucket); + recentlyCompacted.push(bucket); } - - for (let { bucket } of buckets) { - await this.compactSingleBucket(bucket); + if (recentlyCompacted.length > TRACK_RECENTLY_COMPACTED_NUMBER) { + recentlyCompacted = recentlyCompacted.slice(-TRACK_RECENTLY_COMPACTED_NUMBER); } } } - private async compactSingleBucket(bucket: string) { + private async compactSingleBucket(def: number, bucket: string) { const idLimitBytes = this.idLimitBytes; let currentState: CurrentBucketState = { + def, bucket, seen: new Map(), trackingSize: 0, @@ -152,14 +166,14 @@ export class MongoCompactor { // Constant lower bound const lowerBound: BucketDataKey = { - g: this.group_id, + g: def, b: bucket, o: new mongo.MinKey() as any }; // Upper bound is adjusted for each batch let upperBound: BucketDataKey = { - g: this.group_id, + g: def, b: bucket, o: new mongo.MaxKey() as any }; @@ -287,7 +301,7 @@ export class MongoCompactor { currentState.seen.clear(); if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) { logger.info( - `Inserting CLEAR at ${this.group_id}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations` + `Inserting CLEAR at ${currentState.def}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations` ); // Need flush() before clear() await this.flush(); @@ -314,7 +328,7 @@ export class MongoCompactor { updateOne: { filter: { _id: { - g: this.group_id, + g: state.def, b: state.bucket } }, @@ -377,12 +391,12 @@ export class MongoCompactor { const opFilter = { _id: { $gte: { - g: this.group_id, + g: currentState.def, b: bucket, o: new 
mongo.MinKey() as any }, $lte: { - g: this.group_id, + g: currentState.def, b: bucket, o: clearOp } @@ -444,7 +458,7 @@ export class MongoCompactor { { _id: { $gte: { - g: this.group_id, + g: currentState.def, b: bucket, o: new mongo.MinKey() as any }, @@ -484,20 +498,16 @@ export class MongoCompactor { * Subset of compact, only populating checksums where relevant. */ async populateChecksums(options: { minBucketChanges: number }): Promise { + const sqlSyncRules = this.storage.sync_rules.parsed({ defaultSchema: 'n/a' }).hydratedSyncRules().definition; let count = 0; - for await (let buckets of this.dirtyBucketBatches({ - minBucketChanges: options.minBucketChanges, - minChangeRatio: 0 - })) { - if (this.signal?.aborted) { - break; - } + while (!this.signal?.aborted) { + const buckets = await this.dirtyBucketBatch({ ...options, sqlSyncRules }); if (buckets.length == 0) { - continue; + // All done + break; } - const start = Date.now(); - logger.info(`Calculating checksums for batch of ${buckets.length} buckets, starting at ${buckets[0].bucket}`); + logger.info(`Calculating checksums for batch of ${buckets.length} buckets`); // Filter batch by estimated bucket size, to reduce possibility of timeouts let checkBuckets: typeof buckets = []; @@ -509,7 +519,7 @@ export class MongoCompactor { break; } } - await this.updateChecksumsBatch(checkBuckets.map((b) => b.bucket)); + await this.updateChecksumsBatch(checkBuckets); logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`); count += buckets.length; } @@ -517,84 +527,77 @@ export class MongoCompactor { } /** - * Return batches of dirty buckets. + * Returns a batch of dirty buckets - buckets with most changes first. * - * Can be used to iterate through all buckets. - * - * minBucketChanges: minimum number of changes for a bucket to be included in the results. - * minChangeRatio: minimum ratio of changes to total ops for a bucket to be included in the results, number between 0 and 1. + * This cannot be used to iterate on its own - the client is expected to process these buckets and + * set estimate_since_compact.count: 0 when done, before fetching the next batch. */ - private async *dirtyBucketBatches(options: { + private async dirtyBucketBatch(options: { + sqlSyncRules: SqlSyncRules; minBucketChanges: number; - minChangeRatio: number; - }): AsyncGenerator<{ bucket: string; estimatedCount: number }[]> { - // Previously, we used an index on {_id.g: 1, estimate_since_compact.count: 1} to only buckets with changes. - // This works well if there are only a small number of buckets with changes. - // However, if buckets are continuosly modified while we are compacting, we get the same buckets over and over again. - // This has caused the compact process to re-read the same collection around 5x times in total, which is very inefficient. - // To solve this, we now just iterate through all buckets, and filter out the ones with low changes. 
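Note: the reworked dirty-bucket selection in dirtyBucketBatch below relies on a compound index over the per-bucket change estimate (referenced in a comment inside that method), and the index itself is not created in this hunk. A minimal sketch of the assumed migration, with an illustrative function and index name, assuming access to a PowerSyncMongo instance:

import { PowerSyncMongo } from './db.js';

// Sketch only (assumed migration, not part of this change): create the descending index that
// dirtyBucketBatch relies on, so the most-changed buckets sort first within each definition id.
export async function ensureDirtyBucketIndex(db: PowerSyncMongo): Promise<void> {
  await db.bucket_state.createIndex(
    { '_id.g': 1, 'estimate_since_compact.count': -1 },
    { name: 'bucket_state_estimate_since_compact' } // illustrative name
  );
}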
- + exclude?: string[]; + }): Promise<{ def: number; bucket: string; estimatedCount: number; source: BucketDataSource }[]> { if (options.minBucketChanges <= 0) { throw new ReplicationAssertionError('minBucketChanges must be >= 1'); } - let lastId = { g: this.group_id, b: new mongo.MinKey() as any }; - const maxId = { g: this.group_id, b: new mongo.MaxKey() as any }; - while (true) { - const batch = await this.db.bucket_state - .find( - { - _id: { $gt: lastId, $lt: maxId }, - 'estimate_since_compact.count': { $gte: options.minBucketChanges } - }, - { - projection: { - _id: 1, - estimate_since_compact: 1, - compacted_state: 1 - }, - sort: { - _id: 1 - }, - limit: 2000, - maxTimeMS: MONGO_OPERATION_TIMEOUT_MS - } - ) - .toArray(); - if (batch.length == 0) { - break; - } - lastId = batch[batch.length - 1]._id; - const mapped = batch.map((b) => { - const updatedCount = b.estimate_since_compact?.count ?? 0; - const totalCount = (b.compacted_state?.count ?? 0) + updatedCount; - const updatedBytes = b.estimate_since_compact?.bytes ?? 0; - const totalBytes = (b.compacted_state?.bytes ?? 0) + updatedBytes; - const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0; - const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0; - return { - bucket: b._id.b, - estimatedCount: totalCount, - dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes) - }; - }); - const filtered = mapped.filter( - (b) => b.estimatedCount >= options.minBucketChanges && b.dirtyRatio >= options.minChangeRatio - ); - yield filtered; + + const mapping = this.storage.sync_rules.mapping; + let definitions = new Map(); + for (let source of options.sqlSyncRules.bucketDataSources) { + const id = mapping.bucketSourceId(source); + definitions.set(id, source); } + + // We make use of an index on {_id.g: 1, 'estimate_since_compact.count': -1} + const dirtyBuckets = await this.db.bucket_state + .find( + { + '_id.g': { $in: [...definitions.keys()] }, + 'estimate_since_compact.count': { $gte: options.minBucketChanges }, + '_id.b': { $nin: options.exclude ?? [] } + }, + { + projection: { + _id: 1, + estimate_since_compact: 1, + compacted_state: 1 + }, + sort: { + 'estimate_since_compact.count': -1 + }, + limit: 200, + maxTimeMS: MONGO_OPERATION_TIMEOUT_MS + } + ) + .toArray(); + + return dirtyBuckets.map((bucket) => ({ + def: bucket._id.g, + bucket: bucket._id.b, + estimatedCount: bucket.estimate_since_compact!.count + (bucket.compacted_state?.count ?? 0), + source: definitions.get(bucket._id.g)! 
+ })); } - private async updateChecksumsBatch(buckets: string[]) { + private async updateChecksumsBatch(buckets: BucketChecksumRequest[]) { + const sourceMap = new Map(buckets.map((b) => [b.bucket, b.source])); + const checksums = await this.storage.checksums.computePartialChecksumsDirect( buckets.map((bucket) => { return { - bucket, + bucket: bucket.bucket, + source: bucket.source, end: this.maxOpId }; }) ); for (let bucketChecksum of checksums.values()) { + const source = sourceMap.get(bucketChecksum.bucket); + if (!source) { + throw new ServiceAssertionError(`Unknown source for bucket ${bucketChecksum.bucket}`); + } + const sourceId = this.storage.sync_rules.mapping.bucketSourceId(source); if (isPartialChecksum(bucketChecksum)) { // Should never happen since we don't specify `start` throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`); @@ -604,7 +607,7 @@ export class MongoCompactor { updateOne: { filter: { _id: { - g: this.group_id, + g: sourceId, b: bucketChecksum.bucket } }, diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts index 3b7f6add6..c567c1d49 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts @@ -1,9 +1,10 @@ +import { mongo } from '@powersync/lib-service-mongodb'; import { logger } from '@powersync/lib-services-framework'; import { bson, CompactOptions, InternalOpId } from '@powersync/service-core'; import { LRUCache } from 'lru-cache'; import { PowerSyncMongo } from './db.js'; -import { mongo } from '@powersync/lib-service-mongodb'; import { BucketParameterDocument } from './models.js'; +import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js'; /** * Compacts parameter lookup data (the bucket_parameters collection). @@ -15,13 +16,13 @@ import { BucketParameterDocument } from './models.js'; export class MongoParameterCompactor { constructor( private db: PowerSyncMongo, - private group_id: number, + private storage: MongoSyncBucketStorage, private checkpoint: InternalOpId, private options: CompactOptions ) {} async compact() { - logger.info(`Compacting parameters for group ${this.group_id} up to checkpoint ${this.checkpoint}`); + logger.info(`Compacting parameters for group ${this.storage.group_id} up to checkpoint ${this.checkpoint}`); // This is the currently-active checkpoint. // We do not remove any data that may be used by this checkpoint. // snapshot queries ensure that if any clients are still using older checkpoints, they would @@ -32,9 +33,12 @@ export class MongoParameterCompactor { // In theory, we could let MongoDB do more of the work here, by grouping by (key, lookup) // in MongoDB already. However, that risks running into cases where MongoDB needs to process // very large amounts of data before returning results, which could lead to timeouts. + + // Note: This does _not_ currently filter by sync rules version. + // We may need to change the storage structure to group by parameter index lookup creator id in the future. 
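// Illustration only (not implemented in this change): the note above could eventually be addressed by
// filtering on the new `def` field that bucket_parameters documents now carry (see BucketParameterDocument
// in models.ts later in this diff), along these lines:
//
//   const lookupIds: number[] = ...; // parameter lookup ids owned by this sync rules version,
//                                    // derived from the mapping (hypothetical helper, not in this diff)
//   const cursor = this.db.bucket_parameters.find(
//     { def: { $in: lookupIds } },
//     { sort: { lookup: 1, _id: 1 } }
//   );
//
// This would also need the index flagged by the "FIXME: Index this" comment elsewhere in this diff.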
const cursor = this.db.bucket_parameters.find( { - 'key.g': this.group_id + 'key.g': 0 }, { sort: { lookup: 1, _id: 1 }, diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index dc0fc5237..957047036 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -1,20 +1,60 @@ -import { SyncConfigWithErrors, HydratedSyncRules, versionedHydrationState } from '@powersync/service-sync-rules'; +import { + BucketDataScope, + BucketDataSource, + CompatibilityOption, + DEFAULT_HYDRATION_STATE, + HydratedSyncRules, + HydrationState, + ParameterIndexLookupCreator, + SyncConfigWithErrors, + versionedHydrationState +} from '@powersync/service-sync-rules'; import { storage } from '@powersync/service-core'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly slot_name: string; + public readonly hydrationState: HydrationState; constructor( public readonly id: number, public readonly sync_rules: SyncConfigWithErrors, public readonly checkpoint_lsn: string | null, - slot_name: string | null + slot_name: string | null, + public readonly mapping: BucketDefinitionMapping ) { this.slot_name = slot_name ?? `powersync_${id}`; + if (!this.sync_rules.config.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + this.hydrationState = DEFAULT_HYDRATION_STATE; + } else if (this.mapping == null) { + this.hydrationState = versionedHydrationState(this.id); + } else { + this.hydrationState = new MongoHydrationState(this.mapping); + } } hydratedSyncRules(): HydratedSyncRules { - return this.sync_rules.config.hydrate({ hydrationState: versionedHydrationState(this.id) }); + return this.sync_rules.config.hydrate({ hydrationState: this.hydrationState }); + } +} + +class MongoHydrationState implements HydrationState { + constructor(private mapping: BucketDefinitionMapping) {} + + getBucketSourceScope(source: BucketDataSource): BucketDataScope { + const defId = this.mapping.bucketSourceId(source); + return { + bucketPrefix: defId.toString(16), + source: source + }; + } + getParameterIndexLookupScope(source: ParameterIndexLookupCreator) { + const defId = this.mapping.parameterLookupId(source); + return { + lookupName: defId.toString(16), + queryId: '', + source + }; } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts index a843d9a00..8a2bbc092 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts @@ -1,6 +1,7 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { storage } from '@powersync/service-core'; import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { MongoSyncRulesLock } from './MongoSyncRulesLock.js'; import { PowerSyncMongo } from './db.js'; @@ -17,6 +18,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule public readonly 
last_keepalive_ts: Date | null; public readonly last_checkpoint_ts: Date | null; public readonly active: boolean; + public readonly mapping: BucketDefinitionMapping; public current_lock: MongoSyncRulesLock | null = null; @@ -33,6 +35,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule this.last_fatal_error_ts = doc.last_fatal_error_ts; this.last_checkpoint_ts = doc.last_checkpoint_ts; this.last_keepalive_ts = doc.last_keepalive_ts; + this.mapping = BucketDefinitionMapping.fromSyncRules(doc); this.active = doc.state == 'ACTIVE'; } @@ -41,12 +44,13 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule this.id, SqlSyncRules.fromYaml(this.sync_rules_content, options), this.last_checkpoint_lsn, - this.slot_name + this.slot_name, + this.mapping ); } - async lock() { - const lock = await MongoSyncRulesLock.createLock(this.db, this); + async lock(session: mongo.ClientSession | undefined = undefined): Promise { + const lock = await MongoSyncRulesLock.createLock(this.db, this, session); this.current_lock = lock; return lock; } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index d25ff1087..191f9812f 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -8,6 +8,8 @@ import { } from '@powersync/lib-services-framework'; import { BroadcastIterable, + BucketChecksumRequest, + BucketDataRequest, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, deserializeParameterLookup, @@ -21,6 +23,7 @@ import { ProtocolOpId, ReplicationCheckpoint, storage, + SyncRuleState, utils, WatchWriteCheckpointOptions } from '@powersync/service-core'; @@ -31,12 +34,14 @@ import { LRUCache } from 'lru-cache'; import * as timers from 'timers/promises'; import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js'; import { MongoBucketStorage } from '../MongoBucketStorage.js'; +import { MongoPersistedSyncRules } from '../storage-index.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js'; -import { MongoBucketBatch } from './MongoBucketBatch.js'; +import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey } from './models.js'; import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoParameterCompactor } from './MongoParameterCompactor.js'; +import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js'; import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; export interface MongoSyncBucketStorageOptions { @@ -61,20 +66,24 @@ export class MongoSyncBucketStorage private readonly db: PowerSyncMongo; readonly checksums: MongoChecksums; - private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; + private parsedSyncRulesCache: + | { parsed: MongoPersistedSyncRules; hydrated: HydratedSyncRules; options: storage.ParseSyncRulesOptions } + | undefined; private writeCheckpointAPI: MongoWriteCheckpointAPI; + private readonly mapping: BucketDefinitionMapping; constructor( public readonly factory: 
MongoBucketStorage, public readonly group_id: number, - private readonly sync_rules: storage.PersistedSyncRulesContent, + public readonly sync_rules: MongoPersistedSyncRulesContent, public readonly slot_name: string, writeCheckpointMode?: storage.WriteCheckpointMode, options?: MongoSyncBucketStorageOptions ) { super(); this.db = factory.db; - this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions); + this.mapping = this.sync_rules.mapping; + this.checksums = new MongoChecksums(this.db, this.group_id, this.mapping, options?.checksumOptions); this.writeCheckpointAPI = new MongoWriteCheckpointAPI({ db: this.db, mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED, @@ -101,17 +110,23 @@ export class MongoSyncBucketStorage }); } - getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules { + getParsedSyncRules(options: storage.ParseSyncRulesOptions): MongoPersistedSyncRules { + this.getHydratedSyncRules(options); + return this.parsedSyncRulesCache!.parsed; + } + + getHydratedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules { const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {}; /** * Check if the cached sync rules, if present, had the same options. * Parse sync rules if the options are different or if there is no cached value. */ if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) { - this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options }; + const parsed = this.sync_rules.parsed(options); + this.parsedSyncRulesCache = { parsed, hydrated: parsed.hydratedSyncRules(), options }; } - return this.parsedSyncRulesCache!.parsed; + return this.parsedSyncRulesCache!.hydrated; } async getCheckpoint(): Promise { @@ -155,141 +170,8 @@ export class MongoSyncBucketStorage }); } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { - const doc = await this.db.sync_rules.findOne( - { - _id: this.group_id - }, - { projection: { last_checkpoint_lsn: 1, no_checkpoint_before: 1, keepalive_op: 1, snapshot_lsn: 1 } } - ); - const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null; - - await using batch = new MongoBucketBatch({ - logger: options.logger, - db: this.db, - syncRules: this.sync_rules.parsed(options).hydratedSyncRules(), - groupId: this.group_id, - slotName: this.slot_name, - lastCheckpointLsn: checkpoint_lsn, - resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), - keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null, - storeCurrentData: options.storeCurrentData, - skipExistingRows: options.skipExistingRows ?? 
false, - markRecordUnavailable: options.markRecordUnavailable - }); - this.iterateListeners((cb) => cb.batchStarted?.(batch)); - - await callback(batch); - await batch.flush(); - if (batch.last_flushed_op != null) { - return { flushed_op: batch.last_flushed_op }; - } else { - return null; - } - } - - async resolveTable(options: storage.ResolveTableOptions): Promise { - const { group_id, connection_id, connection_tag, entity_descriptor } = options; - - const { schema, name, objectId, replicaIdColumns } = entity_descriptor; - - const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ - name: column.name, - type: column.type, - type_oid: column.typeId - })); - let result: storage.ResolveTableResult | null = null; - await this.db.client.withSession(async (session) => { - const col = this.db.source_tables; - let filter: Partial = { - group_id: group_id, - connection_id: connection_id, - schema_name: schema, - table_name: name, - replica_id_columns2: normalizedReplicaIdColumns - }; - if (objectId != null) { - filter.relation_id = objectId; - } - let doc = await col.findOne(filter, { session }); - if (doc == null) { - doc = { - _id: new bson.ObjectId(), - group_id: group_id, - connection_id: connection_id, - relation_id: objectId, - schema_name: schema, - table_name: name, - replica_id_columns: null, - replica_id_columns2: normalizedReplicaIdColumns, - snapshot_done: false, - snapshot_status: undefined - }; - - await col.insertOne(doc, { session }); - } - const sourceTable = new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: objectId, - schema: schema, - name: name, - replicaIdColumns: replicaIdColumns, - snapshotComplete: doc.snapshot_done ?? true - }); - sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable); - sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable); - sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable); - sourceTable.snapshotStatus = - doc.snapshot_status == null - ? undefined - : { - lastKey: doc.snapshot_status.last_key?.buffer ?? null, - totalEstimatedCount: doc.snapshot_status.total_estimated_count, - replicatedCount: doc.snapshot_status.replicated_count - }; - - let dropTables: storage.SourceTable[] = []; - // Detect tables that are either renamed, or have different replica_id_columns - let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; - if (objectId != null) { - // Only detect renames if the source uses relation ids. - truncateFilter.push({ relation_id: objectId }); - } - const truncate = await col - .find( - { - group_id: group_id, - connection_id: connection_id, - _id: { $ne: doc._id }, - $or: truncateFilter - }, - { session } - ) - .toArray(); - dropTables = truncate.map( - (doc) => - new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: doc.relation_id, - schema: doc.schema_name, - name: doc.table_name, - replicaIdColumns: - doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], - snapshotComplete: doc.snapshot_done ?? 
true - }) - ); - - result = { - table: sourceTable, - dropTables: dropTables - }; - }); - return result!; + async createWriter(options: storage.CreateWriterOptions): Promise { + return await this.factory.createCombinedWriter([this], options); } async getParameterSets( @@ -321,7 +203,7 @@ export class MongoSyncBucketStorage [ { $match: { - 'key.g': this.group_id, + 'key.g': 0, lookup: { $in: lookupFilter }, _id: { $lte: checkpoint.checkpoint } } @@ -360,28 +242,30 @@ export class MongoSyncBucketStorage async *getBucketDataBatch( checkpoint: utils.InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: storage.BucketDataBatchOptions ): AsyncIterable { - if (dataBuckets.size == 0) { + if (dataBuckets.length == 0) { return; } let filters: mongo.Filter[] = []; + const bucketMap = new Map(dataBuckets.map((d) => [d.bucket, d.start])); if (checkpoint == null) { throw new ServiceAssertionError('checkpoint is null'); } const end = checkpoint; - for (let [name, start] of dataBuckets.entries()) { + for (let { bucket: name, start, source } of dataBuckets) { + const sourceDefinitionId = this.mapping.bucketSourceId(source); filters.push({ _id: { $gt: { - g: this.group_id, + g: sourceDefinitionId, b: name, o: start }, $lte: { - g: this.group_id, + g: sourceDefinitionId, b: name, o: end as any } @@ -465,7 +349,7 @@ export class MongoSyncBucketStorage } if (start == null) { - const startOpId = dataBuckets.get(bucket); + const startOpId = bucketMap.get(bucket); if (startOpId == null) { throw new ServiceAssertionError(`data for unexpected bucket: ${bucket}`); } @@ -507,7 +391,7 @@ export class MongoSyncBucketStorage } } - async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: utils.InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.checksums.getChecksums(checkpoint, buckets); } @@ -562,33 +446,9 @@ export class MongoSyncBucketStorage } async clear(options?: storage.ClearStorageOptions): Promise { - while (true) { - if (options?.signal?.aborted) { - throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason); - } - try { - await this.clearIteration(); + const signal = options?.signal ?? new AbortController().signal; - logger.info(`${this.slot_name} Done clearing data`); - return; - } catch (e: unknown) { - if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { - logger.info( - `${this.slot_name} Cleared batch of data in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...` - ); - await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5); - } else { - throw e; - } - } - } - } - - private async clearIteration(): Promise { - // Individual operations here may time out with the maxTimeMS option. - // It is expected to still make progress, and continue on the next try. 
- - await this.db.sync_rules.updateOne( + const doc = await this.db.sync_rules.findOneAndUpdate( { _id: this.group_id }, @@ -604,41 +464,171 @@ export class MongoSyncBucketStorage snapshot_lsn: 1 } }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } - ); - await this.db.bucket_data.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['b', 'o']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS, returnDocument: 'after' } ); - await this.db.bucket_parameters.deleteMany( - { - 'key.g': this.group_id - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + + if (doc?.rule_mapping != null) { + // TODO: Handle consistency + + const otherSyncRules = await this.db.sync_rules + .find({ + _id: { $ne: this.group_id }, + state: { $in: [SyncRuleState.ACTIVE, SyncRuleState.PROCESSING, SyncRuleState.ERRORED] }, + 'rule_mapping.definitions': { $exists: true } + }) + .toArray(); + const keepSyncDefinitionIds = new Set(); + const keepParameterLookupIds = new Set(); + for (let other of otherSyncRules) { + for (let id of Object.values(other.rule_mapping.definitions)) { + keepSyncDefinitionIds.add(id); + } + for (let id of Object.values(other.rule_mapping.parameter_lookups)) { + keepParameterLookupIds.add(id); + } + } + + for (let [name, id] of Object.entries(doc.rule_mapping.definitions)) { + if (keepSyncDefinitionIds.has(id)) { + continue; + } + await this.retriedDelete(`deleting bucket data for ${name}`, signal, () => + this.db.bucket_data.deleteMany( + { + _id: idPrefixFilter({ g: id }, ['b', 'o']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + + await this.retriedDelete(`deleting bucket_state data for ${name}`, signal, () => + this.db.bucket_state.deleteMany( + { + _id: idPrefixFilter({ g: id }, ['b']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + } + + for (let [name, id] of Object.entries(doc.rule_mapping.parameter_lookups)) { + if (keepParameterLookupIds.has(id)) { + continue; + } + // FIXME: Index this + await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => + this.db.bucket_parameters.deleteMany( + { + def: id + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + } + } + + // Legacy + await this.retriedDelete('deleting bucket data', signal, () => + this.db.bucket_data.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id }, ['b', 'o']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.current_data.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['t', 'k']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + // Legacy + await this.retriedDelete('deleting bucket parameter lookup values', signal, () => + this.db.bucket_parameters.deleteMany( + { + 'key.g': this.group_id + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.bucket_state.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['b']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + // Legacy + await this.retriedDelete('deleting current data records', signal, () => + this.db.current_data.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id as any }, ['t', 'k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.source_tables.deleteMany( - { - group_id: this.group_id - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + 
// Legacy + await this.retriedDelete('deleting bucket state records', signal, () => + this.db.bucket_state.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id }, ['b']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); + + // First remove the reference + this.db.source_tables.updateMany({ sync_rules_ids: this.group_id }, { $pull: { sync_rules_ids: this.group_id } }); + + // Then delete the data associated with unreferenced source tables + const tables = await this.db.source_tables + .find( + { + sync_rules_ids: [] + }, + { projection: { _id: 1 } } + ) + .toArray(); + + for (let table of tables) { + await this.retriedDelete(`deleting current data records for table ${table.table_name}`, signal, () => + this.db.current_data.deleteMany( + { + _id: idPrefixFilter({ g: 0, t: table._id }, ['k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + await this.retriedDelete(`deleting parameter data records for table ${table.table_name}`, signal, () => + this.db.bucket_parameters.deleteMany( + { + key: idPrefixFilter({ g: 0, t: table._id }, ['k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + + await this.db.source_tables.deleteOne({ _id: table._id }); // Delete the source table record itself + } + } + + private async retriedDelete( + message: string, + signal: AbortSignal, + deleteFunc: () => Promise + ): Promise { + // Individual operations here may time out with the maxTimeMS option. + // It is expected to still make progress, and continue on the next try. + + let i = 0; + while (!signal.aborted) { + try { + const result = await deleteFunc(); + if (result.deletedCount > 0) { + logger.info(`${this.slot_name} ${message} - done`); + } + return; + } catch (e: unknown) { + if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { + i += 1; + logger.info(`${this.slot_name} ${message} iteration ${i}, continuing...`); + await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5); + } else { + throw e; + } + } + } + throw new ReplicationAbortedError('Aborted clearing data', signal.reason); } async reportError(e: any): Promise { @@ -665,7 +655,7 @@ export class MongoSyncBucketStorage await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact(); if (maxOpId != null && options?.compactParameterData) { - await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact(); + await new MongoParameterCompactor(this.db, this, maxOpId, options).compact(); } } @@ -912,7 +902,10 @@ export class MongoSyncBucketStorage .find( { // We have an index on (_id.g, last_op). - '_id.g': this.group_id, + // We cannot do a plain filter this on _id.g anymore, since that depends on the bucket definition. + // For now we leave out the filter. But we may need to either: + // 1. Add a new index purely on last_op, or + // 2. Use an $in on all relevant _id.g values (from the sync rules mapping). 
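          // As an illustration of option 2 (not implemented here), the filter could roughly become:
          //   '_id.g': { $in: bucketDataSources.map((source) => this.mapping.bucketSourceId(source)) },
          // where bucketDataSources would come from this storage's parsed sync rules, using the same
          // bucketSourceId() mapping helper as getBucketDataBatch above.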
last_op: { $gt: options.lastCheckpoint.checkpoint } }, { @@ -945,7 +938,7 @@ export class MongoSyncBucketStorage .find( { _id: { $gt: options.lastCheckpoint.checkpoint, $lte: options.nextCheckpoint.checkpoint }, - 'key.g': this.group_id + 'key.g': 0 }, { projection: { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts index 00536a8aa..098d1e1d4 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts @@ -3,6 +3,7 @@ import crypto from 'crypto'; import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { storage } from '@powersync/service-core'; import { PowerSyncMongo } from './db.js'; +import { mongo } from '@powersync/lib-service-mongodb'; /** * Manages a lock on a sync rules document, so that only one process @@ -13,7 +14,8 @@ export class MongoSyncRulesLock implements storage.ReplicationLock { static async createLock( db: PowerSyncMongo, - sync_rules: storage.PersistedSyncRulesContent + sync_rules: storage.PersistedSyncRulesContent, + session?: mongo.ClientSession ): Promise { const lockId = crypto.randomBytes(8).toString('hex'); const doc = await db.sync_rules.findOneAndUpdate( @@ -28,12 +30,14 @@ export class MongoSyncRulesLock implements storage.ReplicationLock { }, { projection: { lock: 1 }, - returnDocument: 'before' + returnDocument: 'before', + session } ); if (doc == null) { // Query the existing lock to get the expiration time (best effort - it may have been released in the meantime). + // We don't use the session here - we want to see the latest state. const heldLock = await db.sync_rules.findOne({ _id: sync_rules.id }, { projection: { lock: 1 } }); if (heldLock?.lock?.expires_at) { throw new ServiceError( diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index 1b41fa1f6..2e373f736 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -1,11 +1,12 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { JSONBig } from '@powersync/service-jsonbig'; -import { EvaluatedParameters, EvaluatedRow } from '@powersync/service-sync-rules'; +import { BucketDataSource, EvaluatedParameters, EvaluatedRow } from '@powersync/service-sync-rules'; import * as bson from 'bson'; -import { Logger, logger as defaultLogger } from '@powersync/lib-services-framework'; +import { Logger, ReplicationAssertionError } from '@powersync/lib-services-framework'; import { InternalOpId, storage, utils } from '@powersync/service-core'; -import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketBatch.js'; +import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; +import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketDataWriter.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { PowerSyncMongo } from './db.js'; import { @@ -14,9 +15,10 @@ import { BucketStateDocument, CurrentBucket, CurrentDataDocument, + RecordedLookup, SourceKey } from './models.js'; -import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; /** * Maximum size of operations we 
write in a single transaction. @@ -51,6 +53,7 @@ export class PersistedBatch { bucketParameters: mongo.AnyBulkWriteOperation[] = []; currentData: mongo.AnyBulkWriteOperation[] = []; bucketStates: Map = new Map(); + mapping: BucketDefinitionMapping; /** * For debug logging only. @@ -62,16 +65,13 @@ export class PersistedBatch { */ currentSize = 0; - constructor( - private group_id: number, - writtenSize: number, - options?: { logger?: Logger } - ) { + constructor(writtenSize: number, options: { logger: Logger; mapping: BucketDefinitionMapping }) { this.currentSize = writtenSize; - this.logger = options?.logger ?? defaultLogger; + this.logger = options.logger; + this.mapping = options.mapping; } - private incrementBucket(bucket: string, op_id: InternalOpId, bytes: number) { + private incrementBucket(defId: number, bucket: string, op_id: InternalOpId, bytes: number) { let existingState = this.bucketStates.get(bucket); if (existingState) { existingState.lastOp = op_id; @@ -81,7 +81,8 @@ export class PersistedBatch { this.bucketStates.set(bucket, { lastOp: op_id, incrementCount: 1, - incrementBytes: bytes + incrementBytes: bytes, + def: defId }); } } @@ -102,7 +103,14 @@ export class PersistedBatch { const dchecksum = BigInt(utils.hashDelete(replicaIdToSubkey(options.table.id, options.sourceKey))); for (const k of options.evaluated) { - const key = currentBucketKey(k); + const source = k.source; + const sourceDefinitionId = this.mapping.bucketSourceId(source); + const key = currentBucketKey({ + bucket: k.bucket, + table: k.table, + id: k.id, + def: sourceDefinitionId + }); // INSERT const recordData = JSONBig.stringify(k.data); @@ -127,7 +135,7 @@ export class PersistedBatch { insertOne: { document: { _id: { - g: this.group_id, + g: sourceDefinitionId, b: k.bucket, o: op_id }, @@ -141,11 +149,16 @@ export class PersistedBatch { } } }); - this.incrementBucket(k.bucket, op_id, byteEstimate); + this.incrementBucket(sourceDefinitionId, k.bucket, op_id, byteEstimate); } for (let bd of remaining_buckets.values()) { // REMOVE + if (options.table.bucketDataSourceIds?.indexOf(bd.def) === -1) { + // This bucket definition is no longer used for this table. + // Don't generate REMOVE operations for it. + continue; + } const op_id = options.op_seq.next(); this.debugLastOpId = op_id; @@ -154,7 +167,7 @@ export class PersistedBatch { insertOne: { document: { _id: { - g: this.group_id, + g: bd.def, b: bd.bucket, o: op_id }, @@ -169,7 +182,7 @@ export class PersistedBatch { } }); this.currentSize += 200; - this.incrementBucket(bd.bucket, op_id, 200); + this.incrementBucket(bd.def, bd.bucket, op_id, 200); } } @@ -178,7 +191,7 @@ export class PersistedBatch { sourceKey: storage.ReplicaId; sourceTable: storage.SourceTable; evaluated: EvaluatedParameters[]; - existing_lookups: bson.Binary[]; + existing_lookups: RecordedLookup[]; }) { // This is similar to saving bucket data. // A key difference is that we don't need to keep the history intact. @@ -189,16 +202,19 @@ export class PersistedBatch { // We also don't need to keep history intact. const { sourceTable, sourceKey, evaluated } = data; - const remaining_lookups = new Map(); + const remaining_lookups = new Map(); for (let l of data.existing_lookups) { - remaining_lookups.set(l.toString('base64'), l); + const key = l.d + '.' + l.l.toString('base64'); + remaining_lookups.set(key, l); } // 1. 
Insert new entries for (let result of evaluated) { + const sourceDefinitionId = this.mapping.parameterLookupId(result.lookup.source); const binLookup = storage.serializeLookup(result.lookup); const hex = binLookup.toString('base64'); - remaining_lookups.delete(hex); + const key = sourceDefinitionId + '.' + hex; + remaining_lookups.delete(key); const op_id = data.op_seq.next(); this.debugLastOpId = op_id; @@ -206,8 +222,9 @@ export class PersistedBatch { insertOne: { document: { _id: op_id, + def: sourceDefinitionId, key: { - g: this.group_id, + g: 0, t: mongoTableId(sourceTable.id), k: sourceKey }, @@ -222,6 +239,14 @@ export class PersistedBatch { // 2. "REMOVE" entries for any lookup not touched. for (let lookup of remaining_lookups.values()) { + const sourceDefinitionId = lookup.d; + + if (sourceTable.parameterLookupSourceIds?.indexOf(sourceDefinitionId) === -1) { + // This bucket definition is no longer used for this table. + // Don't generate REMOVE operations for it. + continue; + } + const op_id = data.op_seq.next(); this.debugLastOpId = op_id; this.bucketParameters.push({ @@ -229,11 +254,12 @@ export class PersistedBatch { document: { _id: op_id, key: { - g: this.group_id, + g: 0, t: mongoTableId(sourceTable.id), k: sourceKey }, - lookup: lookup, + def: sourceDefinitionId, + lookup: lookup.l, bucket_parameters: [] } } @@ -393,7 +419,7 @@ export class PersistedBatch { updateOne: { filter: { _id: { - g: this.group_id, + g: state.def, b: bucket } }, @@ -417,4 +443,5 @@ interface BucketStateUpdate { lastOp: InternalOpId; incrementCount: number; incrementBytes: number; + def: number; } diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index ccd45a556..dfc21cb31 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -14,7 +14,7 @@ export type ReplicaId = bson.UUID | bson.Document | any; export interface SourceKey { /** group_id */ - g: number; + g: 0; /** source table id */ t: bson.ObjectId; /** source key */ @@ -30,11 +30,16 @@ export interface BucketDataKey { o: bigint; } +export interface RecordedLookup { + d: number; + l: bson.Binary; +} + export interface CurrentDataDocument { _id: SourceKey; data: bson.Binary; buckets: CurrentBucket[]; - lookups: bson.Binary[]; + lookups: RecordedLookup[]; /** * If set, this can be deleted, once there is a consistent checkpoint >= pending_delete. 
* @@ -44,6 +49,7 @@ export interface CurrentDataDocument { } export interface CurrentBucket { + def: number; bucket: string; table: string; id: string; @@ -51,6 +57,7 @@ export interface CurrentBucket { export interface BucketParameterDocument { _id: bigint; + def: number; key: SourceKey; lookup: bson.Binary; bucket_parameters: Record[]; @@ -72,7 +79,8 @@ export type OpType = 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR'; export interface SourceTableDocument { _id: bson.ObjectId; - group_id: number; + bucket_data_source_ids: number[]; + parameter_lookup_source_ids: number[]; connection_id: number; relation_id: number | string | undefined; schema_name: string; @@ -210,6 +218,11 @@ export interface SyncRuleDocument { id: string; expires_at: Date; } | null; + + rule_mapping: { + definitions: Record; + parameter_lookups: Record; + }; } export interface CheckpointEventDocument { diff --git a/modules/module-mongodb-storage/src/storage/storage-index.ts b/modules/module-mongodb-storage/src/storage/storage-index.ts index cfb1d4ad0..fbd83d295 100644 --- a/modules/module-mongodb-storage/src/storage/storage-index.ts +++ b/modules/module-mongodb-storage/src/storage/storage-index.ts @@ -1,6 +1,6 @@ export * from './implementation/db.js'; export * from './implementation/models.js'; -export * from './implementation/MongoBucketBatch.js'; +export * from './implementation/MongoBucketDataWriter.js'; export * from './implementation/MongoIdSequence.js'; export * from './implementation/MongoPersistedSyncRules.js'; export * from './implementation/MongoPersistedSyncRulesContent.js'; diff --git a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap index e3449a7c7..436738361 100644 --- a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -6,7 +6,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -36,7 +36,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": -93886621, @@ -54,7 +54,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 499012468, "count": 4, "priority": 3, @@ -71,7 +71,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` { "data": { "after": "2", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 1859363232, @@ -104,13 +104,13 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` ] `; -exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` +exports[`sync - mongodb > encodes sync rules id in buckets for streams 1`] = ` [ { "checkpoint": { "buckets": [ { - "bucket": "1#test|0[]", + "bucket": "10002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -135,7 +135,7 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` { "data": { "after": "0", - "bucket": "1#test|0[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -159,13 +159,13 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` ] `; -exports[`sync - mongodb > 
encodes sync rules id in buckes for streams 2`] = ` +exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` [ { "checkpoint": { "buckets": [ { - "bucket": "2#test|0[]", + "bucket": "20002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -181,7 +181,7 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 2`] = ` { "errors": [], "is_default": true, - "name": "test", + "name": "test2", }, ], "write_checkpoint": undefined, @@ -190,7 +190,7 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 2`] = ` { "data": { "after": "0", - "bucket": "2#test|0[]", + "bucket": "20002[]", "data": [ { "checksum": 920318466, @@ -199,7 +199,7 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 2`] = ` "object_type": "test", "op": "PUT", "op_id": "2", - "subkey": "e5aa2ddc-1328-58fa-a000-0b5ed31eaf1a", + "subkey": "bfe6a7fc-1a36-5a95-877f-518ff63ecb56", }, ], "has_more": false, @@ -228,7 +228,7 @@ exports[`sync - mongodb > expiring token 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 0, "count": 0, "priority": 3, @@ -272,7 +272,7 @@ exports[`sync - mongodb > sends checkpoint complete line for empty checkpoint 1` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -1221282404, "count": 1, "priority": 3, @@ -297,7 +297,7 @@ exports[`sync - mongodb > sends checkpoint complete line for empty checkpoint 1` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3073684892, @@ -341,7 +341,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "b0[]", + "bucket": "10003[]", "checksum": 920318466, "count": 1, "priority": 2, @@ -352,7 +352,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` ], }, { - "bucket": "b1[]", + "bucket": "10004[]", "checksum": -1382098757, "count": 1, "priority": 1, @@ -382,7 +382,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` { "data": { "after": "0", - "bucket": "b1[]", + "bucket": "10004[]", "data": [ { "checksum": 2912868539, @@ -407,7 +407,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` { "data": { "after": "0", - "bucket": "b0[]", + "bucket": "10003[]", "data": [ { "checksum": 920318466, @@ -437,7 +437,7 @@ exports[`sync - mongodb > sync global data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -462,7 +462,7 @@ exports[`sync - mongodb > sync global data 1`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -501,7 +501,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint "checkpoint": { "buckets": [ { - "bucket": "b0a[]", + "bucket": "10004[]", "checksum": -659831575, "count": 2000, "priority": 2, @@ -512,7 +512,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b0b[]", + "bucket": "10005[]", "checksum": -659831575, "count": 2000, "priority": 2, @@ -523,7 +523,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b1[]", + "bucket": "10006[]", "checksum": -1096116670, "count": 1, "priority": 1, @@ -558,7 +558,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - "bucket": "b1[]", + "bucket": "10006[]", "data": undefined, 
"has_more": false, "next_after": "1", @@ -573,7 +573,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": true, "next_after": "2000", @@ -582,7 +582,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "2000", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": true, "next_after": "4000", @@ -594,7 +594,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint "removed_buckets": [], "updated_buckets": [ { - "bucket": "b0a[]", + "bucket": "10004[]", "checksum": 883076828, "count": 2001, "priority": 2, @@ -605,7 +605,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b0b[]", + "bucket": "10005[]", "checksum": 883076828, "count": 2001, "priority": 2, @@ -616,7 +616,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b1[]", + "bucket": "10006[]", "checksum": 1841937527, "count": 2, "priority": 1, @@ -633,7 +633,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "1", - "bucket": "b1[]", + "bucket": "10006[]", "data": undefined, "has_more": false, "next_after": "4002", @@ -648,7 +648,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "4000", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": false, "next_after": "4003", @@ -657,7 +657,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - "bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": true, "next_after": "1999", @@ -666,7 +666,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "1999", - "bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": true, "next_after": "3999", @@ -675,7 +675,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "3999", - "bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": false, "next_after": "4004", @@ -695,7 +695,7 @@ exports[`sync - mongodb > sync legacy non-raw data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -852817836, "count": 1, "priority": 3, @@ -720,7 +720,7 @@ exports[`sync - mongodb > sync legacy non-raw data 1`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3442149460n, @@ -755,7 +755,7 @@ exports[`sync - mongodb > sync updates to data query only 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 0, "count": 0, "priority": 3, @@ -793,7 +793,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 1418351250, "count": 1, "priority": 3, @@ -810,7 +810,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` { "data": { "after": "0", - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "data": [ { "checksum": 1418351250, @@ -819,7 +819,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "2", - "subkey": 
"0ffb7b58-d14d-5efa-be6c-c8eda74ab7a8", + "subkey": "ae9cbda1-5d8a-5a61-aaa4-366940758339", }, ], "has_more": false, @@ -840,7 +840,7 @@ exports[`sync - mongodb > sync updates to global data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 0, "count": 0, "priority": 3, @@ -878,7 +878,7 @@ exports[`sync - mongodb > sync updates to global data 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -895,7 +895,7 @@ exports[`sync - mongodb > sync updates to global data 2`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -927,7 +927,7 @@ exports[`sync - mongodb > sync updates to global data 3`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -944,7 +944,7 @@ exports[`sync - mongodb > sync updates to global data 3`] = ` { "data": { "after": "1", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3280762209, @@ -1000,7 +1000,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 1418351250, "count": 1, "priority": 3, @@ -1017,7 +1017,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` { "data": { "after": "0", - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "data": [ { "checksum": 1418351250, @@ -1026,7 +1026,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "1", - "subkey": "0ffb7b58-d14d-5efa-be6c-c8eda74ab7a8", + "subkey": "ae9cbda1-5d8a-5a61-aaa4-366940758339", }, ], "has_more": false, @@ -1073,7 +1073,7 @@ exports[`sync - mongodb > sync updates to parameter query only 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 0, "count": 0, "priority": 3, diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index 0f11d2a0b..f468b9906 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -6,37 +6,42 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; describe('Mongo Sync Bucket Storage Compact', () => { register.registerCompactTests(INITIALIZED_MONGO_STORAGE_FACTORY); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); - describe('with blank bucket_state', () => { // This can happen when migrating from older service versions, that did not populate bucket_state yet. 
- const populate = async (bucketStorage: SyncRulesBucketStorage) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - owner_id: 'u1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - owner_id: 'u2' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.commit('1/1'); + const populate = async (bucketStorage: SyncRulesBucketStorage, sourceTableIndex: number) => { + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + + const sourceTable = await test_utils.resolveTestTable( + writer, + 'test', + ['id'], + INITIALIZED_MONGO_STORAGE_FACTORY, + sourceTableIndex + ); + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + owner_id: 'u1' + }, + afterReplicaId: test_utils.rid('t1') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + owner_id: 'u2' + }, + afterReplicaId: test_utils.rid('t2') }); + await writer.commit('1/1'); + return bucketStorage.getCheckpoint(); }; @@ -51,13 +56,13 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - const { checkpoint } = await populate(bucketStorage); + const { checkpoint } = await populate(bucketStorage, 1); - return { bucketStorage, checkpoint, factory }; + return { bucketStorage, checkpoint, factory, syncRules }; }; test('full compact', async () => { - const { bucketStorage, checkpoint, factory } = await setup(); + const { bucketStorage, checkpoint, factory, syncRules } = await setup(); // Simulate bucket_state from old version not being available await factory.db.bucket_state.deleteMany({}); @@ -72,14 +77,17 @@ bucket_definitions: signal: null as any }); - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['by_user["u1"]', 'by_user["u2"]']); - expect(checksumAfter.get('by_user["u1"]')).toEqual({ - bucket: 'by_user["u1"]', + const users = ['u1', 'u2']; + const userRequests = users.map((user) => test_utils.bucketRequest(syncRules, `by_user["${user}"]`)); + const [u1Request, u2Request] = userRequests; + const checksumAfter = await bucketStorage.getChecksums(checkpoint, userRequests); + expect(checksumAfter.get(u1Request.bucket)).toEqual({ + bucket: u1Request.bucket, checksum: -659469718, count: 1 }); - expect(checksumAfter.get('by_user["u2"]')).toEqual({ - bucket: 'by_user["u2"]', + expect(checksumAfter.get(u2Request.bucket)).toEqual({ + bucket: u2Request.bucket, checksum: 430217650, count: 1 }); @@ -89,7 +97,7 @@ bucket_definitions: // Populate old sync rules version const { factory } = await setup(); - // Not populate another version (bucket definition name changed) + // Now populate another version (bucket definition name changed) const syncRules = await factory.updateSyncRules({ content: ` bucket_definitions: @@ -100,7 +108,7 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - await populate(bucketStorage); + await populate(bucketStorage, 2); const { checkpoint } = await bucketStorage.getCheckpoint(); // Default is to small small numbers - should be a no-op @@ -123,14 +131,17 @@ bucket_definitions: }); expect(result2.buckets).toEqual(0); - const checksumAfter = await 
bucketStorage.getChecksums(checkpoint, ['by_user2["u1"]', 'by_user2["u2"]']); - expect(checksumAfter.get('by_user2["u1"]')).toEqual({ - bucket: 'by_user2["u1"]', + const users = ['u1', 'u2']; + const userRequests = users.map((user) => test_utils.bucketRequest(syncRules, `by_user2["${user}"]`)); + const [u1Request, u2Request] = userRequests; + const checksumAfter = await bucketStorage.getChecksums(checkpoint, userRequests); + expect(checksumAfter.get(u1Request.bucket)).toEqual({ + bucket: u1Request.bucket, checksum: -659469718, count: 1 }); - expect(checksumAfter.get('by_user2["u2"]')).toEqual({ - bucket: 'by_user2["u2"]', + expect(checksumAfter.get(u2Request.bucket)).toEqual({ + bucket: u2Request.bucket, checksum: 430217650, count: 1 }); diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index eaa636600..178aacb3e 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -1,81 +1,82 @@ import { storage } from '@powersync/service-core'; -import { register, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; describe('sync - mongodb', () => { register.registerSyncTests(INITIALIZED_MONGO_STORAGE_FACTORY); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); // The split of returned results can vary depending on storage drivers test('large batch (2)', async () => { // Test syncing a batch of data that is small in count, // but large enough in size to be split over multiple returned chunks. // Similar to the above test, but splits over 1MB chunks. 
- const sync_rules = test_utils.testRules( - ` + + await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); + const syncRules = await factory.updateSyncRules({ + content: ` bucket_definitions: global: data: - SELECT id, description FROM "%" ` - ); - await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); - const bucketStorage = factory.getInstance(sync_rules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - - const largeDescription = '0123456789'.repeat(2_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); }); + const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); + + const largeDescription = '0123456789'.repeat(2_000_00); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); + + // Large enough to split the returned batch + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') + }); + + const result = await writer.flush(); const checkpoint = result!.flushed_op; const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules, 'global[]', 0n)], options) ); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, @@ -90,7 +91,7 @@ describe('sync - mongodb', () => { const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', batch1[0].chunkData.next_after)], options ) ); @@ -106,7 +107,7 @@ describe('sync - mongodb', () => { const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', batch2[0].chunkData.next_after)], 
options ) ); diff --git a/modules/module-mongodb/package.json b/modules/module-mongodb/package.json index 5150bb97f..9375c7216 100644 --- a/modules/module-mongodb/package.json +++ b/modules/module-mongodb/package.json @@ -35,6 +35,7 @@ "@powersync/service-sync-rules": "workspace:*", "@powersync/service-types": "workspace:*", "bson": "^6.10.4", + "p-defer": "^4.0.1", "ts-codec": "^1.3.0", "uuid": "^11.1.0" }, diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 8bd4e67de..40b7db919 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -1,7 +1,6 @@ -import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb'; +import { mongo } from '@powersync/lib-service-mongodb'; import { container, - DatabaseConnectionError, logger as defaultLogger, ErrorCode, Logger, @@ -10,24 +9,20 @@ import { ServiceError } from '@powersync/lib-services-framework'; import { + BucketStorageFactory, MetricsEngine, - RelationCache, SaveOperationTag, SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core'; -import { - DatabaseInputRow, - SqliteInputRow, - SqliteRow, - HydratedSyncRules, - TablePattern -} from '@powersync/service-sync-rules'; +import { HydratedSyncRules, SqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; -import { MongoLSN } from '../common/MongoLSN.js'; +import { MongoLSN, ZERO_LSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; +import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; +import { ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; import { MongoManager } from './MongoManager.js'; import { constructAfterRecord, @@ -36,12 +31,13 @@ import { getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; -import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; +import { MongoSnapshotter } from './MongoSnapshotter.js'; import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; export interface ChangeStreamOptions { connections: MongoManager; - storage: storage.SyncRulesBucketStorage; + factory: BucketStorageFactory; + streams: Pick[]; metrics: MetricsEngine; abort_signal: AbortSignal; /** @@ -60,43 +56,63 @@ export interface ChangeStreamOptions { logger?: Logger; } +interface SubStreamOptions { + connections: MongoManager; + storage: storage.SyncRulesBucketStorage; + logger: Logger; + abortSignal: AbortSignal; + checkpointStreamId: mongo.ObjectId; + maxAwaitTimeMS: number; +} + interface InitResult { needsInitialSync: boolean; snapshotLsn: string | null; } -/** - * Thrown when the change stream is not valid anymore, and replication - * must be restarted. - * - * Possible reasons: - * * Some change stream documents do not have postImages. - * * startAfter/resumeToken is not valid anymore. 
- */ -export class ChangeStreamInvalidatedError extends DatabaseConnectionError { - constructor(message: string, cause: any) { - super(ErrorCode.PSYNC_S1344, message, cause); +class SubStream { + private readonly connections: MongoManager; + public readonly storage: storage.SyncRulesBucketStorage; + public readonly syncRules: HydratedSyncRules; + private readonly logger: Logger; + + constructor(options: SubStreamOptions) { + this.connections = options.connections; + this.storage = options.storage; + this.logger = options.logger; + this.syncRules = this.storage.getHydratedSyncRules({ + defaultSchema: this.connections.db.databaseName + }); + } + + async checkSlot(): Promise { + const status = await this.storage.getStatus(); + if (status.snapshot_done && status.checkpoint_lsn) { + this.logger.info(`Initial replication already done`); + return { needsInitialSync: false, snapshotLsn: null }; + } + + return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; } } export class ChangeStream { - sync_rules: HydratedSyncRules; - group_id: number; + substreams: SubStream[] = []; connection_id = 1; - private readonly storage: storage.SyncRulesBucketStorage; - private connections: MongoManager; private readonly client: mongo.MongoClient; private readonly defaultDb: mongo.Db; private readonly metrics: MetricsEngine; + private readonly factory: BucketStorageFactory; private readonly maxAwaitTimeMS: number; - private abort_signal: AbortSignal; + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; - private relationCache = new RelationCache(getCacheIdentifier); + private initPromise: Promise | null = null; /** * Time of the oldest uncommitted change, according to the source db. @@ -113,313 +129,87 @@ export class ChangeStream { private logger: Logger; - private snapshotChunkLength: number; - private changeStreamTimeout: number; + public readonly relationCache = new Map(); + + private readonly snapshotter: MongoSnapshotter; + + private readonly snapshotChunkLength: number | undefined; + constructor(options: ChangeStreamOptions) { - this.storage = options.storage; this.metrics = options.metrics; - this.group_id = options.storage.group_id; this.connections = options.connections; this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; - this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; this.client = this.connections.client; this.defaultDb = this.connections.db; - this.sync_rules = options.storage.getParsedSyncRules({ - defaultSchema: this.defaultDb.databaseName - }); + this.factory = options.factory; // The change stream aggregation command should timeout before the socket times out, // so we use 90% of the socket timeout value. this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); - this.abort_signal = options.abort_signal; - this.abort_signal.addEventListener( - 'abort', - () => { - // TODO: Fast abort? - }, - { once: true } - ); - this.logger = options.logger ?? defaultLogger; - } - - get stopped() { - return this.abort_signal.aborted; - } - - private get usePostImages() { - return this.connections.options.postImages != PostImagesOption.OFF; - } - - private get configurePostImages() { - return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; - } - - /** - * This resolves a pattern, persists the related metadata, and returns - * the resulting SourceTables. - * - * This implicitly checks the collection postImage configuration. 
- */ - async resolveQualifiedTableNames( - batch: storage.BucketStorageBatch, - tablePattern: TablePattern - ): Promise { - const schema = tablePattern.schema; - if (tablePattern.connectionTag != this.connections.connectionTag) { - return []; - } - - let nameFilter: RegExp | string; - if (tablePattern.isWildcard) { - nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)); - } else { - nameFilter = tablePattern.name; - } - let result: storage.SourceTable[] = []; - - // Check if the collection exists - const collections = await this.client - .db(schema) - .listCollections( - { - name: nameFilter - }, - { nameOnly: false } - ) - .toArray(); - - if (!tablePattern.isWildcard && collections.length == 0) { - this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`); - } - - for (let collection of collections) { - const table = await this.handleRelation( - batch, - getMongoRelation({ db: schema, coll: collection.name }), - // This is done as part of the initial setup - snapshot is handled elsewhere - { snapshot: false, collectionInfo: collection } - ); - - result.push(table); - } - - return result; - } - - async initSlot(): Promise { - const status = await this.storage.getStatus(); - if (status.snapshot_done && status.checkpoint_lsn) { - this.logger.info(`Initial replication already done`); - return { needsInitialSync: false, snapshotLsn: null }; - } - - return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; - } - - async estimatedCount(table: storage.SourceTable): Promise { - const count = await this.estimatedCountNumber(table); - return `~${count}`; - } - - async estimatedCountNumber(table: storage.SourceTable): Promise { - const db = this.client.db(table.schema); - return await db.collection(table.name).estimatedDocumentCount(); - } - - /** - * This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot. - * - * This LSN can survive initial replication restarts. - */ - private async getSnapshotLsn(): Promise { - const hello = await this.defaultDb.command({ hello: 1 }); - // Basic sanity check - if (hello.msg == 'isdbgrid') { - throw new ServiceError( - ErrorCode.PSYNC_S1341, - 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).' - ); - } else if (hello.setName == null) { - throw new ServiceError( - ErrorCode.PSYNC_S1342, - 'Standalone MongoDB instances are not supported - use a replicaset.' - ); - } - - // Open a change stream just to get a resume token for later use. - // We could use clusterTime from the hello command, but that won't tell us if the - // snapshot isn't valid anymore. - // If we just use the first resumeToken from the stream, we get two potential issues: - // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes - // in source db or other stream issues. - // 2. The first actual change we get may have the same clusterTime, causing us to incorrect - // skip that event. - // Instead, we create a new checkpoint document, and wait until we get that document back in the stream. - // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document - // periodically until the timeout is reached. 
- - const LSN_TIMEOUT_SECONDS = 60; - const LSN_CREATE_INTERVAL_SECONDS = 1; - - await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 }); - const { stream } = streamManager; - const startTime = performance.now(); - let lastCheckpointCreated = -10_000; - let eventsSeen = 0; - - while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) { - if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) { - await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); - lastCheckpointCreated = performance.now(); - } - - // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream - const changeDocument = await stream.tryNext().catch((e) => { - throw mapChangeStreamError(e); + this.snapshotChunkLength = options.snapshotChunkLength; + + this.substreams = options.streams.map((config) => { + return new SubStream({ + abortSignal: this.abortSignal, + checkpointStreamId: this.checkpointStreamId, + connections: this.connections, + storage: config.storage, + logger: this.logger.child({ prefix: `[powersync_${config.storage.group_id}] ` }), + maxAwaitTimeMS: this.maxAwaitTimeMS }); - if (changeDocument == null) { - continue; - } + }); - const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; + const snapshotLogger = this.logger.child({ prefix: `[powersync_snapshot] ` }); - if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) { - const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; - if (!this.checkpointStreamId.equals(checkpointId)) { - continue; - } - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - return lsn; - } + const snapshotter = new MongoSnapshotter({ + writer: async () => { + const writer = await this.factory.createCombinedWriter( + this.substreams.map((s) => s.storage), + { + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + zeroLSN: ZERO_LSN, + logger: snapshotLogger + } + ); + return writer; + }, + abort_signal: this.abortSignal, + checkpointStreamId: this.checkpointStreamId, + connections: this.connections, + logger: snapshotLogger, + snapshotChunkLength: this.snapshotChunkLength, + metrics: this.metrics, + maxAwaitTimeMS: this.maxAwaitTimeMS + }); + this.snapshotter = snapshotter; - eventsSeen += 1; + // We wrap in our own abort controller so we can trigger abort internally. + options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); } - - // Could happen if there is a very large replication lag? - throw new ServiceError( - ErrorCode.PSYNC_S1301, - `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}` - ); } - /** - * Given a snapshot LSN, validate that we can read from it, by opening a change stream. - */ - private async validateSnapshotLsn(lsn: string) { - await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }); - const { stream } = streamManager; - try { - // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream - await stream.tryNext(); - } catch (e) { - // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though - // we possibly cannot recover from it. 
- throw mapChangeStreamError(e); - } + private get usePostImages() { + return this.connections.options.postImages != PostImagesOption.OFF; } - async initialReplication(snapshotLsn: string | null) { - const sourceTables = this.sync_rules.getSourceTables(); - await this.client.connect(); - - const flushResult = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - if (snapshotLsn == null) { - // First replication attempt - get a snapshot and store the timestamp - snapshotLsn = await this.getSnapshotLsn(); - await batch.setResumeLsn(snapshotLsn); - this.logger.info(`Marking snapshot at ${snapshotLsn}`); - } else { - this.logger.info(`Resuming snapshot at ${snapshotLsn}`); - // Check that the snapshot is still valid. - await this.validateSnapshotLsn(snapshotLsn); - } - - // Start by resolving all tables. - // This checks postImage configuration, and that should fail as - // early as possible. - let allSourceTables: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.resolveQualifiedTableNames(batch, tablePattern); - allSourceTables.push(...tables); - } - - let tablesWithStatus: SourceTable[] = []; - for (let table of allSourceTables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - let count = await this.estimatedCountNumber(table); - const updated = await batch.updateTableProgress(table, { - totalEstimatedCount: count - }); - tablesWithStatus.push(updated); - this.relationCache.update(updated); - this.logger.info( - `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` - ); - } - - for (let table of tablesWithStatus) { - await this.snapshotTable(batch, table); - await batch.markTableSnapshotDone([table]); - - this.touch(); - } - - // The checkpoint here is a marker - we need to replicate up to at least this - // point before the data can be considered consistent. - // We could do this for each individual table, but may as well just do it once for the entire snapshot. - const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - await batch.markAllSnapshotDone(checkpoint); - - // This will not create a consistent checkpoint yet, but will persist the op. - // Actual checkpoint will be created when streaming replication caught up. - await batch.commit(snapshotLsn); - - this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`); - } - ); - return { lastOpId: flushResult?.flushed_op }; + private get configurePostImages() { + return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; } - private async setupCheckpointsCollection() { - const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); - if (collection == null) { - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { - // Drop + create requires less permissions than collMod, - // and we don't care about the data in this collection. 
- await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else { - // Clear the collection on startup, to keep it clean - // We never query this collection directly, and don't want to keep the data around. - // We only use this to get data into the oplog/changestream. - await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); - } + get stopped() { + return this.abortSignal.aborted; } private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { - const sourceTables = this.sync_rules.getSourceTables(); + const sourceTables = this.substreams.flatMap((s) => s.syncRules.getSourceTables()); let $inFilters: { db: string; coll: string }[] = [ { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } @@ -469,114 +259,6 @@ export class ChangeStream { return { $match: nsFilter, multipleDatabases }; } - static *getQueryData(results: Iterable): Generator { - for (let row of results) { - yield constructAfterRecord(row); - } - } - - private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) { - const totalEstimatedCount = await this.estimatedCountNumber(table); - let at = table.snapshotStatus?.replicatedCount ?? 0; - const db = this.client.db(table.schema); - const collection = db.collection(table.name); - await using query = new ChunkedSnapshotQuery({ - collection, - key: table.snapshotStatus?.lastKey, - batchSize: this.snapshotChunkLength - }); - if (query.lastKey != null) { - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}` - ); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } - - let lastBatch = performance.now(); - let nextChunkPromise = query.nextChunk(); - while (true) { - const { docs: docBatch, lastKey } = await nextChunkPromise; - if (docBatch.length == 0) { - // No more data - stop iterating - break; - } - - if (this.abort_signal.aborted) { - throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason); - } - - // Pre-fetch next batch, so that we can read and write concurrently - nextChunkPromise = query.nextChunk(); - for (let document of docBatch) { - const record = this.constructAfterRecord(document); - - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: document._id - }); - } - - // Important: flush before marking progress - await batch.flush(); - at += docBatch.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); - - table = await batch.updateTableProgress(table, { - lastKey, - replicatedCount: at, - totalEstimatedCount: totalEstimatedCount - }); - this.relationCache.update(table); - - const duration = performance.now() - lastBatch; - lastBatch = performance.now(); - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms` - ); - this.touch(); - } - // In case the loop was interrupted, make sure we await the last promise. 
- await nextChunkPromise; - } - - private async getRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { snapshot: boolean } - ): Promise { - const existing = this.relationCache.get(descriptor); - if (existing != null) { - return existing; - } - - // Note: collection may have been dropped at this point, so we handle - // missing values. - const collection = await this.getCollectionInfo(descriptor.schema, descriptor.name); - - return this.handleRelation(batch, descriptor, { snapshot: options.snapshot, collectionInfo: collection }); - } - - private async getCollectionInfo(db: string, name: string): Promise { - const collection = ( - await this.client - .db(db) - .listCollections( - { - name: name - }, - { nameOnly: false } - ) - .toArray() - )[0]; - return collection; - } - private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { if (!this.usePostImages) { // Nothing to check @@ -596,147 +278,112 @@ export class ChangeStream { } } - async handleRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } - ) { - if (options.collectionInfo != null) { - await this.checkPostImages(descriptor.schema, options.collectionInfo); + private async setupCheckpointsCollection() { + const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); + if (collection == null) { + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); + } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { + // Drop + create requires less permissions than collMod, + // and we don't care about the data in this collection. + await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); } else { - // If collectionInfo is null, the collection may have been dropped. - // Ignore the postImages check in this case. - } - - const snapshot = options.snapshot; - const result = await this.storage.resolveTable({ - group_id: this.group_id, - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, - sync_rules: this.sync_rules - }); - this.relationCache.update(result.table); - - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await batch.drop(result.dropTables); - } - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - if (shouldSnapshot) { - this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); - // Truncate this table, in case a previous snapshot was interrupted. 
- await batch.truncate([result.table]); - - await this.snapshotTable(batch, result.table); - const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - - const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn); - return table; + // Clear the collection on startup, to keep it clean + // We never query this collection directly, and don't want to keep the data around. + // We only use this to get data into the oplog/changestream. + await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); } - - return result.table; - } - - private constructAfterRecord(document: mongo.Document): SqliteRow { - const inputRow = constructAfterRecord(document); - return this.sync_rules.applyRowContext(inputRow); } - async writeChange( - batch: storage.BucketStorageBatch, - table: storage.SourceTable, - change: mongo.ChangeStreamDocument - ): Promise { - if (!table.syncAny) { - this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`); - return null; - } + private async initReplication() { + await this.setupCheckpointsCollection(); + for (let stream of this.substreams) { + const result = await stream.checkSlot(); - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - if (change.operationType == 'insert') { - const baseRecord = this.constructAfterRecord(change.fullDocument); - return await batch.save({ - tag: SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: baseRecord, - afterReplicaId: change.documentKey._id - }); - } else if (change.operationType == 'update' || change.operationType == 'replace') { - if (change.fullDocument == null) { - // Treat as delete - return await batch.save({ - tag: SaveOperationTag.DELETE, - sourceTable: table, - before: undefined, - beforeReplicaId: change.documentKey._id - }); + if (result.needsInitialSync) { + if (result.snapshotLsn == null) { + // Snapshot LSN is not present, so we need to start replication from scratch. + await stream.storage.clear({ signal: this.abortSignal }); + } + await this.snapshotter.queueSnapshotTables(result.snapshotLsn); } - const after = this.constructAfterRecord(change.fullDocument!); - return await batch.save({ - tag: SaveOperationTag.UPDATE, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: after, - afterReplicaId: change.documentKey._id - }); - } else if (change.operationType == 'delete') { - return await batch.save({ - tag: SaveOperationTag.DELETE, - sourceTable: table, - before: undefined, - beforeReplicaId: change.documentKey._id - }); - } else { - throw new ReplicationAssertionError(`Unsupported operation: ${change.operationType}`); } } async replicate() { + let streamPromise: Promise | null = null; + let loopPromise: Promise | null = null; try { // If anything errors here, the entire replication process is halted, and // all connections automatically closed, including this one. - await this.initReplication(); - await this.streamChanges(); - } catch (e) { - await this.storage.reportError(e); - throw e; - } - } + this.initPromise = this.initReplication(); + // Important - need to wait for init. 
This sets the resumeLsn, amongst other setup + await this.initPromise; + streamPromise = this.streamChanges() + .then(() => { + throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); + }) + .catch(async (e) => { + // Report stream errors to all substreams + for (let substream of this.substreams) { + await substream.storage.reportError(e); + } - async initReplication() { - const result = await this.initSlot(); - await this.setupCheckpointsCollection(); - if (result.needsInitialSync) { - if (result.snapshotLsn == null) { - // Snapshot LSN is not present, so we need to start replication from scratch. - await this.storage.clear({ signal: this.abort_signal }); - } - const { lastOpId } = await this.initialReplication(result.snapshotLsn); - if (lastOpId != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - await this.storage.populatePersistentChecksumCache({ - signal: this.abort_signal, - // No checkpoint yet, but we do have the opId. - maxOpId: lastOpId + this.abortController.abort(e); + throw e; }); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch(async (e) => { + // Report stream errors to all substreams for now - we can't yet distinguish the errors + for (let substream of this.substreams) { + await substream.storage.reportError(e); + } + + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); + // First, prioritize non-aborted errors + for (let result of results) { + if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { + throw result.reason; + } } + // Then include aborted errors + for (let result of results) { + if (result.status == 'rejected') { + throw result.reason; + } + } + + // If we get here, both Promises completed successfully, which is unexpected. + throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); + } finally { + // Just to make sure + this.abortController.abort(); + } + } + + /** + * For tests: Wait until the initial snapshot is complete. + */ + public async waitForInitialSnapshot() { + if (this.initPromise == null) { + throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } + await this.initPromise; + await this.snapshotter?.waitForInitialSnapshot(); } - async streamChanges() { + private async streamChanges() { try { await this.streamChangesInternal(); } catch (e) { @@ -802,7 +449,7 @@ export class ChangeStream { stream = this.defaultDb.watch(pipeline, streamOptions); } - this.abort_signal.addEventListener('abort', () => { + this.abortSignal.addEventListener('abort', () => { stream.close(); }); @@ -815,289 +462,441 @@ export class ChangeStream { }; } - async streamChangesInternal() { - await this.storage.startBatch( + async handleRelations( + writer: storage.BucketDataWriter, + descriptor: SourceEntityDescriptor, + options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } + ): Promise { + if (options.collectionInfo != null) { + await this.checkPostImages(descriptor.schema, options.collectionInfo); + } else { + // If collectionInfo is null, the collection may have been dropped. + // Ignore the postImages check in this case. + } + + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. 
There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: descriptor.schema, + name: descriptor.name + }); + + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const resolvedTables = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + pattern + }); + + const snapshot = options.snapshot; + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. + for (let table of resolvedTables) { + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + if (shouldSnapshot) { + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); + await this.snapshotter.queueSnapshot(writer, table); + } + } + allTables.push(...resolvedTables); + } + this.relationCache.set(getCacheIdentifier(descriptor), allTables); + + return allTables; + } + + private async drop(writer: storage.BucketDataWriter, entity: SourceEntityDescriptor): Promise { + const tables = await this.getRelations(writer, entity, { + // We're "dropping" this collection, so never snapshot it. + snapshot: false + }); + if (tables.length > 0) { + await writer.drop(tables); + } + this.relationCache.delete(getCacheIdentifier(entity)); + } + + private async getCollectionInfo(db: string, name: string): Promise { + const collection = ( + await this.client + .db(db) + .listCollections( + { + name: name + }, + { nameOnly: false } + ) + .toArray() + )[0]; + return collection; + } + + async getRelations( + writer: storage.BucketDataWriter, + descriptor: SourceEntityDescriptor, + options: { snapshot: boolean } + ): Promise { + const existing = this.relationCache.get(getCacheIdentifier(descriptor)); + if (existing != null) { + return existing; + } + const collection = await this.getCollectionInfo(descriptor.schema, descriptor.name); + + return this.handleRelations(writer, descriptor, { snapshot: options.snapshot, collectionInfo: collection }); + } + + private async streamChangesInternal() { + await using writer = await this.factory.createCombinedWriter( + this.substreams.map((s) => s.storage), { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, - // We get a complete postimage for every change, so we don't need to store the current data. - storeCurrentData: false - }, - async (batch) => { - const { resumeFromLsn } = batch; - if (resumeFromLsn == null) { - throw new ReplicationAssertionError(`No LSN found to resume from`); + storeCurrentData: false, + zeroLSN: ZERO_LSN, + logger: this.logger, + markRecordUnavailable: undefined, + skipExistingRows: false + } + ); + + // Even though we use a unified stream, the resumeFromLsn is tracked separately per sync rules version. + // This resumeFromLsn on the writer gives us the _minimum_ one. + // When starting with the first sync rules, we need to get an LSN from the snapshot. + // When we then start a new sync rules version, it will use the LSN from the existing sync rules version. 
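
// Illustrative sketch, not part of this change: each sync rules version tracks its own resume
// point, and the combined writer exposes the oldest ("minimum") of them so that no version
// misses change stream events when streaming resumes. Assuming the `comparable` MongoLSN form,
// which this file compares as plain strings elsewhere, that reduces to a simple minimum:
function minimumResumeLsn(perVersionLsns: (string | null)[]): string | null {
  // Ignore versions that have no resume point yet (their snapshot still has to establish one).
  const known = perVersionLsns.filter((lsn): lsn is string => lsn != null);
  if (known.length == 0) {
    return null;
  }
  return known.reduce((a, b) => (a <= b ? a : b));
}
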
+ const resumeFromLsn = writer.resumeFromLsn; + if (resumeFromLsn == null) { + throw new ReplicationAssertionError(`No LSN found to resume from`); + } + const lastLsn = MongoLSN.fromSerialized(resumeFromLsn); + const startAfter = lastLsn?.timestamp; + + // It is normal for this to be a minute or two old when there is a low volume + // of ChangeStream events. + const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000); + + this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`); + + await using streamManager = this.openChangeStream({ lsn: resumeFromLsn }); + const { stream, filters } = streamManager; + if (this.abortSignal.aborted) { + await stream.close(); + return; + } + + // Always start with a checkpoint. + // This helps us to clear errors when restarting, even if there is + // no data to replicate. + let waitForCheckpointLsn: string | null = await createCheckpoint( + this.client, + this.defaultDb, + this.checkpointStreamId + ); + + let splitDocument: mongo.ChangeStreamDocument | null = null; + + let flexDbNameWorkaroundLogged = false; + let changesSinceLastCheckpoint = 0; + + let lastEmptyResume = performance.now(); + + /** + * Used only for checking change stream order. + */ + let lastCheckpointLsn: string | null = null; + + while (true) { + if (this.abortSignal.aborted) { + break; + } + + const originalChangeDocument = await stream.tryNext().catch((e) => { + throw mapChangeStreamError(e); + }); + // The stream was closed, we will only ever receive `null` from it + if (!originalChangeDocument && stream.closed) { + break; + } + + if (this.abortSignal.aborted) { + break; + } + + if (originalChangeDocument == null) { + // We get a new null document after `maxAwaitTimeMS` if there were no other events. + // In this case, stream.resumeToken is the resume token associated with the last response. + // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next() + // does update it. + // From observed behavior, the actual resumeToken changes around once every 10 seconds. + // If we don't update it on empty events, we do keep consistency, but resuming the stream + // with old tokens may cause connection timeouts. + // We throttle this further by only persisting a keepalive once a minute. + // We add an additional check for waitForCheckpointLsn == null, to make sure we're not + // doing a keepalive in the middle of a transaction. + if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { + const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); + await writer.keepalive(lsn); + this.touch(); + lastEmptyResume = performance.now(); + // Log the token update. This helps as a general "replication is still active" message in the logs. + // This token would typically be around 10s behind. + this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`); + this.isStartingReplication = false; } - const lastLsn = MongoLSN.fromSerialized(resumeFromLsn); - const startAfter = lastLsn?.timestamp; + continue; + } - // It is normal for this to be a minute or two old when there is a low volume - // of ChangeStream events. 
- const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000); + this.touch(); - this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`); + if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) { + continue; + } - await using streamManager = this.openChangeStream({ lsn: resumeFromLsn }); - const { stream, filters } = streamManager; - if (this.abort_signal.aborted) { - await stream.close(); - return; + let changeDocument = originalChangeDocument; + if (originalChangeDocument?.splitEvent != null) { + // Handle split events from $changeStreamSplitLargeEvent. + // This is only relevant for very large update operations. + const splitEvent = originalChangeDocument?.splitEvent; + + if (splitDocument == null) { + splitDocument = originalChangeDocument; + } else { + splitDocument = Object.assign(splitDocument, originalChangeDocument); } - // Always start with a checkpoint. - // This helps us to clear errors when restarting, even if there is - // no data to replicate. - let waitForCheckpointLsn: string | null = await createCheckpoint( - this.client, - this.defaultDb, - this.checkpointStreamId - ); + if (splitEvent.fragment == splitEvent.of) { + // Got all fragments + changeDocument = splitDocument; + splitDocument = null; + } else { + // Wait for more fragments + continue; + } + } else if (splitDocument != null) { + // We were waiting for fragments, but got a different event + throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`); + } - let splitDocument: mongo.ChangeStreamDocument | null = null; + if ( + !filters.multipleDatabases && + 'ns' in changeDocument && + changeDocument.ns.db != this.defaultDb.databaseName && + changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`) + ) { + // When all of the following conditions are met: + // 1. We're replicating from an Atlas Flex instance. + // 2. There were changestream events recorded while the PowerSync service is paused. + // 3. We're only replicating from a single database. + // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'}, + // instead of the expected {db: 'ps'}. + // We correct this. + changeDocument.ns.db = this.defaultDb.databaseName; + + if (!flexDbNameWorkaroundLogged) { + flexDbNameWorkaroundLogged = true; + this.logger.warn( + `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.` + ); + } + } - let flexDbNameWorkaroundLogged = false; - let changesSinceLastCheckpoint = 0; + const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; - let lastEmptyResume = performance.now(); + if (ns?.coll == CHECKPOINTS_COLLECTION) { + /** + * Dropping the database does not provide an `invalidate` event. + * We typically would receive `drop` events for the collection which we + * would process below. + * + * However we don't commit the LSN after collections are dropped. + * The prevents the `startAfter` or `resumeToken` from advancing past the drop events. + * The stream also closes after the drop events. + * This causes an infinite loop of processing the collection drop events. + * + * This check here invalidates the change stream if our `_checkpoints` collection + * is dropped. This allows for detecting when the DB is dropped. 
+ */ + if (changeDocument.operationType == 'drop') { + throw new ChangeStreamInvalidatedError( + 'Internal collections have been dropped', + new Error('_checkpoints collection was dropped') + ); + } - while (true) { - if (this.abort_signal.aborted) { - break; - } + if ( + !( + changeDocument.operationType == 'insert' || + changeDocument.operationType == 'update' || + changeDocument.operationType == 'replace' + ) + ) { + continue; + } - const originalChangeDocument = await stream.tryNext().catch((e) => { - throw mapChangeStreamError(e); - }); - // The stream was closed, we will only ever receive `null` from it - if (!originalChangeDocument && stream.closed) { - break; - } + // We handle two types of checkpoint events: + // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these + // immediately, regardless of where they were created. + // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate + // limiting of commits, so we specifically want to exclude checkpoints from other streams. + // + // It may be useful to also throttle commits due to standalone checkpoints in the future. + // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now. - if (this.abort_signal.aborted) { - break; - } + const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; + if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) { + continue; + } + const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); - if (originalChangeDocument == null) { - // We get a new null document after `maxAwaitTimeMS` if there were no other events. - // In this case, stream.resumeToken is the resume token associated with the last response. - // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next() - // does update it. - // From observed behavior, the actual resumeToken changes around once every 10 seconds. - // If we don't update it on empty events, we do keep consistency, but resuming the stream - // with old tokens may cause connection timeouts. - // We throttle this further by only persisting a keepalive once a minute. - // We add an additional check for waitForCheckpointLsn == null, to make sure we're not - // doing a keepalive in the middle of a transaction. - if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { - const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); - await batch.keepalive(lsn); - this.touch(); - lastEmptyResume = performance.now(); - // Log the token update. This helps as a general "replication is still active" message in the logs. - // This token would typically be around 10s behind. - this.logger.info( - `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}` - ); - this.isStartingReplication = false; - } - continue; - } + if (lastCheckpointLsn != null && lsn < lastCheckpointLsn) { + // Checkpoint out of order - should never happen with MongoDB. + // If it does happen, we throw an error to stop the replication - restarting should recover. + // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. + // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. 
+ throw new ReplicationAssertionError( + `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last seen LSN ${lastCheckpointLsn}. Restarting replication.` + ); + } + lastCheckpointLsn = lsn; - this.touch(); + if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { + waitForCheckpointLsn = null; + } + const didCommit = await writer.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); - if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) { - continue; - } + if (didCommit) { + this.oldestUncommittedChange = null; + this.isStartingReplication = false; + changesSinceLastCheckpoint = 0; + } - let changeDocument = originalChangeDocument; - if (originalChangeDocument?.splitEvent != null) { - // Handle split events from $changeStreamSplitLargeEvent. - // This is only relevant for very large update operations. - const splitEvent = originalChangeDocument?.splitEvent; - - if (splitDocument == null) { - splitDocument = originalChangeDocument; - } else { - splitDocument = Object.assign(splitDocument, originalChangeDocument); - } - - if (splitEvent.fragment == splitEvent.of) { - // Got all fragments - changeDocument = splitDocument; - splitDocument = null; - } else { - // Wait for more fragments - continue; - } - } else if (splitDocument != null) { - // We were waiting for fragments, but got a different event - throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`); - } + continue; + } - if ( - !filters.multipleDatabases && - 'ns' in changeDocument && - changeDocument.ns.db != this.defaultDb.databaseName && - changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`) - ) { - // When all of the following conditions are met: - // 1. We're replicating from an Atlas Flex instance. - // 2. There were changestream events recorded while the PowerSync service is paused. - // 3. We're only replicating from a single database. - // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'}, - // instead of the expected {db: 'ps'}. - // We correct this. - changeDocument.ns.db = this.defaultDb.databaseName; - - if (!flexDbNameWorkaroundLogged) { - flexDbNameWorkaroundLogged = true; - this.logger.warn( - `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.` - ); - } - } + if ( + changeDocument.operationType == 'insert' || + changeDocument.operationType == 'update' || + changeDocument.operationType == 'replace' || + changeDocument.operationType == 'delete' + ) { + if (waitForCheckpointLsn == null) { + waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); + } + const rel = getMongoRelation(changeDocument.ns); + const tables = await this.getRelations(writer, rel, { + // In most cases, we should not need to snapshot this. But if this is the first time we see the collection + // for whatever reason, then we do need to snapshot it. + // This may result in some duplicate operations when a collection is created for the first time after + // sync rules was deployed. + snapshot: true + }); + const filtered = tables.filter((t) => t.syncAny); - const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; - - if (ns?.coll == CHECKPOINTS_COLLECTION) { - /** - * Dropping the database does not provide an `invalidate` event. 
- * We typically would receive `drop` events for the collection which we - * would process below. - * - * However we don't commit the LSN after collections are dropped. - * The prevents the `startAfter` or `resumeToken` from advancing past the drop events. - * The stream also closes after the drop events. - * This causes an infinite loop of processing the collection drop events. - * - * This check here invalidates the change stream if our `_checkpoints` collection - * is dropped. This allows for detecting when the DB is dropped. - */ - if (changeDocument.operationType == 'drop') { - throw new ChangeStreamInvalidatedError( - 'Internal collections have been dropped', - new Error('_checkpoints collection was dropped') - ); - } - - if ( - !( - changeDocument.operationType == 'insert' || - changeDocument.operationType == 'update' || - changeDocument.operationType == 'replace' - ) - ) { - continue; - } - - // We handle two types of checkpoint events: - // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these - // immediately, regardless of where they were created. - // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate - // limiting of commits, so we specifically want to exclude checkpoints from other streams. - // - // It may be useful to also throttle commits due to standalone checkpoints in the future. - // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now. - - const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; - if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) { - continue; - } + for (let table of filtered) { + if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { + this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); + } + const flushResult = await this.writeChange(writer, table, changeDocument); + changesSinceLastCheckpoint += 1; + if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { + // When we are catching up replication after an initial snapshot, there may be a very long delay + // before we do a commit(). In that case, we need to periodically persist the resume LSN, so + // we don't restart from scratch if we restart replication. + // The same could apply if we need to catch up on replication after some downtime. const { comparable: lsn } = new MongoLSN({ timestamp: changeDocument.clusterTime!, resume_token: changeDocument._id }); - if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) { - // Checkpoint out of order - should never happen with MongoDB. - // If it does happen, we throw an error to stop the replication - restarting should recover. - // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop. - // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. - // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. - throw new ReplicationAssertionError( - `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. 
Restarting replication.` - ); - } - - if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { - waitForCheckpointLsn = null; - } - const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); - - if (didCommit) { - this.oldestUncommittedChange = null; - this.isStartingReplication = false; - changesSinceLastCheckpoint = 0; - } - } else if ( - changeDocument.operationType == 'insert' || - changeDocument.operationType == 'update' || - changeDocument.operationType == 'replace' || - changeDocument.operationType == 'delete' - ) { - if (waitForCheckpointLsn == null) { - waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); - } - const rel = getMongoRelation(changeDocument.ns); - const table = await this.getRelation(batch, rel, { - // In most cases, we should not need to snapshot this. But if this is the first time we see the collection - // for whatever reason, then we do need to snapshot it. - // This may result in some duplicate operations when a collection is created for the first time after - // sync rules was deployed. - snapshot: true - }); - if (table.syncAny) { - if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { - this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); - } - const flushResult = await this.writeChange(batch, table, changeDocument); - changesSinceLastCheckpoint += 1; - if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { - // When we are catching up replication after an initial snapshot, there may be a very long delay - // before we do a commit(). In that case, we need to periodically persist the resume LSN, so - // we don't restart from scratch if we restart replication. - // The same could apply if we need to catch up on replication after some downtime. - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); - await batch.setResumeLsn(lsn); - changesSinceLastCheckpoint = 0; - } - } - } else if (changeDocument.operationType == 'drop') { - const rel = getMongoRelation(changeDocument.ns); - const table = await this.getRelation(batch, rel, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (table.syncAny) { - await batch.drop([table]); - this.relationCache.delete(table); - } - } else if (changeDocument.operationType == 'rename') { - const relFrom = getMongoRelation(changeDocument.ns); - const relTo = getMongoRelation(changeDocument.to); - const tableFrom = await this.getRelation(batch, relFrom, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (tableFrom.syncAny) { - await batch.drop([tableFrom]); - this.relationCache.delete(relFrom); - } - // Here we do need to snapshot the new table - const collection = await this.getCollectionInfo(relTo.schema, relTo.name); - await this.handleRelation(batch, relTo, { - // This is a new (renamed) collection, so always snapshot it. 
- snapshot: true, - collectionInfo: collection - }); + this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); + await writer.setResumeLsn(lsn); + changesSinceLastCheckpoint = 0; } } + } else if (changeDocument.operationType == 'drop') { + const rel = getMongoRelation(changeDocument.ns); + await this.drop(writer, rel); + } else if (changeDocument.operationType == 'rename') { + const relFrom = getMongoRelation(changeDocument.ns); + const relTo = getMongoRelation(changeDocument.to); + await this.drop(writer, relFrom); + + // Here we do need to snapshot the new table + const collection = await this.getCollectionInfo(relTo.schema, relTo.name); + await this.handleRelations(writer, relTo, { + // This is a new (renamed) collection, so always snapshot it. + snapshot: true, + collectionInfo: collection + }); } - ); + } + + throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); + } + + private constructAfterRecord(writer: storage.BucketDataWriter, document: mongo.Document): SqliteRow { + const inputRow = constructAfterRecord(document); + return writer.rowProcessor.applyRowContext(inputRow); + } + + async writeChange( + writer: storage.BucketDataWriter, + table: storage.SourceTable, + change: mongo.ChangeStreamDocument + ): Promise { + if (!table.syncAny) { + this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`); + return null; + } + + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + if (change.operationType == 'insert') { + const baseRecord = this.constructAfterRecord(writer, change.fullDocument); + return await writer.save({ + tag: SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: baseRecord, + afterReplicaId: change.documentKey._id + }); + } else if (change.operationType == 'update' || change.operationType == 'replace') { + if (change.fullDocument == null) { + // Treat as delete + return await writer.save({ + tag: SaveOperationTag.DELETE, + sourceTable: table, + before: undefined, + beforeReplicaId: change.documentKey._id + }); + } + const after = this.constructAfterRecord(writer, change.fullDocument!); + return await writer.save({ + tag: SaveOperationTag.UPDATE, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: after, + afterReplicaId: change.documentKey._id + }); + } else if (change.operationType == 'delete') { + return await writer.save({ + tag: SaveOperationTag.DELETE, + sourceTable: table, + before: undefined, + beforeReplicaId: change.documentKey._id + }); + } else { + throw new ReplicationAssertionError(`Unsupported operation: ${change.operationType}`); + } } async getReplicationLagMillis(): Promise { @@ -1126,24 +925,4 @@ export class ChangeStream { } } -function mapChangeStreamError(e: any) { - if (isMongoNetworkTimeoutError(e)) { - // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out". - // We wrap the error to make it more useful. - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); - } else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { - // maxTimeMS was reached. 
Example message: - // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); - } else if ( - isMongoServerError(e) && - e.codeName == 'NoMatchingDocument' && - e.errmsg?.includes('post-image was not found') - ) { - throw new ChangeStreamInvalidatedError(e.errmsg, e); - } else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) { - throw new ChangeStreamInvalidatedError(e.message, e); - } else { - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e); - } -} +export { ChangeStreamInvalidatedError }; diff --git a/modules/module-mongodb/src/replication/ChangeStreamErrors.ts b/modules/module-mongodb/src/replication/ChangeStreamErrors.ts new file mode 100644 index 000000000..44fc40cd3 --- /dev/null +++ b/modules/module-mongodb/src/replication/ChangeStreamErrors.ts @@ -0,0 +1,38 @@ +import { isMongoNetworkTimeoutError, isMongoServerError } from '@powersync/lib-service-mongodb'; +import { DatabaseConnectionError, ErrorCode } from '@powersync/lib-services-framework'; + +/** + * Thrown when the change stream is not valid anymore, and replication + * must be restarted. + * + * Possible reasons: + * * Some change stream documents do not have postImages. + * * startAfter/resumeToken is not valid anymore. + */ +export class ChangeStreamInvalidatedError extends DatabaseConnectionError { + constructor(message: string, cause: any) { + super(ErrorCode.PSYNC_S1344, message, cause); + } +} + +export function mapChangeStreamError(e: any) { + if (isMongoNetworkTimeoutError(e)) { + // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out". + // We wrap the error to make it more useful. + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); + } else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { + // maxTimeMS was reached. 
Example message: + // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); + } else if ( + isMongoServerError(e) && + e.codeName == 'NoMatchingDocument' && + e.errmsg?.includes('post-image was not found') + ) { + throw new ChangeStreamInvalidatedError(e.errmsg, e); + } else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) { + throw new ChangeStreamInvalidatedError(e.message, e); + } else { + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e); + } +} diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts index 216138096..7e58d75a1 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts @@ -1,22 +1,43 @@ import { container, logger as defaultLogger } from '@powersync/lib-services-framework'; -import { replication } from '@powersync/service-core'; +import { + BucketStorageFactory, + PersistedSyncRulesContent, + replication, + ReplicationLock, + SyncRulesBucketStorage +} from '@powersync/service-core'; import { ChangeStream, ChangeStreamInvalidatedError } from './ChangeStream.js'; import { ConnectionManagerFactory } from './ConnectionManagerFactory.js'; export interface ChangeStreamReplicationJobOptions extends replication.AbstractReplicationJobOptions { connectionFactory: ConnectionManagerFactory; + storageFactory: BucketStorageFactory; + streams: ReplicationStreamConfig[]; +} + +export interface ReplicationStreamConfig { + syncRules: PersistedSyncRulesContent; + storage: SyncRulesBucketStorage; + lock: ReplicationLock; } export class ChangeStreamReplicationJob extends replication.AbstractReplicationJob { private connectionFactory: ConnectionManagerFactory; + private storageFactory: BucketStorageFactory; private lastStream: ChangeStream | null = null; + private readonly streams: ReplicationStreamConfig[]; + constructor(options: ChangeStreamReplicationJobOptions) { super(options); this.connectionFactory = options.connectionFactory; + this.streams = options.streams; + this.storageFactory = options.storageFactory; // We use a custom formatter to process the prefix - this.logger = defaultLogger.child({ prefix: `[powersync_${this.storage.group_id}] ` }); + this.logger = defaultLogger.child({ + prefix: `[powersync-${this.streams.map((stream) => stream.syncRules.id).join(',')}] ` + }); } async cleanUp(): Promise { @@ -27,6 +48,21 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ // Nothing needed here } + isDifferent(syncRules: PersistedSyncRulesContent[]): boolean { + if (syncRules.length != this.streams.length) { + return true; + } + + for (let rules of syncRules) { + const existing = this.streams.find((stream) => stream.syncRules.id === rules.id); + if (existing == null) { + return true; + } + } + + return false; + } + async replicate() { try { await this.replicateOnce(); @@ -47,12 +83,19 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ if (e instanceof ChangeStreamInvalidatedError) { // This stops replication and restarts with a new instance - await this.options.storage.factory.restartReplication(this.storage.group_id); + // FIXME: check this logic with 
multiple streams + for (let { storage } of Object.values(this.streams)) { + await storage.factory.restartReplication(storage.group_id); + } } // No need to rethrow - the error is already logged, and retry behavior is the same on error } finally { this.abortController.abort(); + + for (let { lock } of this.streams) { + await lock.release(); + } } } @@ -67,8 +110,9 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ return; } const stream = new ChangeStream({ + factory: this.storageFactory, abort_signal: this.abortController.signal, - storage: this.options.storage, + streams: this.streams, metrics: this.options.metrics, connections: connectionManager, logger: this.logger diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts index 2fca7aec3..1f2830c39 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts @@ -1,10 +1,11 @@ import { storage, replication } from '@powersync/service-core'; -import { ChangeStreamReplicationJob } from './ChangeStreamReplicationJob.js'; +import { ChangeStreamReplicationJob, ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; import { ConnectionManagerFactory } from './ConnectionManagerFactory.js'; import { MongoErrorRateLimiter } from './MongoErrorRateLimiter.js'; import { MongoModule } from '../module/MongoModule.js'; import { MongoLSN } from '../common/MongoLSN.js'; import { timestampToDate } from './replication-utils.js'; +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; export interface ChangeStreamReplicatorOptions extends replication.AbstractReplicatorOptions { connectionFactory: ConnectionManagerFactory; @@ -12,6 +13,7 @@ export interface ChangeStreamReplicatorOptions extends replication.AbstractRepli export class ChangeStreamReplicator extends replication.AbstractReplicator { private readonly connectionFactory: ConnectionManagerFactory; + private job: ChangeStreamReplicationJob | null = null; constructor(options: ChangeStreamReplicatorOptions) { super(options); @@ -19,22 +21,79 @@ export class ChangeStreamReplicator extends replication.AbstractReplicator { // TODO: Implement anything? 
} + async refresh(options?: { configured_lock?: storage.ReplicationLock }) { + if (this.stopped) { + return; + } + + let configuredLock = options?.configured_lock; + + const replicatingSyncRules = await this.storage.getReplicatingSyncRules(); + + if (this.job?.isStopped) { + this.job = null; + } + + if (this.job != null && !this.job?.isDifferent(replicatingSyncRules)) { + // No changes + return; + } + + // Stop existing job, if any + await this.job?.stop(); + this.job = null; + if (replicatingSyncRules.length === 0) { + // No active replication + return; + } + + let streamConfig: ReplicationStreamConfig[] = []; + try { + for (let rules of replicatingSyncRules) { + let lock: storage.ReplicationLock; + if (configuredLock?.sync_rules_id == rules.id) { + lock = configuredLock; + } else { + lock = await rules.lock(); + } + streamConfig.push({ lock, syncRules: rules, storage: this.storage.getInstance(rules) }); + } + } catch (e) { + // Release any acquired locks + for (let { lock } of streamConfig) { + try { + await lock.release(); + } catch (ex) { + this.logger.warn('Failed to release replication lock after acquisition failure', ex); + } + } + throw e; + } + + const newJob = new ChangeStreamReplicationJob({ + id: this.createJobId(replicatingSyncRules[0].id), // FIXME: check the id + storageFactory: this.storage, + storage: streamConfig[0].storage, // FIXME: multi-stream logic + lock: streamConfig[0].lock, // FIXME: multi-stream logic + streams: streamConfig, + metrics: this.metrics, + connectionFactory: this.connectionFactory, + rateLimiter: new MongoErrorRateLimiter() + }); + this.job = newJob; + await newJob.start(); + } + async stop(): Promise { await super.stop(); + await this.job?.stop(); await this.connectionFactory.shutdown(); } diff --git a/modules/module-mongodb/src/replication/MongoRelation.ts b/modules/module-mongodb/src/replication/MongoRelation.ts index 7ca0e51b8..807b295ce 100644 --- a/modules/module-mongodb/src/replication/MongoRelation.ts +++ b/modules/module-mongodb/src/replication/MongoRelation.ts @@ -30,10 +30,7 @@ export function getMongoRelation(source: mongo.ChangeStreamNameSpace): storage.S /** * For in-memory cache only. 
*/ -export function getCacheIdentifier(source: storage.SourceEntityDescriptor | storage.SourceTable): string { - if (source instanceof storage.SourceTable) { - return `${source.schema}.${source.name}`; - } +export function getCacheIdentifier(source: storage.SourceEntityDescriptor): string { return `${source.schema}.${source.name}`; } diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts new file mode 100644 index 000000000..7469c454f --- /dev/null +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -0,0 +1,602 @@ +import { mongo } from '@powersync/lib-service-mongodb'; +import { + container, + logger as defaultLogger, + ErrorCode, + Logger, + ReplicationAbortedError, + ServiceError +} from '@powersync/lib-services-framework'; +import { InternalOpId, MetricsEngine, SaveOperationTag, SourceTable, storage } from '@powersync/service-core'; +import { DatabaseInputRow, RowProcessor, SqliteInputRow, SqliteRow, TablePattern } from '@powersync/service-sync-rules'; +import { ReplicationMetric } from '@powersync/service-types'; +import pDefer, { DeferredPromise } from 'p-defer'; +import { MongoLSN } from '../common/MongoLSN.js'; +import { PostImagesOption } from '../types/types.js'; +import { escapeRegExp } from '../utils.js'; +import { mapChangeStreamError } from './ChangeStreamErrors.js'; +import { MongoManager } from './MongoManager.js'; +import { constructAfterRecord, createCheckpoint, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; +import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; +import { CHECKPOINTS_COLLECTION } from './replication-utils.js'; + +export interface MongoSnapshotterOptions { + connections: MongoManager; + writer: () => Promise; + metrics: MetricsEngine; + abort_signal: AbortSignal; + /** + * Override maxAwaitTimeMS for testing. + */ + maxAwaitTimeMS?: number; + /** + * Override snapshotChunkLength for testing. + */ + snapshotChunkLength?: number; + logger?: Logger; + checkpointStreamId: mongo.ObjectId; +} + +export class MongoSnapshotter { + connection_id = 1; + + private readonly writerFactory: () => Promise; + + private readonly metrics: MetricsEngine; + + private connections: MongoManager; + private readonly client: mongo.MongoClient; + private readonly defaultDb: mongo.Db; + + private readonly maxAwaitTimeMS: number; + private readonly snapshotChunkLength: number; + + private abortSignal: AbortSignal; + + private logger: Logger; + + private checkpointStreamId: mongo.ObjectId; + private changeStreamTimeout: number; + + private queue = new Set(); + private nextItemQueued: DeferredPromise | null = null; + private initialSnapshotDone = pDefer(); + private lastSnapshotOpId: InternalOpId | null = null; + + constructor(options: MongoSnapshotterOptions) { + this.writerFactory = options.writer; + this.metrics = options.metrics; + this.connections = options.connections; + this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; + this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; + this.client = this.connections.client; + this.defaultDb = this.connections.db; + this.abortSignal = options.abort_signal; + this.logger = options.logger ?? 
defaultLogger;
+    this.checkpointStreamId = options.checkpointStreamId;
+    this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
+
+    this.abortSignal.addEventListener('abort', () => {
+      // Wake up the queue if it is waiting for items
+      this.nextItemQueued?.resolve();
+    });
+  }
+
+  private get usePostImages() {
+    return this.connections.options.postImages != PostImagesOption.OFF;
+  }
+
+  private get configurePostImages() {
+    return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
+  }
+
+  async queueSnapshotTables(snapshotLsn: string | null) {
+    await using writer = await this.writerFactory();
+    const sourceTables = writer.rowProcessor.getSourceTables();
+
+    if (snapshotLsn == null) {
+      // First replication attempt - get a snapshot and store the timestamp
+      snapshotLsn = await this.getSnapshotLsn(writer);
+      // FIXME: check the logic for resumeLSN.
+      await writer.setResumeLsn(snapshotLsn);
+      this.logger.info(`Marking snapshot at ${snapshotLsn}`);
+    } else {
+      this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
+      // Check that the snapshot is still valid.
+      await this.validateSnapshotLsn(writer, snapshotLsn);
+    }
+
+    // Start by resolving all tables.
+    // This checks postImage configuration, and that should fail as
+    // early as possible.
+    // This resolves _all_ tables, including those already snapshotted.
+    let allSourceTables: SourceTable[] = [];
+    for (let tablePattern of sourceTables) {
+      const tables = await this.resolveQualifiedTableNames(writer, tablePattern);
+      allSourceTables.push(...tables);
+    }
+
+    let tablesWithStatus: SourceTable[] = [];
+    for (let table of allSourceTables) {
+      if (table.snapshotComplete) {
+        this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
+        continue;
+      }
+      const count = await this.estimatedCountNumber(table);
+      const updated = await writer.updateTableProgress(table, {
+        totalEstimatedCount: count
+      });
+      tablesWithStatus.push(updated);
+      this.logger.info(
+        `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
+      );
+    }
+
+    for (let table of tablesWithStatus) {
+      this.queue.add(table);
+    }
+    this.nextItemQueued?.resolve();
+  }
+
+  async waitForInitialSnapshot() {
+    await this.initialSnapshotDone.promise;
+  }
+
+  async replicationLoop() {
+    try {
+      await using writer = await this.writerFactory();
+      if (this.queue.size == 0) {
+        // Special case where we start with no tables to snapshot
+        await this.markSnapshotDone(writer);
+      }
+      while (!this.abortSignal.aborted) {
+        const table = this.queue.values().next().value;
+        if (table == null) {
+          this.initialSnapshotDone.resolve();
+          // There must be no await in between checking the queue above and creating this deferred promise,
+          // otherwise we may miss new items being queued.
+          this.nextItemQueued = pDefer();
+          await this.nextItemQueued.promise;
+          this.nextItemQueued = null;
+          // At this point, either we have a new item in the queue, or we are aborted.
+          continue;
+        }
+
+        await this.replicateTable(writer, table);
+        this.queue.delete(table);
+        if (this.queue.size == 0) {
+          await this.markSnapshotDone(writer);
+        }
+      }
+      throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason);
+    } catch (e) {
+      // If initial snapshot already completed, this has no effect
+      this.initialSnapshotDone.reject(e);
+      throw e;
+    }
+  }
+
+  private async markSnapshotDone(writer: storage.BucketDataWriter) {
+    // The checkpoint here is a marker - we need to replicate up to at least this
+    // point before the data can be considered consistent.
+    const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
+    await writer.markAllSnapshotDone(checkpoint);
+    // KLUDGE: We need to create an extra checkpoint _after_ marking the snapshot done, to fix
+    // issues with the order of processing commits(). This is picked up by tests on postgres storage;
+    // the issue may be specific to that storage engine.
+    await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
+
+    if (this.lastSnapshotOpId != null) {
+      // Populate the cache _after_ initial replication, but _before_ we switch to these sync rules.
+      // TODO: only run this after initial replication, not after each table.
+      // FIXME: implement this again
+      // await this.storage.populatePersistentChecksumCache({
+      //   // No checkpoint yet, but we do have the opId.
+      //   maxOpId: this.lastSnapshotOpId,
+      //   signal: this.abortSignal
+      // });
+    }
+  }
+
+  private async replicateTable(writer: storage.BucketDataWriter, tableRequest: SourceTable) {
+    // Get fresh table info, in case it was updated while queuing
+    const table = await writer.getTable(tableRequest);
+    if (table == null) {
+      return;
+    }
+    if (table.snapshotComplete) {
+      return;
+    }
+    await this.snapshotTable(writer, table);
+
+    const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
+    await writer.markTableSnapshotDone([table], noCheckpointBefore);
+
+    // This commit ensures we set keepalive_op.
+    const resumeLsn = writer.resumeFromLsn ?? MongoLSN.ZERO.comparable;
+    // FIXME: Only commit on relevant syncRules?
+ + await writer.commit(resumeLsn); + + // FIXME: check this + // if (flushResults?.flushed_op != null) { + // this.lastSnapshotOpId = flushResults.flushed_op; + // } + this.logger.info(`Flushed snapshot at ${this.lastSnapshotOpId}`); + } + + private queueTable(table: storage.SourceTable) { + // These two operations must be atomic to avoid race conditions + this.queue.add(table); + this.nextItemQueued?.resolve(); + } + + async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { + await writer.markTableSnapshotRequired(table); + this.queueTable(table); + } + + async estimatedCount(table: storage.SourceTable): Promise { + const count = await this.estimatedCountNumber(table); + return `~${count}`; + } + + async estimatedCountNumber(table: storage.SourceTable): Promise { + const db = this.client.db(table.schema); + return await db.collection(table.name).estimatedDocumentCount(); + } + + private async resolveQualifiedTableNames( + writer: storage.BucketDataWriter, + tablePattern: TablePattern + ): Promise { + const schema = tablePattern.schema; + if (tablePattern.connectionTag != this.connections.connectionTag) { + return []; + } + + let nameFilter: RegExp | string; + if (tablePattern.isWildcard) { + nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)); + } else { + nameFilter = tablePattern.name; + } + let result: storage.SourceTable[] = []; + + // Check if the collection exists + const collections = await this.client + .db(schema) + .listCollections( + { + name: nameFilter + }, + { nameOnly: false } + ) + .toArray(); + + if (!tablePattern.isWildcard && collections.length == 0) { + this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`); + } + + for (let collection of collections) { + await this.checkPostImages(schema, collection); + const sourceTables = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }), + pattern: tablePattern + }); + + result.push(...sourceTables); + } + + return result; + } + + private async snapshotTable(writer: storage.BucketDataWriter, table: storage.SourceTable) { + const totalEstimatedCount = await this.estimatedCountNumber(table); + let at = table.snapshotStatus?.replicatedCount ?? 
0; + const db = this.client.db(table.schema); + const collection = db.collection(table.name); + await using query = new ChunkedSnapshotQuery({ + collection, + key: table.snapshotStatus?.lastKey, + batchSize: this.snapshotChunkLength + }); + if (query.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } + + let lastBatch = performance.now(); + let nextChunkPromise = query.nextChunk(); + while (true) { + const { docs: docBatch, lastKey } = await nextChunkPromise; + if (docBatch.length == 0) { + // No more data - stop iterating + break; + } + + if (this.abortSignal.aborted) { + throw new ReplicationAbortedError(`Aborted initial replication`, this.abortSignal.reason); + } + + // Pre-fetch next batch, so that we can read and write concurrently + nextChunkPromise = query.nextChunk(); + for (let document of docBatch) { + const record = this.constructAfterRecord(writer.rowProcessor, document); + + // This auto-flushes when the batch reaches its size limit + await writer.save({ + tag: SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: document._id + }); + } + + // Important: flush before marking progress + const flushResult = await writer.flush(); + if (flushResult != null) { + this.lastSnapshotOpId = flushResult.flushed_op; + } + at += docBatch.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); + + table = await writer.updateTableProgress(table, { + lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + + const duration = performance.now() - lastBatch; + lastBatch = performance.now(); + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms` + ); + this.touch(); + } + // In case the loop was interrupted, make sure we await the last promise. + await nextChunkPromise; + } + + private constructAfterRecord(rowProcessor: RowProcessor, document: mongo.Document): SqliteRow { + const inputRow = constructAfterRecord(document); + return rowProcessor.applyRowContext(inputRow); + } + + private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { + if (!this.usePostImages) { + // Nothing to check + return; + } + + const enabled = collectionInfo.options?.changeStreamPreAndPostImages?.enabled == true; + + if (!enabled && this.configurePostImages) { + await this.client.db(db).command({ + collMod: collectionInfo.name, + changeStreamPreAndPostImages: { enabled: true } + }); + this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`); + } else if (!enabled) { + throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`); + } + } + + private async getSnapshotLsn(writer: storage.BucketDataWriter): Promise { + const hello = await this.defaultDb.command({ hello: 1 }); + // Basic sanity check + if (hello.msg == 'isdbgrid') { + throw new ServiceError( + ErrorCode.PSYNC_S1341, + 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).' + ); + } else if (hello.setName == null) { + throw new ServiceError( + ErrorCode.PSYNC_S1342, + 'Standalone MongoDB instances are not supported - use a replicaset.' + ); + } + + // Open a change stream just to get a resume token for later use. 
+    // We could use clusterTime from the hello command, but that won't tell us if the
+    // snapshot isn't valid anymore.
+    // If we just use the first resumeToken from the stream, we get two potential issues:
+    // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
+    //    in source db or other stream issues.
+    // 2. The first actual change we get may have the same clusterTime, causing us to incorrectly
+    //    skip that event.
+    // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
+    // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
+    // periodically until the timeout is reached.
+
+    const LSN_TIMEOUT_SECONDS = 60;
+    const LSN_CREATE_INTERVAL_SECONDS = 1;
+
+    await using streamManager = this.openChangeStream(writer, { lsn: null, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    const startTime = performance.now();
+    let lastCheckpointCreated = -10_000;
+    let eventsSeen = 0;
+
+    while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
+      if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
+        await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
+        lastCheckpointCreated = performance.now();
+      }
+
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      const changeDocument = await stream.tryNext().catch((e) => {
+        throw mapChangeStreamError(e);
+      });
+      if (changeDocument == null) {
+        continue;
+      }
+
+      const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
+
+      if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
+        const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
+        if (!this.checkpointStreamId.equals(checkpointId)) {
+          continue;
+        }
+        const { comparable: lsn } = new MongoLSN({
+          timestamp: changeDocument.clusterTime!,
+          resume_token: changeDocument._id
+        });
+        return lsn;
+      }
+
+      eventsSeen += 1;
+    }
+
+    // Could happen if there is a very large replication lag?
+    throw new ServiceError(
+      ErrorCode.PSYNC_S1301,
+      `Timeout while waiting for checkpoint document after ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`
+    );
+  }
+
+  /**
+   * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
+   */
+  private async validateSnapshotLsn(writer: storage.BucketDataWriter, lsn: string) {
+    await using streamManager = this.openChangeStream(writer, { lsn: lsn, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    try {
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      await stream.tryNext();
+    } catch (e) {
+      // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
+      // we possibly cannot recover from it.
+ throw mapChangeStreamError(e); + } + } + + private getSourceNamespaceFilters(rowProcessor: RowProcessor): { $match: any; multipleDatabases: boolean } { + const sourceTables = rowProcessor.getSourceTables(); + + let $inFilters: { db: string; coll: string }[] = [ + { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } + ]; + let $refilters: { 'ns.db': string; 'ns.coll': RegExp }[] = []; + let multipleDatabases = false; + for (let tablePattern of sourceTables) { + if (tablePattern.connectionTag != this.connections.connectionTag) { + continue; + } + + if (tablePattern.schema != this.defaultDb.databaseName) { + multipleDatabases = true; + } + + if (tablePattern.isWildcard) { + $refilters.push({ + 'ns.db': tablePattern.schema, + 'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)) + }); + } else { + $inFilters.push({ + db: tablePattern.schema, + coll: tablePattern.name + }); + } + } + + const nsFilter = multipleDatabases + ? { ns: { $in: $inFilters } } + : { 'ns.coll': { $in: $inFilters.map((ns) => ns.coll) } }; + if ($refilters.length > 0) { + return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases }; + } + return { $match: nsFilter, multipleDatabases }; + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield constructAfterRecord(row); + } + } + + private openChangeStream(writer: storage.BucketDataWriter, options: { lsn: string | null; maxAwaitTimeMs?: number }) { + const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null; + const startAfter = lastLsn?.timestamp; + const resumeAfter = lastLsn?.resumeToken; + + const filters = this.getSourceNamespaceFilters(writer.rowProcessor); + + const pipeline: mongo.Document[] = [ + { + $match: filters.$match + }, + { $changeStreamSplitLargeEvent: {} } + ]; + + let fullDocument: 'required' | 'updateLookup'; + + if (this.usePostImages) { + // 'read_only' or 'auto_configure' + // Configuration happens during snapshot, or when we see new + // collections. + fullDocument = 'required'; + } else { + fullDocument = 'updateLookup'; + } + const streamOptions: mongo.ChangeStreamOptions = { + showExpandedEvents: true, + maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS, + fullDocument: fullDocument, + maxTimeMS: this.changeStreamTimeout + }; + + /** + * Only one of these options can be supplied at a time. + */ + if (resumeAfter) { + streamOptions.resumeAfter = resumeAfter; + } else { + // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the + // case if we have an old one. 
+ streamOptions.startAtOperationTime = startAfter; + } + + let stream: mongo.ChangeStream; + if (filters.multipleDatabases) { + // Requires readAnyDatabase@admin on Atlas + stream = this.client.watch(pipeline, streamOptions); + } else { + // Same general result, but requires less permissions than the above + stream = this.defaultDb.watch(pipeline, streamOptions); + } + + this.abortSignal.addEventListener('abort', () => { + stream.close(); + }); + + return { + stream, + filters, + [Symbol.asyncDispose]: async () => { + return stream.close(); + } + }; + } + + private lastTouchedAt = performance.now(); + + private touch() { + if (performance.now() - this.lastTouchedAt > 1_000) { + this.lastTouchedAt = performance.now(); + // Update the probes, but don't wait for it + container.probes.touch().catch((e) => { + this.logger.error(`Failed to touch the container probe: ${e.message}`, e); + }); + } + } +} diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index 7e4f0ed48..859d53167 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -40,9 +40,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const result = await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); const test_id = result.insertedId; @@ -77,9 +75,7 @@ bucket_definitions: const result = await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); const test_id = result.insertedId; - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); await setTimeout(30); await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); @@ -108,8 +104,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -155,9 +150,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -202,9 +195,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -242,9 +233,7 @@ bucket_definitions: `); await db.createCollection('test_DATA'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_DATA'); const result = await collection.insertOne({ description: 'test1' }); @@ -266,8 +255,7 @@ bucket_definitions: `); await db.createCollection('test_data'); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const largeDescription = crypto.randomBytes(20_000).toString('hex'); @@ -299,8 +287,7 @@ bucket_definitions: data: [] `; await context.updateSyncRules(syncRuleContent); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_data'); 
const result = await collection.insertOne({ description: 'test1' }); @@ -329,8 +316,7 @@ bucket_definitions: - SELECT _id as id, description FROM "test_data2" `; await context.updateSyncRules(syncRuleContent); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_data1'); const result = await collection.insertOne({ description: 'test1' }); @@ -358,11 +344,10 @@ bucket_definitions: const result = await collection.insertOne({ description: 'test1' }); const test_id = result.insertedId.toHexString(); - await context.replicateSnapshot(); + await context.initializeReplication(); // Note: snapshot is only consistent some time into the streaming request. // At the point that we get the first acknowledged checkpoint, as is required // for getBucketData(), the data should be consistent. - context.startStreaming(); const data = await context.getBucketData('global[]'); expect(data).toMatchObject([test_utils.putOp('test_data', { id: test_id, description: 'test1' })]); @@ -384,7 +369,7 @@ bucket_definitions: await db.createCollection('test_data'); - await context.replicateSnapshot(); + await context.initializeReplication(); const collection = db.collection('test_data'); const result = await collection.insertOne({ name: 't1' }); @@ -399,7 +384,6 @@ bucket_definitions: const largeDescription = crypto.randomBytes(12000000 / 2).toString('hex'); await collection.updateOne({ _id: test_id }, { $set: { description: largeDescription } }); - context.startStreaming(); const data = await context.getBucketData('global[]'); expect(data.length).toEqual(2); @@ -428,9 +412,7 @@ bucket_definitions: const { db } = context; await context.updateSyncRules(BASIC_SYNC_RULES); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_donotsync'); const result = await collection.insertOne({ description: 'test' }); @@ -451,7 +433,7 @@ bucket_definitions: data: - SELECT _id as id, description FROM "test_%"`); - await context.replicateSnapshot(); + await context.initializeReplication(); await db.createCollection('test_data', { // enabled: true here - everything should work @@ -462,15 +444,24 @@ bucket_definitions: const test_id = result.insertedId; await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); - context.startStreaming(); - const data = await context.getBucketData('global[]'); - expect(data).toMatchObject([ - // An extra op here, since this triggers a snapshot in addition to getting the event. - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) - ]); + // Either case is valid here + if (data.length == 3) { + expect( + data.sort((a, b) => JSON.parse(a.data!).description.localeCompare(JSON.parse(b.data!).description) ?? 0) + ).toMatchObject([ + // An extra op here, since this triggers a snapshot in addition to getting the event. 
+ // Can be either test1, test2, test2 or test2, test1, test2 + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) + ]); + } else { + expect(data).toMatchObject([ + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) + ]); + } }); test('postImages - new collection with postImages disabled', async () => { @@ -484,7 +475,7 @@ bucket_definitions: data: - SELECT _id as id, description FROM "test_data%"`); - await context.replicateSnapshot(); + await context.initializeReplication(); await db.createCollection('test_data', { // enabled: false here, but autoConfigure will enable it. @@ -496,8 +487,6 @@ bucket_definitions: const test_id = result.insertedId; await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); - context.startStreaming(); - await expect(() => context.getBucketData('global[]')).rejects.toMatchObject({ message: expect.stringContaining('stream was configured to require a post-image for all update events') }); @@ -519,8 +508,8 @@ bucket_definitions: const collection = db.collection('test_data'); await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); - await context.replicateSnapshot(); - await context.markSnapshotConsistent(); + // Initialize + await context.initializeReplication(); // Simulate an error await context.storage!.reportError(new Error('simulated error')); @@ -528,10 +517,9 @@ bucket_definitions: expect(syncRules).toBeTruthy(); expect(syncRules?.last_fatal_error).toEqual('simulated error'); - // startStreaming() should automatically clear the error. - context.startStreaming(); + // The new checkpoint should clear the error + await context.getCheckpoint(); - // getBucketData() creates a checkpoint that clears the error, so we don't do that // Just wait, and check that the error is cleared automatically. 
await vi.waitUntil( async () => { diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 1f54a7810..20a04636e 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -1,5 +1,6 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { + BucketChecksumRequest, BucketStorageFactory, createCoreReplicationMetrics, initializeCoreReplicationMetrics, @@ -7,22 +8,25 @@ import { OplogEntry, ProtocolOpId, ReplicationCheckpoint, + settledPromise, SyncRulesBucketStorage, - TestStorageOptions + TestStorageOptions, + unsettledPromise } from '@powersync/service-core'; -import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import { ChangeStream, ChangeStreamOptions } from '@module/replication/ChangeStream.js'; import { MongoManager } from '@module/replication/MongoManager.js'; import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/MongoRelation.js'; import { NormalizedMongoConnectionConfig } from '@module/types/types.js'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; export class ChangeStreamTestContext { private _walStream?: ChangeStream; private abortController = new AbortController(); - private streamPromise?: Promise>; + private settledReplicationPromise?: Promise>; public storage?: SyncRulesBucketStorage; /** @@ -60,13 +64,13 @@ export class ChangeStreamTestContext { /** * Abort snapshot and/or replication, without actively closing connections. */ - abort() { - this.abortController.abort(); + abort(cause?: Error) { + this.abortController.abort(cause); } async dispose() { - this.abort(); - await this.streamPromise?.catch((e) => e); + this.abort(new Error('Disposing test context')); + await this.settledReplicationPromise; await this.factory[Symbol.asyncDispose](); await this.connectionManager.end(); } @@ -111,10 +115,12 @@ export class ChangeStreamTestContext { return this._walStream; } const options: ChangeStreamOptions = { - storage: this.storage, + factory: this.factory, + streams: [{ storage: this.storage }], metrics: METRICS_HELPER.metricsEngine, connections: this.connectionManager, abort_signal: this.abortController.signal, + logger: this.streamOptions?.logger, // Specifically reduce this from the default for tests on MongoDB <= 6.0, otherwise it can take // a long time to abort the stream. maxAwaitTimeMS: this.streamOptions?.maxAwaitTimeMS ?? 200, @@ -124,8 +130,31 @@ export class ChangeStreamTestContext { return this._walStream!; } + /** + * Replicate a snapshot, start streaming, and wait for a consistent checkpoint. + */ + async initializeReplication() { + await this.replicateSnapshot(); + // Make sure we're up to date + await this.getCheckpoint(); + } + + /** + * Replicate the initial snapshot, and start streaming. 
+ */ async replicateSnapshot() { - await this.streamer.initReplication(); + // Use a settledPromise to avoid unhandled rejections + this.settledReplicationPromise ??= settledPromise(this.streamer.replicate()); + try { + await Promise.race([unsettledPromise(this.settledReplicationPromise), this.streamer.waitForInitialSnapshot()]); + } catch (e) { + if (e instanceof ReplicationAbortedError && e.cause != null) { + // Edge case for tests: replicate() can throw an error, but we'd receive the ReplicationAbortedError from + // waitForInitialSnapshot() first. In that case, prioritize the cause. + throw e.cause; + } + throw e; + } } /** @@ -136,28 +165,14 @@ export class ChangeStreamTestContext { */ async markSnapshotConsistent() { const checkpoint = await createCheckpoint(this.client, this.db, STANDALONE_CHECKPOINT_ID); - - await this.storage!.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive(checkpoint); - }); - } - - startStreaming() { - this.streamPromise = this.streamer - .streamChanges() - .then(() => ({ status: 'fulfilled', value: undefined }) satisfies PromiseFulfilledResult) - .catch((reason) => ({ status: 'rejected', reason }) satisfies PromiseRejectedResult); - return this.streamPromise; + await using writer = await this.storage!.createWriter(test_utils.BATCH_OPTIONS); + await writer.keepalive(checkpoint); } async getCheckpoint(options?: { timeout?: number }) { let checkpoint = await Promise.race([ getClientCheckpoint(this.client, this.db, this.factory, { timeout: options?.timeout ?? 15_000 }), - this.streamPromise?.then((e) => { - if (e.status == 'rejected') { - throw e.reason; - } - }) + unsettledPromise(this.settledReplicationPromise!) ]); if (checkpoint == null) { // This indicates an issue with the test setup - streamingPromise completed instead @@ -169,7 +184,8 @@ export class ChangeStreamTestContext { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); - const map = new Map(Object.entries(buckets)); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -178,8 +194,9 @@ export class ChangeStreamTestContext { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - const map = new Map([[bucket, start]]); + let map = [bucketRequest(syncRules, bucket, start)]; let data: OplogEntry[] = []; while (true) { const batch = this.storage!.getBucketDataBatch(checkpoint, map); @@ -189,20 +206,15 @@ export class ChangeStreamTestContext { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map.set(bucket, BigInt(batches[0]!.chunkData.next_after)); + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } - async getChecksums(buckets: string[], options?: { timeout?: number }) { - let checkpoint = await this.getCheckpoint(options); - return this.storage!.getChecksums(checkpoint, buckets); - } - - async getChecksum(bucket: string, options?: { timeout?: number }) { + async getChecksum(request: BucketChecksumRequest, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); - const map = await this.storage!.getChecksums(checkpoint, [bucket]); - return 
map.get(bucket); + const map = await this.storage!.getChecksums(checkpoint, [request]); + return map.get(request.bucket); } } diff --git a/modules/module-mongodb/test/src/chunked_snapshot.test.ts b/modules/module-mongodb/test/src/chunked_snapshot.test.ts index 930c82e9c..26508b8f7 100644 --- a/modules/module-mongodb/test/src/chunked_snapshot.test.ts +++ b/modules/module-mongodb/test/src/chunked_snapshot.test.ts @@ -1,5 +1,11 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { reduceBucket, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; +import { + reduceBucket, + settledPromise, + TestStorageConfig, + TestStorageFactory, + unsettledPromise +} from '@powersync/service-core'; import { METRICS_HELPER } from '@powersync/service-core-tests'; import { JSONBig } from '@powersync/service-jsonbig'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; @@ -116,7 +122,7 @@ function defineBatchTests(config: TestStorageConfig) { // 2. Replicate one batch of rows // Our "stopping point" here is not quite deterministic. - const p = context.replicateSnapshot(); + const p = settledPromise(context.initializeReplication()); const stopAfter = 100; const startRowCount = (await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 0; @@ -146,9 +152,10 @@ function defineBatchTests(config: TestStorageConfig) { await db.collection('test_data').insertOne({ _id: idD, description: 'new' }); // 4. Replicate the rest of the table. - await p; + await unsettledPromise(p); - context.startStreaming(); + // FIXME: only start streaming at this point: + // context.startStreaming(); const data = await context.getBucketData('global[]'); const reduced = reduceBucket(data); diff --git a/modules/module-mongodb/test/src/mongo_test.test.ts b/modules/module-mongodb/test/src/mongo_test.test.ts index 2b92eb52e..378f82a37 100644 --- a/modules/module-mongodb/test/src/mongo_test.test.ts +++ b/modules/module-mongodb/test/src/mongo_test.test.ts @@ -14,6 +14,7 @@ import { ChangeStream } from '@module/replication/ChangeStream.js'; import { constructAfterRecord } from '@module/replication/MongoRelation.js'; import { PostImagesOption } from '@module/types/types.js'; import { clearTestDb, connectMongoData, TEST_CONNECTION_OPTIONS } from './util.js'; +import { MongoSnapshotter } from '@module/replication/MongoSnapshotter.js'; describe('mongo data types', () => { async function setupTable(db: mongo.Db) { @@ -266,7 +267,7 @@ describe('mongo data types', () => { .toArray(); // It is tricky to save "undefined" with mongo, so we check that it succeeded. 
expect(rawResults[4].undefined).toBeUndefined(); - const transformed = [...ChangeStream.getQueryData(rawResults)]; + const transformed = [...MongoSnapshotter.getQueryData(rawResults)]; checkResults(transformed); } finally { await client.close(); @@ -287,7 +288,7 @@ describe('mongo data types', () => { .find({}, { sort: { _id: 1 } }) .toArray(); expect(rawResults[3].undefined).toEqual([undefined]); - const transformed = [...ChangeStream.getQueryData(rawResults)]; + const transformed = [...MongoSnapshotter.getQueryData(rawResults)]; checkResultsNested(transformed); } finally { @@ -548,7 +549,7 @@ bucket_definitions: .collection('test_data') .find({}, { sort: { _id: 1 } }) .toArray(); - const [row] = [...ChangeStream.getQueryData(rawResults)]; + const [row] = [...MongoSnapshotter.getQueryData(rawResults)]; const oldFormat = applyRowContext(row, CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY); expect(oldFormat).toMatchObject({ diff --git a/modules/module-mongodb/test/src/resume.test.ts b/modules/module-mongodb/test/src/resume.test.ts index a58bd8f4b..5e5dba9a9 100644 --- a/modules/module-mongodb/test/src/resume.test.ts +++ b/modules/module-mongodb/test/src/resume.test.ts @@ -1,7 +1,7 @@ import { ChangeStreamInvalidatedError } from '@module/replication/ChangeStream.js'; import { MongoManager } from '@module/replication/MongoManager.js'; import { normalizeConnectionConfig } from '@module/types/types.js'; -import { TestStorageConfig } from '@powersync/service-core'; +import { settledPromise, TestStorageConfig } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; @@ -26,8 +26,6 @@ function defineResumeTest(config: TestStorageConfig) { await context.replicateSnapshot(); - context.startStreaming(); - const collection = db.collection('test_data'); await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); @@ -60,7 +58,7 @@ function defineResumeTest(config: TestStorageConfig) { context2.storage = factory.getInstance(activeContent!); // If this test times out, it likely didn't throw the expected error here. 
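// The settledPromise / unsettledPromise helpers imported from @powersync/service-core in these
// tests are not shown in this diff. A minimal sketch of what they plausibly look like, based on
// the PromiseSettledResult shape used by the removed startStreaming() implementation earlier in
// this diff; the actual exports may differ.
function settledPromise<T>(promise: Promise<T>): Promise<PromiseSettledResult<T>> {
  // Never rejects, so the result can be stored without triggering unhandled rejection warnings.
  return promise.then(
    (value) => ({ status: 'fulfilled', value }) satisfies PromiseFulfilledResult<T>,
    (reason) => ({ status: 'rejected', reason }) satisfies PromiseRejectedResult
  );
}

async function unsettledPromise<T>(settled: Promise<PromiseSettledResult<T>>): Promise<T> {
  // Converts back into a normal promise, rethrowing the original rejection reason if any.
  const result = await settled;
  if (result.status == 'rejected') {
    throw result.reason;
  }
  return result.value;
}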
- const result = await context2.startStreaming(); + const result = await settledPromise(context2.initializeReplication()); // The ChangeStreamReplicationJob will detect this and throw a ChangeStreamInvalidatedError expect(result.status).toEqual('rejected'); expect((result as PromiseRejectedResult).reason).toBeInstanceOf(ChangeStreamInvalidatedError); diff --git a/modules/module-mongodb/test/src/resuming_snapshots.test.ts b/modules/module-mongodb/test/src/resuming_snapshots.test.ts index 302f5cc7b..24bd2b3c9 100644 --- a/modules/module-mongodb/test/src/resuming_snapshots.test.ts +++ b/modules/module-mongodb/test/src/resuming_snapshots.test.ts @@ -6,6 +6,7 @@ import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; import { describeWithStorage } from './util.js'; +import { logger } from '@powersync/lib-services-framework'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { describeWithStorage({ timeout: 240_000 }, function (config) { @@ -35,7 +36,9 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n let startRowCount: number; { - await using context = await ChangeStreamTestContext.open(factory, { streamOptions: { snapshotChunkLength: 1000 } }); + await using context = await ChangeStreamTestContext.open(factory, { + streamOptions: { snapshotChunkLength: 1000, logger: logger.child({ prefix: '[context1] ' }) } + }); await context.updateSyncRules(`bucket_definitions: global: @@ -87,7 +90,7 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // Bypass the usual "clear db on factory open" step. await using context2 = await ChangeStreamTestContext.open(factory, { doNotClear: true, - streamOptions: { snapshotChunkLength: 1000 } + streamOptions: { snapshotChunkLength: 1000, logger: logger.child({ prefix: '[context2] ' }) } }); const { db } = context2; @@ -98,9 +101,8 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n await db.collection('test_data2').insertOne({ _id: 10001 as any, description: 'insert1' }); await context2.loadNextSyncRules(); - await context2.replicateSnapshot(); + await context2.initializeReplication(); - context2.startStreaming(); const data = await context2.getBucketData('global[]', undefined, {}); const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === '1'); @@ -122,26 +124,30 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // We only test the final version. expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); - expect(insertedRowOps.length).toEqual(2); expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); - expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + if (insertedRowOps.length != 1) { + // Also valid + expect(insertedRowOps.length).toEqual(2); + expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + } // 1000 of test_data1 during first replication attempt. // N >= 1000 of test_data2 during first replication attempt. // 10000 - N - 1 + 1 of test_data2 during second replication attempt. // An additional update during streaming replication (2x total for this row). - // An additional insert during streaming replication (2x total for this row). + // An additional insert during streaming replication (1x or 2x total for this row). 
// If the deleted row was part of the first replication batch, it's removed by streaming replication. // This adds 2 ops. // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. // However, this is not deterministic. - const expectedCount = 11002 + deletedRowOps.length; + const expectedCount = 11000 + deletedRowOps.length + insertedRowOps.length; expect(data.length).toEqual(expectedCount); const replicatedCount = ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; - // With resumable replication, there should be no need to re-replicate anything. - expect(replicatedCount).toEqual(expectedCount); + // With resumable replication, there should be no need to re-replicate anything, apart from the newly-inserted row + expect(replicatedCount).toBeGreaterThanOrEqual(expectedCount); + expect(replicatedCount).toBeLessThanOrEqual(expectedCount + 1); } } diff --git a/modules/module-mongodb/test/src/slow_tests.test.ts b/modules/module-mongodb/test/src/slow_tests.test.ts index df575ef39..77f6d2498 100644 --- a/modules/module-mongodb/test/src/slow_tests.test.ts +++ b/modules/module-mongodb/test/src/slow_tests.test.ts @@ -2,11 +2,12 @@ import { setTimeout } from 'node:timers/promises'; import { describe, expect, test } from 'vitest'; import { mongo } from '@powersync/lib-service-mongodb'; -import { storage } from '@powersync/service-core'; +import { settledPromise, storage, unsettledPromise } from '@powersync/service-core'; import { ChangeStreamTestContext, setSnapshotHistorySeconds } from './change_stream_utils.js'; import { env } from './env.js'; import { describeWithStorage } from './util.js'; +import { bucketRequest, PARSE_OPTIONS } from '@powersync/service-core-tests'; describe.runIf(env.CI || env.SLOW_TESTS)('change stream slow tests', { timeout: 60_000 }, function () { describeWithStorage({}, defineSlowTests); @@ -23,13 +24,14 @@ function defineSlowTests(config: storage.TestStorageConfig) { // snapshot session. 
await using _ = await setSnapshotHistorySeconds(context.client, 1); const { db } = context; - await context.updateSyncRules(` + const instance = await context.updateSyncRules(` bucket_definitions: global: data: - SELECT _id as id, description, num FROM "test_data1" - SELECT _id as id, description, num FROM "test_data2" `); + const syncRules = instance.getParsedSyncRules(PARSE_OPTIONS); const collection1 = db.collection('test_data1'); const collection2 = db.collection('test_data2'); @@ -41,9 +43,9 @@ bucket_definitions: await collection1.bulkWrite(operations); await collection2.bulkWrite(operations); - await context.replicateSnapshot(); - context.startStreaming(); - const checksum = await context.getChecksum('global[]'); + await context.initializeReplication(); + const request = bucketRequest(syncRules, 'global[]'); + const checksum = await context.getChecksum(request); expect(checksum).toMatchObject({ count: 20_000 }); @@ -71,7 +73,7 @@ bucket_definitions: } await collection.bulkWrite(operations); - const snapshotPromise = context.replicateSnapshot(); + const snapshotPromise = settledPromise(context.initializeReplication()); for (let i = 49; i >= 0; i--) { await collection.updateMany( @@ -81,8 +83,7 @@ bucket_definitions: await setTimeout(20); } - await snapshotPromise; - context.startStreaming(); + await unsettledPromise(snapshotPromise); const data = await context.getBucketData('global[]'); diff --git a/modules/module-mssql/src/replication/CDCReplicationJob.ts b/modules/module-mssql/src/replication/CDCReplicationJob.ts index 120649544..5545848a3 100644 --- a/modules/module-mssql/src/replication/CDCReplicationJob.ts +++ b/modules/module-mssql/src/replication/CDCReplicationJob.ts @@ -21,6 +21,10 @@ export class CDCReplicationJob extends replication.AbstractReplicationJob { this.cdcReplicationJobOptions = options; } + public get storage() { + return this.options.storage; + } + async keepAlive() { // TODO Might need to leverage checkpoints table as a keepAlive } diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index a2eecc7ac..67200f54d 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -110,7 +110,7 @@ export class CDCStream { constructor(private options: CDCStreamOptions) { this.logger = options.logger ?? 
defaultLogger; this.storage = options.storage; - this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: options.connections.schema }); + this.syncRules = options.storage.getHydratedSyncRules({ defaultSchema: options.connections.schema }); this.connections = options.connections; this.abortSignal = options.abortSignal; } @@ -167,26 +167,23 @@ export class CDCStream { async populateTableCache() { const sourceTables = this.syncRules.getSourceTables(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, tablePattern); - for (const table of tables) { - this.tableCache.set(table); - } - } + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, tablePattern); + for (const table of tables) { + this.tableCache.set(table); } - ); + } } async getQualifiedTableNames( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tablePattern: TablePattern ): Promise { if (tablePattern.connectionTag != this.connections.connectionTag) { @@ -216,84 +213,104 @@ export class CDCStream { schema: matchedTable.schema }); - const table = await this.processTable( - batch, + const processedTables = await this.processTable( + writer, { name: matchedTable.name, schema: matchedTable.schema, objectId: matchedTable.objectId, replicaIdColumns: replicaIdColumns.columns }, - false + false, + tablePattern ); - tables.push(table); + // Drop conflicting tables. This includes for example renamed tables. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: { + name: matchedTable.name, + schema: matchedTable.schema, + objectId: matchedTable.objectId, + replicaIdColumns: replicaIdColumns.columns + } + }); + await writer.drop(dropTables); + + tables.push(...processedTables); } + return tables; } async processTable( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, table: SourceEntityDescriptor, - snapshot: boolean - ): Promise { + snapshot: boolean, + pattern: TablePattern + ): Promise { if (!table.objectId && typeof table.objectId != 'number') { throw new ReplicationAssertionError(`objectId expected, got ${typeof table.objectId}`); } - const resolved = await this.storage.resolveTable({ - group_id: this.groupId, + + const resolvedTables = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: table, - sync_rules: this.syncRules - }); - const captureInstance = await getCaptureInstance({ - connectionManager: this.connections, - tableName: resolved.table.name, - schema: resolved.table.schema + pattern }); - if (!captureInstance) { - throw new ServiceAssertionError( - `Missing capture instance for table ${toQualifiedTableName(resolved.table.schema, resolved.table.name)}` - ); - } - const resolvedTable = new MSSQLSourceTable({ - sourceTable: resolved.table, - captureInstance: captureInstance - }); - - // Drop conflicting tables. This includes for example renamed tables. - await batch.drop(resolved.dropTables); - // Snapshot if: - // 1. 
Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !resolved.table.snapshotComplete && resolved.table.syncAny; + let resultingTables: MSSQLSourceTable[] = []; - if (shouldSnapshot) { - // Truncate this table in case a previous snapshot was interrupted. - await batch.truncate([resolved.table]); + for (let table of resolvedTables) { + const captureInstance = await getCaptureInstance({ + connectionManager: this.connections, + tableName: table.name, + schema: table.schema + }); + if (!captureInstance) { + throw new ServiceAssertionError( + `Missing capture instance for table ${toQualifiedTableName(table.schema, table.name)}` + ); + } + const resolvedTable = new MSSQLSourceTable({ + sourceTable: table, + captureInstance: captureInstance + }); - // Start the snapshot inside a transaction. - try { - await this.snapshotTableInTx(batch, resolvedTable); - } finally { - // TODO Cleanup? + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + + if (shouldSnapshot) { + // Truncate this table in case a previous snapshot was interrupted. + await writer.truncate([table]); + + // Start the snapshot inside a transaction. + try { + await this.snapshotTableInTx(writer, resolvedTable); + } finally { + // TODO Cleanup? + } } + + resultingTables.push(resolvedTable); } - return resolvedTable; + return resultingTables; } - private async snapshotTableInTx(batch: storage.BucketStorageBatch, table: MSSQLSourceTable): Promise { + private async snapshotTableInTx(writer: storage.BucketDataWriter, table: MSSQLSourceTable): Promise { // Note: We use the "Read Committed" isolation level here, not snapshot isolation. // The data may change during the transaction, but that is compensated for in the streaming // replication afterward. const transaction = await this.connections.createTransaction(); await transaction.begin(sql.ISOLATION_LEVEL.READ_COMMITTED); try { - await this.snapshotTable(batch, transaction, table); + await this.snapshotTable(writer, transaction, table); // Get the current LSN. // The data will only be consistent once incremental replication has passed that point. @@ -310,7 +327,7 @@ export class CDCStream { const postSnapshotLSN = await getLatestLSN(this.connections); // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. await transaction.commit(); - const [updatedSourceTable] = await batch.markTableSnapshotDone([table.sourceTable], postSnapshotLSN.toString()); + const [updatedSourceTable] = await writer.markTableSnapshotDone([table.sourceTable], postSnapshotLSN.toString()); this.tableCache.updateSourceTable(updatedSourceTable); } catch (e) { await transaction.rollback(); @@ -318,11 +335,7 @@ export class CDCStream { } } - private async snapshotTable( - batch: storage.BucketStorageBatch, - transaction: sql.Transaction, - table: MSSQLSourceTable - ) { + private async snapshotTable(writer: storage.BucketDataWriter, transaction: sql.Transaction, table: MSSQLSourceTable) { let totalEstimatedCount = table.sourceTable.snapshotStatus?.totalEstimatedCount; let replicatedCount = table.sourceTable.snapshotStatus?.replicatedCount ?? 
0; let lastCountTime = 0; @@ -378,7 +391,7 @@ export class CDCStream { const inputRow: SqliteInputRow = toSqliteInputRow(result, columns); const row = this.syncRules.applyRowContext(inputRow); // This auto-flushes when the batch reaches its size limit - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table.sourceTable, before: undefined, @@ -396,7 +409,7 @@ export class CDCStream { } // Important: flush before marking progress - await batch.flush(); + await writer.flush(); let lastKey: Uint8Array | undefined; if (query instanceof BatchedSnapshotQuery) { @@ -410,7 +423,7 @@ export class CDCStream { totalEstimatedCount = await this.estimatedCountNumber(table, transaction); lastCountTime = performance.now(); } - const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, { + const updatedSourceTable = await writer.updateTableProgress(table.sourceTable, { lastKey: lastKey, replicatedCount: replicatedCount, totalEstimatedCount: totalEstimatedCount @@ -463,56 +476,52 @@ export class CDCStream { await this.storage.clear({ signal: this.abortSignal }); } - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - if (snapshotLSN == null) { - // First replication attempt - set the snapshot LSN to the current LSN before starting - snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString(); - await batch.setResumeLsn(snapshotLSN); - const latestLSN = (await getLatestLSN(this.connections)).toString(); - this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`); - } else { - this.logger.info(`Resuming snapshot at ${snapshotLSN}.`); - } + await using writer = await await this.storage.createWriter({ + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: false, + skipExistingRows: true + }); + if (snapshotLSN == null) { + // First replication attempt - set the snapshot LSN to the current LSN before starting + snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString(); + await writer.setResumeLsn(snapshotLSN); + const latestLSN = (await getLatestLSN(this.connections)).toString(); + this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`); + } else { + this.logger.info(`Resuming snapshot at ${snapshotLSN}.`); + } - const tablesToSnapshot: MSSQLSourceTable[] = []; - for (const table of this.tableCache.getAll()) { - if (table.sourceTable.snapshotComplete) { - this.logger.info(`Skipping table [${table.toQualifiedName()}] - snapshot already done.`); - continue; - } + const tablesToSnapshot: MSSQLSourceTable[] = []; + for (const table of this.tableCache.getAll()) { + if (table.sourceTable.snapshotComplete) { + this.logger.info(`Skipping table [${table.toQualifiedName()}] - snapshot already done.`); + continue; + } - const count = await this.estimatedCountNumber(table); - const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, { - totalEstimatedCount: count - }); - this.tableCache.updateSourceTable(updatedSourceTable); - tablesToSnapshot.push(table); + const count = await this.estimatedCountNumber(table); + const updatedSourceTable = await writer.updateTableProgress(table.sourceTable, { + totalEstimatedCount: count + }); + this.tableCache.updateSourceTable(updatedSourceTable); + tablesToSnapshot.push(table); - this.logger.info(`To replicate: ${table.toQualifiedName()} 
${table.sourceTable.formatSnapshotProgress()}`); - } + this.logger.info(`To replicate: ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()}`); + } - for (const table of tablesToSnapshot) { - await this.snapshotTableInTx(batch, table); - this.touch(); - } + for (const table of tablesToSnapshot) { + await this.snapshotTableInTx(writer, table); + this.touch(); + } - // This will not create a consistent checkpoint yet, but will persist the op. - // Actual checkpoint will be created when streaming replication caught up. - const postSnapshotLSN = await getLatestLSN(this.connections); - await batch.markAllSnapshotDone(postSnapshotLSN.toString()); - await batch.commit(snapshotLSN); + // This will not create a consistent checkpoint yet, but will persist the op. + // Actual checkpoint will be created when streaming replication caught up. + const postSnapshotLSN = await getLatestLSN(this.connections); + await writer.markAllSnapshotDone(postSnapshotLSN.toString()); + await writer.commit(snapshotLSN); - this.logger.info(`Snapshot done. Need to replicate from ${snapshotLSN} to ${postSnapshotLSN} to be consistent`); - } - ); + this.logger.info(`Snapshot done. Need to replicate from ${snapshotLSN} to ${postSnapshotLSN} to be consistent`); } async initReplication() { @@ -558,52 +567,49 @@ export class CDCStream { } async streamChanges() { - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: false, - skipExistingRows: false - }, - async (batch) => { - if (batch.resumeFromLsn == null) { - throw new ReplicationAssertionError(`No LSN found to resume replication from.`); - } - const startLSN = LSN.fromString(batch.resumeFromLsn); - const sourceTables: MSSQLSourceTable[] = this.tableCache.getAll(); - const eventHandler = this.createEventHandler(batch); - - const poller = new CDCPoller({ - connectionManager: this.connections, - eventHandler, - sourceTables, - startLSN, - logger: this.logger, - additionalConfig: this.options.additionalConfig - }); + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: false, + skipExistingRows: false + }); - this.abortSignal.addEventListener( - 'abort', - async () => { - await poller.stop(); - }, - { once: true } - ); + if (writer.resumeFromLsn == null) { + throw new ReplicationAssertionError(`No LSN found to resume replication from.`); + } + const startLSN = LSN.fromString(writer.resumeFromLsn); + const sourceTables: MSSQLSourceTable[] = this.tableCache.getAll(); + const eventHandler = this.createEventHandler(writer); - await createCheckpoint(this.connections); + const poller = new CDCPoller({ + connectionManager: this.connections, + eventHandler, + sourceTables, + startLSN, + logger: this.logger, + additionalConfig: this.options.additionalConfig + }); - this.logger.info(`Streaming changes from: ${startLSN}`); - await poller.replicateUntilStopped(); - } + this.abortSignal.addEventListener( + 'abort', + async () => { + await poller.stop(); + }, + { once: true } ); + + await createCheckpoint(this.connections); + + this.logger.info(`Streaming changes from: ${startLSN}`); + await poller.replicateUntilStopped(); } - private createEventHandler(batch: storage.BucketStorageBatch): CDCEventHandler { + private createEventHandler(writer: storage.BucketDataWriter): CDCEventHandler { return { onInsert: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const 
afterRow = this.toSqliteRow(row, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table.sourceTable, before: undefined, @@ -616,7 +622,7 @@ export class CDCStream { onUpdate: async (rowAfter: any, rowBefore: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const beforeRow = this.toSqliteRow(rowBefore, columns); const afterRow = this.toSqliteRow(rowAfter, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.UPDATE, sourceTable: table.sourceTable, before: beforeRow, @@ -628,7 +634,7 @@ export class CDCStream { }, onDelete: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const beforeRow = this.toSqliteRow(row, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.DELETE, sourceTable: table.sourceTable, before: beforeRow, @@ -639,7 +645,7 @@ export class CDCStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); }, onCommit: async (lsn: string, transactionCount: number) => { - await batch.commit(lsn); + await writer.commit(lsn); this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(transactionCount); this.isStartingReplication = false; }, diff --git a/modules/module-mssql/test/src/CDCStreamTestContext.ts b/modules/module-mssql/test/src/CDCStreamTestContext.ts index 6b674befc..8572b1aac 100644 --- a/modules/module-mssql/test/src/CDCStreamTestContext.ts +++ b/modules/module-mssql/test/src/CDCStreamTestContext.ts @@ -7,7 +7,7 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; -import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js'; import { CDCStream, CDCStreamOptions } from '@module/replication/CDCStream.js'; import { MSSQLConnectionManager } from '@module/replication/MSSQLConnectionManager.js'; @@ -167,7 +167,8 @@ export class CDCStreamTestContext implements AsyncDisposable { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); - const map = new Map(Object.entries(buckets)); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -179,8 +180,10 @@ export class CDCStreamTestContext implements AsyncDisposable { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - const map = new Map([[bucket, start]]); + let map = [bucketRequest(syncRules, bucket, start)]; + let data: OplogEntry[] = []; while (true) { const batch = this.storage!.getBucketDataBatch(checkpoint, map); @@ -190,7 +193,7 @@ export class CDCStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map.set(bucket, BigInt(batches[0]!.chunkData.next_after)); + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } @@ -204,7 +207,8 @@ export class CDCStreamTestContext implements AsyncDisposable { start = BigInt(start); } const { checkpoint } = await this.storage!.getCheckpoint(); - const map = new 
Map([[bucket, start]]); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/modules/module-mysql/src/replication/BinLogReplicationJob.ts b/modules/module-mysql/src/replication/BinLogReplicationJob.ts index bf72ca728..6c77fd89c 100644 --- a/modules/module-mysql/src/replication/BinLogReplicationJob.ts +++ b/modules/module-mysql/src/replication/BinLogReplicationJob.ts @@ -17,6 +17,10 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { this.connectionFactory = options.connectionFactory; } + public get storage() { + return this.options.storage; + } + get slot_name() { return this.options.storage.slot_name; } diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 9777e5a87..84df36748 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -72,7 +72,7 @@ export class BinLogStream { private readonly logger: Logger; - private tableCache = new Map(); + private tableCache = new Map(); /** * Time of the oldest uncommitted change, according to the source db. @@ -89,7 +89,7 @@ export class BinLogStream { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.connections = options.connections; - this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: this.defaultSchema }); + this.syncRules = options.storage.getHydratedSyncRules({ defaultSchema: this.defaultSchema }); this.groupId = options.storage.group_id; this.abortSignal = options.abortSignal; } @@ -126,59 +126,107 @@ export class BinLogStream { return this.connections.databaseName; } - async handleRelation(batch: storage.BucketStorageBatch, entity: storage.SourceEntityDescriptor, snapshot: boolean) { - const result = await this.storage.resolveTable({ - group_id: this.groupId, + private async handleRelationSetup( + writer: storage.BucketDataWriter, + entity: storage.SourceEntityDescriptor, + pattern: sync_rules.TablePattern + ) { + const resolvedTables = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: entity, - sync_rules: this.syncRules + pattern }); - // Since we create the objectId ourselves, this is always defined - this.tableCache.set(entity.objectId!, result.table); - - // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage. - await batch.drop(result.dropTables); - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not done yet, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - - if (shouldSnapshot) { - // Truncate this table in case a previous snapshot was interrupted. - await batch.truncate([result.table]); - - let gtid: common.ReplicatedGTID; - // Start the snapshot inside a transaction. - // We use a dedicated connection for this. 
- const connection = await this.connections.getStreamingConnection(); - - const promiseConnection = (connection as mysql.Connection).promise(); - try { - await promiseConnection.query(`SET time_zone = '+00:00'`); - await promiseConnection.query('START TRANSACTION'); - try { - gtid = await common.readExecutedGtid(promiseConnection); - await this.snapshotTable(connection as mysql.Connection, batch, result.table); - await promiseConnection.query('COMMIT'); - } catch (e) { - await this.tryRollback(promiseConnection); - throw e; + + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // but changes in replication identity columns can, so this is needed. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity + }); + await writer.drop(dropTables); + + this.tableCache.set(entity.objectId!, resolvedTables); + + return resolvedTables; + } + + async handleChangeRelation(writer: storage.BucketDataWriter, entity: storage.SourceEntityDescriptor) { + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: entity.schema, + name: entity.name + }); + + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // but changes in replication identity columns can, so this is needed. + // While order of drop / snapshots shouldn't matter, tests expect drops to happen first. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity + }); + await writer.drop(dropTables); + + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const resolvedTables = await writer.resolveTables({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity, + pattern + }); + + for (let table of resolvedTables) { + // Snapshot if: + // 1. Snapshot is not done yet, AND: + // 2. The table is used in sync rules. + const shouldSnapshot = !table.snapshotComplete && table.syncAny; + + if (shouldSnapshot) { + // Truncate this table in case a previous snapshot was interrupted. + await writer.truncate([table]); + + let gtid: common.ReplicatedGTID; + // Start the snapshot inside a transaction. + // We use a dedicated connection for this. 
+ const connection = await this.connections.getStreamingConnection(); + + const promiseConnection = (connection as mysql.Connection).promise(); + try { + await promiseConnection.query(`SET time_zone = '+00:00'`); + await promiseConnection.query('START TRANSACTION'); + try { + gtid = await common.readExecutedGtid(promiseConnection); + await this.snapshotTable(connection as mysql.Connection, writer, table); + await promiseConnection.query('COMMIT'); + } catch (e) { + await this.tryRollback(promiseConnection); + throw e; + } + } finally { + connection.release(); + } + const [updatedTable] = await writer.markTableSnapshotDone([table], gtid.comparable); + allTables.push(updatedTable); + } else { + allTables.push(table); } - } finally { - connection.release(); } - const [table] = await batch.markTableSnapshotDone([result.table], gtid.comparable); - return table; } - return result.table; + // Since we create the objectId ourselves, this is always defined + this.tableCache.set(entity.objectId!, allTables); + return allTables; } async getQualifiedTableNames( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tablePattern: sync_rules.TablePattern ): Promise { if (tablePattern.connectionTag != this.connectionTag) { @@ -189,24 +237,24 @@ export class BinLogStream { const matchedTables: string[] = await common.getTablesFromPattern(connection, tablePattern); connection.release(); - const tables: storage.SourceTable[] = []; + const allTables: storage.SourceTable[] = []; for (const matchedTable of matchedTables) { const replicaIdColumns = await this.getReplicaIdColumns(matchedTable, tablePattern.schema); - const table = await this.handleRelation( - batch, + const resolvedTables = await this.handleRelationSetup( + writer, { name: matchedTable, schema: tablePattern.schema, objectId: createTableId(tablePattern.schema, matchedTable), replicaIdColumns: replicaIdColumns }, - false + tablePattern ); - tables.push(table); + allTables.push(...resolvedTables); } - return tables; + return allTables; } /** @@ -263,27 +311,25 @@ export class BinLogStream { await promiseConnection.query(`SET time_zone = '+00:00'`); const sourceTables = this.syncRules.getSourceTables(); - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: common.ReplicatedGTID.ZERO.comparable, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, tablePattern); - for (let table of tables) { - await this.snapshotTable(connection as mysql.Connection, batch, table); - await batch.markTableSnapshotDone([table], headGTID.comparable); - await framework.container.probes.touch(); - } - } - const snapshotDoneGtid = await common.readExecutedGtid(promiseConnection); - await batch.markAllSnapshotDone(snapshotDoneGtid.comparable); - await batch.commit(headGTID.comparable); + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, tablePattern); + for (let table of tables) { + await this.snapshotTable(connection as mysql.Connection, writer, table); + await writer.markTableSnapshotDone([table], headGTID.comparable); + await framework.container.probes.touch(); } - ); + } + const snapshotDoneGtid = await 
common.readExecutedGtid(promiseConnection); + await writer.markAllSnapshotDone(snapshotDoneGtid.comparable); + const flushResults = await writer.flush(); + await writer.commit(headGTID.comparable); + lastOp = flushResults?.flushed_op ?? null; this.logger.info(`Initial replication done`); await promiseConnection.query('COMMIT'); @@ -306,7 +352,7 @@ export class BinLogStream { private async snapshotTable( connection: mysql.Connection, - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, table: storage.SourceTable ) { this.logger.info(`Replicating ${qualifiedMySQLTable(table)}`); @@ -335,7 +381,7 @@ export class BinLogStream { } const record = this.toSQLiteRow(row, columns!); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table, before: undefined, @@ -346,7 +392,7 @@ export class BinLogStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); } - await batch.flush(); + await writer.flush(); } async replicate() { @@ -378,30 +424,26 @@ export class BinLogStream { // We need to find the existing tables, to populate our table cache. // This is needed for includeSchema to work correctly. const sourceTables = this.syncRules.getSourceTables(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: common.ReplicatedGTID.ZERO.comparable, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - await this.getQualifiedTableNames(batch, tablePattern); - } - } - ); + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + for (let tablePattern of sourceTables) { + await this.getQualifiedTableNames(writer, tablePattern); + } } } - private getTable(tableId: string): storage.SourceTable { - const table = this.tableCache.get(tableId); - if (table == null) { + private getTables(tableId: string): storage.SourceTable[] { + const tables = this.tableCache.get(tableId); + if (tables == null) { // We should always receive a replication message before the relation is used. // If we can't find it, it's a bug. 
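// Rough sketch of the writer lifecycle that replaces the callback-based storage.startBatch()
// API throughout this diff, assuming BucketDataWriter implements AsyncDisposable so that
// `await using` releases it when the enclosing scope exits; the option and method types below
// are simplified placeholders, not the real storage interfaces.
interface ExampleWriter extends AsyncDisposable {
  flush(): Promise<unknown>;
  commit(lsn: string): Promise<unknown>;
}

async function replicateWithWriter(
  storage: { createWriter(options: { storeCurrentData: boolean }): Promise<ExampleWriter> },
  lsn: string
) {
  // Previously: await storage.startBatch(options, async (batch) => { ... });
  // Now the writer is an explicit resource, usable across helper methods until disposed.
  await using writer = await storage.createWriter({ storeCurrentData: true });
  await writer.flush();
  await writer.commit(lsn);
  // writer[Symbol.asyncDispose]() runs automatically when this scope exits.
}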
throw new ReplicationAssertionError(`Missing relation cache for ${tableId}`); } - return table; + return tables; } async streamChanges() { @@ -418,38 +460,39 @@ export class BinLogStream { connection.release(); if (!this.stopped) { - await this.storage.startBatch( - { zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, storeCurrentData: true }, - async (batch) => { - const binlogEventHandler = this.createBinlogEventHandler(batch); - const binlogListener = new BinLogListener({ - logger: this.logger, - sourceTables: this.syncRules.getSourceTables(), - startGTID: fromGTID, - connectionManager: this.connections, - serverId: serverId, - eventHandler: binlogEventHandler - }); - - this.abortSignal.addEventListener( - 'abort', - async () => { - await binlogListener.stop(); - }, - { once: true } - ); - - await binlogListener.start(); - await binlogListener.replicateUntilStopped(); - } + await using writer = await this.storage.createWriter({ + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + + const binlogEventHandler = this.createBinlogEventHandler(writer); + const binlogListener = new BinLogListener({ + logger: this.logger, + sourceTables: this.syncRules.getSourceTables(), + startGTID: fromGTID, + connectionManager: this.connections, + serverId: serverId, + eventHandler: binlogEventHandler + }); + + this.abortSignal.addEventListener( + 'abort', + async () => { + await binlogListener.stop(); + }, + { once: true } ); + + await binlogListener.start(); + await binlogListener.replicateUntilStopped(); } } - private createBinlogEventHandler(batch: storage.BucketStorageBatch): BinLogEventHandler { + private createBinlogEventHandler(writer: storage.BucketDataWriter): BinLogEventHandler { return { onWrite: async (rows: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.INSERT, rows: rows, tableEntry: tableMap @@ -457,7 +500,7 @@ export class BinLogStream { }, onUpdate: async (rowsAfter: Row[], rowsBefore: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.UPDATE, rows: rowsAfter, rows_before: rowsBefore, @@ -465,21 +508,21 @@ export class BinLogStream { }); }, onDelete: async (rows: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.DELETE, rows: rows, tableEntry: tableMap }); }, onKeepAlive: async (lsn: string) => { - const didCommit = await batch.keepalive(lsn); + const didCommit = await writer.keepalive(lsn); if (didCommit) { this.oldestUncommittedChange = null; } }, onCommit: async (lsn: string) => { this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); - const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didCommit = await writer.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { this.oldestUncommittedChange = null; this.isStartingReplication = false; @@ -494,41 +537,43 @@ export class BinLogStream { this.isStartingReplication = false; }, onSchemaChange: async (change: SchemaChange) => { - await this.handleSchemaChange(batch, change); + await this.handleSchemaChange(writer, change); } }; } - private async handleSchemaChange(batch: storage.BucketStorageBatch, change: SchemaChange): Promise { + private async 
handleSchemaChange(writer: storage.BucketDataWriter, change: SchemaChange): Promise { if (change.type === SchemaChangeType.RENAME_TABLE) { const fromTableId = createTableId(change.schema, change.table); - const fromTable = this.tableCache.get(fromTableId); + // FIXME: we should use tables from the storage, not from the cache. + const fromTables = this.tableCache.get(fromTableId); // Old table needs to be cleaned up - if (fromTable) { - await batch.drop([fromTable]); + if (fromTables != null) { + await writer.drop(fromTables); this.tableCache.delete(fromTableId); } + // The new table matched a table in the sync rules if (change.newTable) { - await this.handleCreateOrUpdateTable(batch, change.newTable!, change.schema); + await this.handleCreateOrUpdateTable(writer, change.newTable!, change.schema); } } else { const tableId = createTableId(change.schema, change.table); - const table = this.getTable(tableId); + const tables = this.getTables(tableId); switch (change.type) { case SchemaChangeType.ALTER_TABLE_COLUMN: case SchemaChangeType.REPLICATION_IDENTITY: // For these changes, we need to update the table if the replication identity columns have changed. - await this.handleCreateOrUpdateTable(batch, change.table, change.schema); + await this.handleCreateOrUpdateTable(writer, change.table, change.schema); break; case SchemaChangeType.TRUNCATE_TABLE: - await batch.truncate([table]); + await writer.truncate(tables); break; case SchemaChangeType.DROP_TABLE: - await batch.drop([table]); + await writer.drop(tables); this.tableCache.delete(tableId); break; default: @@ -551,25 +596,21 @@ export class BinLogStream { } private async handleCreateOrUpdateTable( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tableName: string, schema: string - ): Promise { + ): Promise { const replicaIdColumns = await this.getReplicaIdColumns(tableName, schema); - return await this.handleRelation( - batch, - { - name: tableName, - schema: schema, - objectId: createTableId(schema, tableName), - replicaIdColumns: replicaIdColumns - }, - true - ); + return await this.handleChangeRelation(writer, { + name: tableName, + schema: schema, + objectId: createTableId(schema, tableName), + replicaIdColumns: replicaIdColumns + }); } private async writeChanges( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, msg: { type: storage.SaveOperationTag; rows: Row[]; @@ -580,23 +621,25 @@ export class BinLogStream { const columns = common.toColumnDescriptors(msg.tableEntry); const tableId = createTableId(msg.tableEntry.parentSchema, msg.tableEntry.tableName); - let table = this.tableCache.get(tableId); - if (table == null) { + let tables = this.tableCache.get(tableId); + if (tables == null) { // This is an insert for a new table that matches a table in the sync rules // We need to create the table in the storage and cache it. 
- table = await this.handleCreateOrUpdateTable(batch, msg.tableEntry.tableName, msg.tableEntry.parentSchema); + tables = await this.handleCreateOrUpdateTable(writer, msg.tableEntry.tableName, msg.tableEntry.parentSchema); } for (const [index, row] of msg.rows.entries()) { - await this.writeChange(batch, { - type: msg.type, - database: msg.tableEntry.parentSchema, - sourceTable: table!, - table: msg.tableEntry.tableName, - columns: columns, - row: row, - previous_row: msg.rows_before?.[index] - }); + for (let table of tables) { + await this.writeChange(writer, { + type: msg.type, + database: msg.tableEntry.parentSchema, + sourceTable: table!, + table: msg.tableEntry.tableName, + columns: columns, + row: row, + previous_row: msg.rows_before?.[index] + }); + } } return null; } @@ -607,14 +650,14 @@ export class BinLogStream { } private async writeChange( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, payload: WriteChangePayload ): Promise { switch (payload.type) { case storage.SaveOperationTag.INSERT: this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const record = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: payload.sourceTable, before: undefined, @@ -631,7 +674,7 @@ export class BinLogStream { : undefined; const after = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.UPDATE, sourceTable: payload.sourceTable, before: beforeUpdated, @@ -646,7 +689,7 @@ export class BinLogStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const beforeDeleted = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.DELETE, sourceTable: payload.sourceTable, before: beforeDeleted, diff --git a/modules/module-mysql/test/src/BinLogListener.test.ts b/modules/module-mysql/test/src/BinLogListener.test.ts index 9fb75fc5a..ddb9386ba 100644 --- a/modules/module-mysql/test/src/BinLogListener.test.ts +++ b/modules/module-mysql/test/src/BinLogListener.test.ts @@ -13,7 +13,7 @@ import { getMySQLVersion, qualifiedMySQLTable, satisfiesVersion } from '@module/ import crypto from 'crypto'; import { TablePattern } from '@powersync/service-sync-rules'; -describe('BinlogListener tests', () => { +describe('BinlogListener tests', async () => { const MAX_QUEUE_CAPACITY_MB = 1; const BINLOG_LISTENER_CONNECTION_OPTIONS = { ...TEST_CONNECTION_OPTIONS, @@ -23,15 +23,15 @@ describe('BinlogListener tests', () => { let connectionManager: MySQLConnectionManager; let eventHandler: TestBinLogEventHandler; let binLogListener: BinLogListener; - let isMySQL57: boolean = false; + let isMySQL57: boolean; - beforeAll(async () => { + { connectionManager = new MySQLConnectionManager(BINLOG_LISTENER_CONNECTION_OPTIONS, {}); const connection = await connectionManager.getConnection(); const version = await getMySQLVersion(connection); isMySQL57 = satisfiesVersion(version, '5.7.x'); connection.release(); - }); + } beforeEach(async () => { const connection = await connectionManager.getConnection(); @@ -232,20 +232,19 @@ describe('BinlogListener tests', () => { ); }); - test('Schema change event: Rename column via rename statement', async () => { + test.skipIf(isMySQL57)('Schema change event: Rename column via rename statement', async () => { // Syntax ALTER TABLE RENAME COLUMN was only introduced in MySQL 8.0.0 - if 
(!isMySQL57) { - await binLogListener.start(); - await connectionManager.query(`ALTER TABLE test_DATA RENAME COLUMN description TO description_new`); - await vi.waitFor(() => expect(eventHandler.schemaChanges.length).toBe(1), { timeout: 5000 }); - await binLogListener.stop(); - assertSchemaChange( - eventHandler.schemaChanges[0], - SchemaChangeType.ALTER_TABLE_COLUMN, - connectionManager.databaseName, - 'test_DATA' - ); - } + + await binLogListener.start(); + await connectionManager.query(`ALTER TABLE test_DATA RENAME COLUMN description TO description_new`); + await vi.waitFor(() => expect(eventHandler.schemaChanges.length).toBe(1), { timeout: 5000 }); + await binLogListener.stop(); + assertSchemaChange( + eventHandler.schemaChanges[0], + SchemaChangeType.ALTER_TABLE_COLUMN, + connectionManager.databaseName, + 'test_DATA' + ); }); test('Schema change event: Multiple column changes', async () => { diff --git a/modules/module-mysql/test/src/BinlogStreamUtils.ts b/modules/module-mysql/test/src/BinlogStreamUtils.ts index 665be6c21..07e06087c 100644 --- a/modules/module-mysql/test/src/BinlogStreamUtils.ts +++ b/modules/module-mysql/test/src/BinlogStreamUtils.ts @@ -13,7 +13,7 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; -import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import mysqlPromise from 'mysql2/promise'; import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; import timers from 'timers/promises'; @@ -150,7 +150,8 @@ export class BinlogStreamTestContext { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { const checkpoint = await this.getCheckpoint(options); - const map = new Map(Object.entries(buckets)); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -163,8 +164,9 @@ export class BinlogStreamTestContext { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - const map = new Map([[bucket, start]]); + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/modules/module-mysql/test/src/schema-changes.test.ts b/modules/module-mysql/test/src/schema-changes.test.ts index e09522488..e83f0c3a7 100644 --- a/modules/module-mysql/test/src/schema-changes.test.ts +++ b/modules/module-mysql/test/src/schema-changes.test.ts @@ -26,18 +26,19 @@ const PUT_T3 = test_utils.putOp('test_data', { id: 't3', description: 'test3' }) const REMOVE_T1 = test_utils.removeOp('test_data', 't1'); const REMOVE_T2 = test_utils.removeOp('test_data', 't2'); -function defineTests(config: storage.TestStorageConfig) { +async function defineTests(config: storage.TestStorageConfig) { const factory = config.factory; - let isMySQL57: boolean = false; - beforeAll(async () => { + let isMySQL57: boolean; + { + // This is similar to a beforeAll() block, but doing it this way ensures the flag is available for skipIf(). 
const connectionManager = new MySQLConnectionManager(TEST_CONNECTION_OPTIONS, {}); const connection = await connectionManager.getConnection(); const version = await getMySQLVersion(connection); isMySQL57 = satisfiesVersion(version, '5.7.x'); connection.release(); await connectionManager.end(); - }); + } test('Re-create table', async () => { await using context = await BinlogStreamTestContext.open(factory); @@ -81,7 +82,7 @@ function defineTests(config: storage.TestStorageConfig) { ]); }); - test('Create table: New table in is in the sync rules', async () => { + test('Create table: New table is in the sync rules', async () => { await using context = await BinlogStreamTestContext.open(factory); const { connectionManager } = context; await context.updateSyncRules(BASIC_SYNC_RULES); @@ -102,48 +103,47 @@ function defineTests(config: storage.TestStorageConfig) { expect(data).toMatchObject([PUT_T1, PUT_T1]); }); - test('Create table: New table is created from existing data', async () => { + test.skipIf(isMySQL57)('Create table: New table is created from existing data', async () => { // Create table with select from is not allowed in MySQL 5.7 when enforce_gtid_consistency=ON - if (!isMySQL57) { - await using context = await BinlogStreamTestContext.open(factory); - const { connectionManager } = context; - await context.updateSyncRules(BASIC_SYNC_RULES); - await connectionManager.query(`CREATE TABLE test_data_from + await using context = await BinlogStreamTestContext.open(factory); + const { connectionManager } = context; + await context.updateSyncRules(BASIC_SYNC_RULES); + + await connectionManager.query(`CREATE TABLE test_data_from ( id CHAR(36) PRIMARY KEY, description TEXT )`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t1', 'test1')`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t2', 'test2')`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t3', 'test3')`); - await context.replicateSnapshot(); - await context.startStreaming(); - - // Add table after initial replication - await connectionManager.query(`CREATE TABLE test_data SELECT * FROM test_data_from`); - - const data = await context.getBucketData('global[]'); - - const reduced = test_utils.reduceBucket(data).slice(1); - expect(reduced.sort(compareIds)).toMatchObject([PUT_T1, PUT_T2, PUT_T3]); - - // Interestingly, the create with select triggers binlog row write events - expect(data).toMatchObject([ - // From snapshot - PUT_T1, - PUT_T2, - PUT_T3, - // From replication stream - PUT_T1, - PUT_T2, - PUT_T3 - ]); - } + await context.replicateSnapshot(); + await context.startStreaming(); + + // Add table after initial replication + await connectionManager.query(`CREATE TABLE test_data SELECT * FROM test_data_from`); + + const data = await context.getBucketData('global[]'); + + const reduced = test_utils.reduceBucket(data).slice(1); + expect(reduced.sort(compareIds)).toMatchObject([PUT_T1, PUT_T2, PUT_T3]); + + // Interestingly, the create with select triggers binlog row write events + expect(data).toMatchObject([ + // From snapshot + PUT_T1, + PUT_T2, + PUT_T3, + // From replication stream + PUT_T1, + PUT_T2, + PUT_T3 + ]); }); test('Create table: New table is not in the sync rules', async () 
=> { diff --git a/modules/module-mysql/test/src/util.ts b/modules/module-mysql/test/src/util.ts index 23eb076bc..58a031e41 100644 --- a/modules/module-mysql/test/src/util.ts +++ b/modules/module-mysql/test/src/util.ts @@ -30,11 +30,11 @@ export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils. export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageConfig) => void) { describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { - fn(INITIALIZED_MONGO_STORAGE_FACTORY); + return fn(INITIALIZED_MONGO_STORAGE_FACTORY); }); describe.skipIf(!env.TEST_POSTGRES_STORAGE)(`postgres storage`, options, function () { - fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); + return fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); }); } diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index 85fbe4dad..e49f49b2d 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -9,10 +9,11 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { models, NormalizedPostgresStorageConfig } from '../types/types.js'; import { NOTIFICATION_CHANNEL, STORAGE_SCHEMA_NAME } from '../utils/db.js'; -import { notifySyncRulesUpdate } from './batch/PostgresBucketBatch.js'; +import { notifySyncRulesUpdate, PostgresBucketBatch } from './batch/PostgresBucketBatch.js'; import { PostgresSyncRulesStorage } from './PostgresSyncRulesStorage.js'; import { PostgresPersistedSyncRulesContent } from './sync-rules/PostgresPersistedSyncRulesContent.js'; import { getStorageApplicationName } from '../utils/application-name.js'; +import { PostgresBucketDataWriter } from './batch/PostgresBucketDataWriter.js'; export type PostgresBucketStorageOptions = { config: NormalizedPostgresStorageConfig; @@ -43,6 +44,29 @@ export class PostgresBucketStorageFactory }); } + async createCombinedWriter( + storages: SyncRulesBucketStorage[], + options: storage.CreateWriterOptions + ): Promise { + const syncRules = storages.map((s) => s.getHydratedSyncRules(options)); + + const rowProcessor = new sync_rules.MultiSyncRules(syncRules); + const writer = new PostgresBucketDataWriter({ + ...options, + db: this.db, + rowProcessor, + storeCurrentData: options.storeCurrentData ?? true, + skipExistingRows: options.skipExistingRows ?? 
false + }); + + for (let storage of storages) { + const bucketBatch = await (storage as PostgresSyncRulesStorage).createBucketBatch(options); + writer.addSubWriter(bucketBatch); + } + + return writer; + } + async [Symbol.asyncDispose]() { await this.db[Symbol.asyncDispose](); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts b/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts new file mode 100644 index 000000000..3eafe949c --- /dev/null +++ b/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts @@ -0,0 +1,41 @@ +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { SourceTable, SourceTableOptions } from '@powersync/service-core'; + +export class PostgresSourceTable extends SourceTable { + public readonly groupId: number; + + constructor(options: SourceTableOptions, postgresOptions: { groupId: number }) { + super(options); + this.groupId = postgresOptions.groupId; + + if (typeof options.id != 'string') { + throw new ReplicationAssertionError('PostgresSourceTable id must be a string'); + } + } + + get id() { + return this.options.id as string; + } + + clone(): PostgresSourceTable { + const copy = new PostgresSourceTable( + { + id: this.id, + connectionTag: this.connectionTag, + objectId: this.objectId, + schema: this.schema, + name: this.name, + replicaIdColumns: this.replicaIdColumns, + snapshotComplete: this.snapshotComplete, + pattern: this.pattern, + bucketDataSourceIds: this.bucketDataSourceIds, + parameterLookupSourceIds: this.parameterLookupSourceIds + }, + { groupId: this.groupId } + ); + copy.syncData = this.syncData; + copy.syncParameters = this.syncParameters; + copy.snapshotStatus = this.snapshotStatus; + return copy; + } +} diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 004d10ed0..8e22026b4 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -2,6 +2,9 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { BroadcastIterable, BucketChecksum, + BucketChecksumRequest, + BucketDataRequest, + BucketDataWriter, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, GetCheckpointChangesOptions, @@ -10,6 +13,7 @@ import { LastValueSink, maxLsn, PartialChecksum, + PersistedSyncRules, PopulateChecksumCacheOptions, PopulateChecksumCacheResults, ReplicationCheckpoint, @@ -35,6 +39,8 @@ import { PostgresBucketBatch } from './batch/PostgresBucketBatch.js'; import { PostgresWriteCheckpointAPI } from './checkpoints/PostgresWriteCheckpointAPI.js'; import { PostgresBucketStorageFactory } from './PostgresBucketStorageFactory.js'; import { PostgresCompactor } from './PostgresCompactor.js'; +import { postgresTableId } from './batch/PostgresPersistedBatch.js'; +import { PostgresSourceTable } from './PostgresSourceTable.js'; export type PostgresSyncRulesStorageOptions = { factory: PostgresBucketStorageFactory; @@ -60,7 +66,7 @@ export class PostgresSyncRulesStorage // TODO we might be able to share this in an abstract class private parsedSyncRulesCache: - | { parsed: sync_rules.HydratedSyncRules; options: storage.ParseSyncRulesOptions } + | { parsed: PersistedSyncRules; hydrated: sync_rules.HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; private _checksumCache: storage.ChecksumCache | undefined; @@ -97,17 +103,24 @@ export class 
PostgresSyncRulesStorage } // TODO we might be able to share this in an abstract class - getParsedSyncRules(options: storage.ParseSyncRulesOptions): sync_rules.HydratedSyncRules { + + getParsedSyncRules(options: storage.ParseSyncRulesOptions): PersistedSyncRules { + this.getHydratedSyncRules(options); + return this.parsedSyncRulesCache!.parsed; + } + + getHydratedSyncRules(options: storage.ParseSyncRulesOptions): sync_rules.HydratedSyncRules { const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {}; /** * Check if the cached sync rules, if present, had the same options. * Parse sync rules if the options are different or if there is no cached value. */ if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) { - this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options }; + const parsed = this.sync_rules.parsed(options); + this.parsedSyncRulesCache = { parsed, hydrated: parsed.hydratedSyncRules(), options }; } - return this.parsedSyncRulesCache!.parsed; + return this.parsedSyncRulesCache!.hydrated; } async reportError(e: any): Promise { @@ -166,10 +179,12 @@ export class PostgresSyncRulesStorage } async resolveTable(options: storage.ResolveTableOptions): Promise { - const { group_id, connection_id, connection_tag, entity_descriptor } = options; + const { connection_id, connection_tag, entity_descriptor } = options; const { schema, name: table, objectId, replicaIdColumns } = entity_descriptor; + const group_id = this.group_id; + const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ name: column.name, type: column.type, @@ -212,6 +227,7 @@ export class PostgresSyncRulesStorage } if (sourceTableRow == null) { + const id = options.idGenerator ? postgresTableId(options.idGenerator()) : uuid.v4(); const row = await db.sql` INSERT INTO source_tables ( @@ -225,7 +241,7 @@ export class PostgresSyncRulesStorage ) VALUES ( - ${{ type: 'varchar', value: uuid.v4() }}, + ${{ type: 'varchar', value: id }}, ${{ type: 'int4', value: group_id }}, ${{ type: 'int4', value: connection_id }}, --- The objectId can be string | number | undefined, we store it as jsonb value @@ -242,15 +258,18 @@ export class PostgresSyncRulesStorage sourceTableRow = row; } - const sourceTable = new storage.SourceTable({ - id: sourceTableRow!.id, - connectionTag: connection_tag, - objectId: objectId, - schema: schema, - name: table, - replicaIdColumns: replicaIdColumns, - snapshotComplete: sourceTableRow!.snapshot_done ?? true - }); + const sourceTable = new PostgresSourceTable( + { + id: sourceTableRow!.id, + connectionTag: connection_tag, + objectId: objectId, + schema: schema, + name: table, + replicaIdColumns: replicaIdColumns, + snapshotComplete: sourceTableRow!.snapshot_done ?? true + }, + { groupId: group_id } + ); if (!sourceTable.snapshotComplete) { sourceTable.snapshotStatus = { totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), @@ -308,29 +327,33 @@ export class PostgresSyncRulesStorage table: sourceTable, dropTables: truncatedTables.map( (doc) => - new storage.SourceTable({ - id: doc.id, - connectionTag: connection_tag, - objectId: doc.relation_id?.object_id ?? 0, - schema: doc.schema_name, - name: doc.table_name, - replicaIdColumns: - doc.replica_id_columns?.map((c) => ({ - name: c.name, - typeOid: c.typeId, - type: c.type - })) ?? [], - snapshotComplete: doc.snapshot_done ?? 
true - }) + new PostgresSourceTable( + { + id: doc.id, + connectionTag: connection_tag, + objectId: doc.relation_id?.object_id ?? 0, + schema: doc.schema_name, + name: doc.table_name, + replicaIdColumns: + doc.replica_id_columns?.map((c) => ({ + name: c.name, + typeOid: c.typeId, + type: c.type + })) ?? [], + snapshotComplete: doc.snapshot_done ?? true + }, + { groupId: group_id } + ) ) }; }); } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { + async createWriter(options: storage.CreateWriterOptions): Promise { + return await this.factory.createCombinedWriter([this], options); + } + + async createBucketBatch(options: storage.CreateWriterOptions): Promise { const syncRules = await this.db.sql` SELECT last_checkpoint_lsn, @@ -350,6 +373,7 @@ export class PostgresSyncRulesStorage const batch = new PostgresBucketBatch({ logger: options.logger ?? framework.logger, db: this.db, + storage: this, sync_rules: this.sync_rules.parsed(options).hydratedSyncRules(), group_id: this.group_id, slot_name: this.slot_name, @@ -362,14 +386,7 @@ export class PostgresSyncRulesStorage markRecordUnavailable: options.markRecordUnavailable }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); - - await callback(batch); - await batch.flush(); - if (batch.last_flushed_op != null) { - return { flushed_op: batch.last_flushed_op }; - } else { - return null; - } + return batch; } async getParameterSets( @@ -414,10 +431,10 @@ export class PostgresSyncRulesStorage async *getBucketDataBatch( checkpoint: InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: storage.BucketDataBatchOptions ): AsyncIterable { - if (dataBuckets.size == 0) { + if (dataBuckets.length == 0) { return; } @@ -429,10 +446,11 @@ export class PostgresSyncRulesStorage // not match up with chunks. const end = checkpoint ?? BIGINT_MAX; - const filters = Array.from(dataBuckets.entries()).map(([name, start]) => ({ - bucket_name: name, + const filters = dataBuckets.map(({ bucket, start }) => ({ + bucket_name: bucket, start: start })); + const bucketMap = new Map(dataBuckets.map((d) => [d.bucket, d.start])); const batchRowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT; const chunkSizeLimitBytes = options?.chunkLimitBytes ?? 
storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES; @@ -532,7 +550,7 @@ export class PostgresSyncRulesStorage } if (start == null) { - const startOpId = dataBuckets.get(bucket_name); + const startOpId = bucketMap.get(bucket_name); if (startOpId == null) { throw new framework.ServiceAssertionError(`data for unexpected bucket: ${bucket_name}`); } @@ -587,7 +605,7 @@ export class PostgresSyncRulesStorage } } - async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: utils.InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.checksumCache.getChecksumMap(checkpoint, buckets); } diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 000928abb..a4cd2cf93 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -27,10 +27,12 @@ import { batchCreateCustomWriteCheckpoints } from '../checkpoints/PostgresWriteC import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; import { PostgresPersistedBatch, postgresTableId } from './PostgresPersistedBatch.js'; import { bigint } from '../../types/codecs.js'; +import { PostgresSyncRulesStorage } from '../PostgresSyncRulesStorage.js'; export interface PostgresBucketBatchOptions { logger: Logger; db: lib_postgres.DatabaseClient; + storage: PostgresSyncRulesStorage; sync_rules: sync_rules.HydratedSyncRules; group_id: number; slot_name: string; @@ -64,7 +66,6 @@ const CheckpointWithStatus = StatefulCheckpoint.and( created_checkpoint: t.boolean }) ); -type CheckpointWithStatusDecoded = t.Decoded; /** * 15MB. Currently matches MongoDB. @@ -85,11 +86,12 @@ export class PostgresBucketBatch protected db: lib_postgres.DatabaseClient; protected group_id: number; protected last_checkpoint_lsn: string | null; + public readonly storage: PostgresSyncRulesStorage; protected persisted_op: InternalOpId | null; protected write_checkpoint_batch: storage.CustomWriteCheckpointOptions[]; - protected readonly sync_rules: sync_rules.HydratedSyncRules; + public readonly sync_rules: sync_rules.HydratedSyncRules; protected batch: OperationBatch | null; private lastWaitingLogThrottled = 0; private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; @@ -100,6 +102,7 @@ export class PostgresBucketBatch super(); this.logger = options.logger; this.db = options.db; + this.storage = options.storage; this.group_id = options.group_id; this.last_checkpoint_lsn = options.last_checkpoint_lsn; this.resumeFromLsn = options.resumeFromLsn; @@ -118,6 +121,10 @@ export class PostgresBucketBatch } async [Symbol.asyncDispose]() { + await this.dispose(); + } + + async dispose(): Promise { super.clearListeners(); } @@ -177,6 +184,9 @@ export class PostgresBucketBatch } } + /** + * No-op for tables we do not own, although it does still have some overhead. + */ protected async truncateSingle(sourceTable: storage.SourceTable) { // To avoid too large transactions, we limit the amount of data we delete per transaction. // Since we don't use the record data here, we don't have explicit size limits per batch. 
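The hunk above changes getBucketDataBatch and getChecksums from taking a Map keyed on bucket name to taking an array of request objects, and the test changes further down build those with a bucketRequest(...) helper. A minimal sketch of the call-site migration, assuming only the { bucket, start } shape that the hunk itself destructures; the real BucketDataRequest type may carry more fields:

// Local stand-in for the request shape inferred from this diff.
interface BucketDataRequestSketch {
  bucket: string;
  start: bigint; // InternalOpId in the real code
}

// Old call sites built a Map keyed on bucket name:
const oldStyle = new Map<string, bigint>([['global[]', 0n]]);

// New call sites pass an array of request objects instead:
const requests: BucketDataRequestSketch[] = Array.from(oldStyle, ([bucket, start]) => ({ bucket, start }));

// Inside the storage implementation the array is indexed back into a Map where
// per-bucket lookups are still needed, mirroring bucketMap in the hunk above:
const bucketMap = new Map(requests.map((r) => [r.bucket, r.start]));
console.log(bucketMap.get('global[]')); // 0n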
@@ -255,10 +265,12 @@ export class PostgresBucketBatch await this.db.transaction(async (db) => { for (const table of sourceTables) { + // Only delete tables we own await db.sql` DELETE FROM source_tables WHERE id = ${{ type: 'varchar', value: table.id }} + AND group_id = ${{ type: 'int4', value: this.group_id }} `.execute(); } }); diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts new file mode 100644 index 000000000..47879161e --- /dev/null +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts @@ -0,0 +1,265 @@ +import * as lib_postgres from '@powersync/lib-service-postgres'; +import { Logger, ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { + BatchedCustomWriteCheckpointOptions, + BucketStorageMarkRecordUnavailable, + maxLsn, + storage +} from '@powersync/service-core'; +import { RowProcessor } from '@powersync/service-sync-rules'; +import { models } from '../../types/types.js'; +import { PostgresBucketBatch } from './PostgresBucketBatch.js'; +import { postgresTableId } from './PostgresPersistedBatch.js'; +import { PostgresSourceTable } from '../PostgresSourceTable.js'; + +export interface PostgresWriterOptions { + db: lib_postgres.DatabaseClient; + rowProcessor: RowProcessor; + storeCurrentData: boolean; + skipExistingRows: boolean; + logger?: Logger; + markRecordUnavailable?: BucketStorageMarkRecordUnavailable; +} + +export class PostgresBucketDataWriter implements storage.BucketDataWriter { + public readonly rowProcessor: RowProcessor; + write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; + + protected db: lib_postgres.DatabaseClient; + + public subWriters: PostgresBucketBatch[] = []; + + constructor(options: PostgresWriterOptions) { + this.db = options.db; + this.rowProcessor = options.rowProcessor; + } + + addSubWriter(subWriter: PostgresBucketBatch) { + this.subWriters.push(subWriter); + } + + get resumeFromLsn(): string | null { + // FIXME: check the logic here when there are multiple batches + let lsn: string | null = null; + for (let sub of this.subWriters) { + // TODO: should this be min instead? 
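One way to think about the TODO above on resumeFromLsn: the combined writer restarts a single replication stream that feeds every sub-batch, so resuming from the maximum of the sub-batches' resume points could skip WAL that a lagging batch still needs, while resuming from the minimum only replays changes that the batches ahead have already persisted. A hedged sketch of the alternative, using a hypothetical minLsn helper (this diff only shows maxLsn); the string comparison assumes fixed-width, zero-padded LSN strings, as in ZERO_LSN elsewhere in this patch:

function minLsn(a: string | null, b: string | null): string | null {
  // Lexicographic comparison is only valid for fixed-width, zero-padded LSN strings.
  if (a == null) return b;
  if (b == null) return a;
  return a < b ? a : b;
}

// Conservative combined resume point: no sub-batch misses changes, at the cost of
// replaying WAL that the furthest-ahead batch has already seen.
function combinedResumeFromLsn(subResumePoints: Array<string | null>): string | null {
  let result: string | null = null;
  for (const lsn of subResumePoints) {
    result = minLsn(result, lsn);
  }
  return result;
}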
+ lsn = maxLsn(lsn, sub.resumeFromLsn); + } + return lsn; + } + + async keepalive(lsn: string): Promise { + let didAny = false; + for (let batch of this.subWriters) { + const didBatchKeepalive = await batch.keepalive(lsn); + didAny ||= didBatchKeepalive; + } + return didAny; + } + + async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + let didCommit = false; + for (let batch of this.subWriters) { + const didWriterCommit = await batch.commit(lsn, options); + didCommit ||= didWriterCommit; + } + return didCommit; + } + + async setResumeLsn(lsn: string): Promise { + for (let batch of this.subWriters) { + await batch.setResumeLsn(lsn); + } + } + + async resolveTables(options: storage.ResolveTablesOptions): Promise { + let result: storage.SourceTable[] = []; + for (let subWriter of this.subWriters) { + const subResult = await subWriter.storage.resolveTable({ + connection_id: options.connection_id, + connection_tag: options.connection_tag, + entity_descriptor: options.entity_descriptor, + sync_rules: subWriter.sync_rules, + idGenerator: options.idGenerator + }); + result.push(subResult.table); + } + return result; + } + + async resolveTablesToDrop(options: storage.ResolveTableToDropsOptions): Promise { + // FIXME: remove the duplicate work between this and resolveTables() + let result: storage.SourceTable[] = []; + for (let subWriter of this.subWriters) { + const subResult = await subWriter.storage.resolveTable({ + connection_id: options.connection_id, + connection_tag: options.connection_tag, + entity_descriptor: options.entity_descriptor, + sync_rules: subWriter.sync_rules + }); + result.push(...subResult.dropTables); + } + return result; + } + + private subWriterForTable(table: storage.SourceTable): PostgresBucketBatch { + // FIXME: store on the SourceTable instead? + if (!(table instanceof PostgresSourceTable)) { + throw new ReplicationAssertionError(`Source table is not a PostgresSourceTable`); + } + const subWriter = this.subWriters.find((sw) => sw.storage.group_id === table.groupId); + if (subWriter == null) { + throw new ReplicationAssertionError( + `No sub-writer found for source table ${table.qualifiedName} with group ID ${table.groupId}` + ); + } + + return subWriter; + } + + async getTable(ref: storage.SourceTable): Promise { + const sourceTableRow = await this.db.sql` + SELECT + * + FROM + source_tables + WHERE + id = ${{ type: 'varchar', value: postgresTableId(ref.id) }} + ` + .decoded(models.SourceTable) + .first(); + if (sourceTableRow == null) { + return null; + } + + const subWriter = this.subWriters.find((sw) => sw.storage.group_id === sourceTableRow.group_id); + if (subWriter == null) { + throw new ReplicationAssertionError( + `No sub-writer found for source table ${ref.qualifiedName} with group ID ${sourceTableRow.group_id}` + ); + } + + const sourceTable = new PostgresSourceTable( + { + // Immutable values + id: sourceTableRow.id, + connectionTag: ref.connectionTag, + objectId: ref.objectId, + schema: ref.schema, + name: ref.name, + replicaIdColumns: ref.replicaIdColumns, + pattern: ref.pattern, + + // Table state + snapshotComplete: sourceTableRow!.snapshot_done ?? true + }, + { groupId: sourceTableRow.group_id } + ); + if (!sourceTable.snapshotComplete) { + sourceTable.snapshotStatus = { + totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), + replicatedCount: Number(sourceTableRow!.snapshot_replicated_count ?? 
0n), + lastKey: sourceTableRow!.snapshot_last_key + }; + } + // Immutable + sourceTable.syncEvent = ref.syncEvent; + sourceTable.syncData = ref.syncData; + sourceTable.syncParameters = ref.syncParameters; + return sourceTable; + } + + async save(record: storage.SaveOptions): Promise { + const writer = this.subWriterForTable(record.sourceTable); + return writer.save(record); + } + + async truncate(sourceTables: storage.SourceTable[]): Promise { + let flushedResult: storage.FlushedResult | null = null; + for (let table of sourceTables) { + const writer = this.subWriterForTable(table); + const subResult = await writer.truncate([table]); + flushedResult = maxFlushedResult(flushedResult, subResult); + } + return flushedResult; + } + + async drop(sourceTables: storage.SourceTable[]): Promise { + let flushedResult: storage.FlushedResult | null = null; + for (let table of sourceTables) { + const writer = this.subWriterForTable(table); + const subResult = await writer.drop([table]); + flushedResult = maxFlushedResult(flushedResult, subResult); + } + return flushedResult; + } + + async flush(options?: storage.BatchBucketFlushOptions): Promise { + let flushedResult: storage.FlushedResult | null = null; + for (let writer of this.subWriters) { + const subResult = await writer.flush(); + flushedResult = maxFlushedResult(flushedResult, subResult); + } + return flushedResult; + } + + async markTableSnapshotDone( + tables: storage.SourceTable[], + no_checkpoint_before_lsn?: string + ): Promise { + let result: storage.SourceTable[] = []; + for (let table of tables) { + const writer = this.subWriterForTable(table); + const mapped = await writer.markTableSnapshotDone([table], no_checkpoint_before_lsn); + result.push(...mapped); + } + return result; + } + + async markTableSnapshotRequired(table: storage.SourceTable): Promise { + const writer = this.subWriterForTable(table); + await writer.markTableSnapshotRequired(table); + } + + async markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise { + for (let writer of this.subWriters) { + await writer.markAllSnapshotDone(no_checkpoint_before_lsn); + } + } + + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const writer = this.subWriterForTable(table); + return await writer.updateTableProgress(table, progress); + } + + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. + */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void { + for (let writer of this.subWriters) { + writer.addCustomWriteCheckpoint(checkpoint); + } + } + + async [Symbol.asyncDispose]() { + for (let writer of this.subWriters) { + await writer[Symbol.asyncDispose](); + } + } +} + +function maxFlushedResult( + a: storage.FlushedResult | null, + b: storage.FlushedResult | null +): storage.FlushedResult | null { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + return a.flushed_op > b.flushed_op ? 
a : b; +} diff --git a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts index d7e98e970..f1645da07 100644 --- a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +++ b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts @@ -1,8 +1,14 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { storage } from '@powersync/service-core'; -import { SqlSyncRules, versionedHydrationState } from '@powersync/service-sync-rules'; +import { + CompatibilityOption, + DEFAULT_HYDRATION_STATE, + HydrationState, + SqlSyncRules +} from '@powersync/service-sync-rules'; +import { versionedHydrationState } from '@powersync/service-sync-rules'; import { models } from '../../types/types.js'; export class PostgresPersistedSyncRulesContent implements storage.PersistedSyncRulesContent { @@ -32,15 +38,21 @@ export class PostgresPersistedSyncRulesContent implements storage.PersistedSyncR } parsed(options: storage.ParseSyncRulesOptions): storage.PersistedSyncRules { + let hydrationState: HydrationState; + const syncRules = SqlSyncRules.fromYaml(this.sync_rules_content, options); + if (syncRules.config.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + hydrationState = versionedHydrationState(this.id); + } else { + hydrationState = DEFAULT_HYDRATION_STATE; + } return { id: this.id, slot_name: this.slot_name, - sync_rules: SqlSyncRules.fromYaml(this.sync_rules_content, options), + sync_rules: syncRules, hydratedSyncRules() { - return this.sync_rules.config.hydrate({ - hydrationState: versionedHydrationState(this.id) - }); - } + return this.sync_rules.config.hydrate({ hydrationState }); + }, + hydrationState }; } diff --git a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap index 08a73c8c1..4cab7117e 100644 --- a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -104,7 +104,7 @@ exports[`sync - postgres > compacting data - invalidate checkpoint 2`] = ` ] `; -exports[`sync - postgres > encodes sync rules id in buckes for streams 1`] = ` +exports[`sync - postgres > encodes sync rules id in buckets for streams 1`] = ` [ { "checkpoint": { @@ -159,13 +159,13 @@ exports[`sync - postgres > encodes sync rules id in buckes for streams 1`] = ` ] `; -exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` +exports[`sync - postgres > encodes sync rules id in buckets for streams 2`] = ` [ { "checkpoint": { "buckets": [ { - "bucket": "2#test|0[]", + "bucket": "2#test2|0[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -181,7 +181,7 @@ exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` { "errors": [], "is_default": true, - "name": "test", + "name": "test2", }, ], "write_checkpoint": undefined, @@ -190,7 +190,7 @@ exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` { "data": { "after": "0", - "bucket": "2#test|0[]", + "bucket": "2#test2|0[]", "data": [ { "checksum": 920318466, @@ -199,7 +199,7 @@ exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` 
"object_type": "test", "op": "PUT", "op_id": "2", - "subkey": "02d285ac-4f96-5124-8fba-c6d1df992dd1", + "subkey": "8a5f3fdd-3f59-5153-92ae-ac115c458441", }, ], "has_more": false, @@ -819,7 +819,7 @@ exports[`sync - postgres > sync updates to data query only 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "2", - "subkey": "5ad0aa14-3d5e-5428-ad5b-2c33927d991c", + "subkey": "b9f16d58-e6f5-55b5-9622-7bc360dba34f", }, ], "has_more": false, @@ -1026,7 +1026,7 @@ exports[`sync - postgres > sync updates to parameter query + data 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "1", - "subkey": "5ad0aa14-3d5e-5428-ad5b-2c33927d991c", + "subkey": "b9f16d58-e6f5-55b5-9622-7bc360dba34f", }, ], "has_more": false, diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 2e701aa56..ca7cf7e2d 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -1,5 +1,5 @@ import { storage } from '@powersync/service-core'; -import { register, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { POSTGRES_STORAGE_FACTORY } from './util.js'; @@ -24,70 +24,70 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { // Test syncing a batch of data that is small in count, // but large enough in size to be split over multiple returned chunks. // Similar to the above test, but splits over 1MB chunks. - const sync_rules = test_utils.testRules( - ` + await using factory = await POSTGRES_STORAGE_FACTORY.factory(); + const syncRules = await factory.updateSyncRules({ + content: ` bucket_definitions: global: data: - SELECT id, description FROM "%" ` - ); - await using factory = await POSTGRES_STORAGE_FACTORY.factory(); - const bucketStorage = factory.getInstance(sync_rules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = test_utils.makeTestTable('test', ['id'], POSTGRES_STORAGE_FACTORY); - - const largeDescription = '0123456789'.repeat(2_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); }); + const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], POSTGRES_STORAGE_FACTORY); + + const largeDescription = '0123456789'.repeat(2_000_00); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + + await writer.save({ + sourceTable, + tag: 
storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); + + // Large enough to split the returned batch + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') + }); + + const result = await writer.flush(); const checkpoint = result!.flushed_op; const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules, 'global[]', 0n)], options) ); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -101,7 +101,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', batch1[0].chunkData.next_after)], options ) ); @@ -117,7 +117,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', batch2[0].chunkData.next_after)], options ) ); @@ -133,7 +133,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch4 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch3[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', batch3[0].chunkData.next_after)], options ) ); diff --git a/modules/module-postgres/package.json b/modules/module-postgres/package.json index 9ccd818eb..957c5f376 100644 --- a/modules/module-postgres/package.json +++ b/modules/module-postgres/package.json @@ -35,6 +35,7 @@ "@powersync/service-jsonbig": "workspace:*", "@powersync/service-sync-rules": "workspace:*", "@powersync/service-types": "workspace:*", + "p-defer": "^4.0.1", "semver": "^7.5.4", "ts-codec": "^1.3.0", "uuid": "^11.1.0" diff --git a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts index 8d05eb267..c06952aad 100644 --- a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts +++ b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts @@ -315,7 +315,8 @@ LEFT JOIN ( AND NOT a.attisdropped AND has_column_privilege(tbl.quoted_name, a.attname, 'SELECT, INSERT, UPDATE, REFERENCES') ) -GROUP BY schemaname, tablename, quoted_name` +GROUP BY schemaname, tablename, quoted_name +ORDER BY schemaname, tablename;` ); await this.typeCache.fetchTypesForSchema(); const rows = pgwire.pgwireRows(results); diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts new file mode 100644 index 000000000..367e1155b --- /dev/null +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -0,0 +1,645 @@ +import { + container, + logger as defaultLogger, + Logger, + ReplicationAbortedError, + ReplicationAssertionError +} from 
'@powersync/lib-services-framework'; +import { getUuidReplicaIdentityBson, MetricsEngine, SourceTable, storage } from '@powersync/service-core'; +import * as pgwire from '@powersync/service-jpgwire'; +import { + DatabaseInputRow, + HydratedSyncRules, + SqliteInputRow, + SqliteInputValue, + TablePattern, + toSyncRulesRow, + toSyncRulesValue +} from '@powersync/service-sync-rules'; + +import { ReplicationMetric } from '@powersync/service-types'; +import pDefer, { DeferredPromise } from 'p-defer'; +import { PostgresTypeResolver } from '../types/resolver.js'; +import { PgManager } from './PgManager.js'; +import { + checkSourceConfiguration, + checkTableRls, + ensureStorageCompatibility, + getReplicationIdentityColumns +} from './replication-utils.js'; +import { + ChunkedSnapshotQuery, + IdSnapshotQuery, + PrimaryKeyValue, + SimpleSnapshotQuery, + SnapshotQuery +} from './SnapshotQuery.js'; +import { + MissingReplicationSlotError, + POSTGRES_DEFAULT_SCHEMA, + PUBLICATION_NAME, + sendKeepAlive, + WalStreamOptions, + ZERO_LSN +} from './WalStream.js'; + +interface InitResult { + /** True if initial snapshot is not yet done. */ + needsInitialSync: boolean; + /** True if snapshot must be started from scratch with a new slot. */ + needsNewSlot: boolean; +} + +export class PostgresSnapshotter { + sync_rules: HydratedSyncRules; + group_id: number; + + connection_id = 1; + + private logger: Logger; + + private readonly storage: storage.SyncRulesBucketStorage; + private readonly metrics: MetricsEngine; + private readonly slot_name: string; + + private connections: PgManager; + + private abortSignal: AbortSignal; + + private snapshotChunkLength: number; + + private queue = new Set(); + private nextItemQueued: DeferredPromise | null = null; + private initialSnapshotDone = pDefer(); + + constructor(options: WalStreamOptions) { + this.logger = options.logger ?? defaultLogger; + this.storage = options.storage; + this.metrics = options.metrics; + this.sync_rules = options.storage.getHydratedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.group_id = options.storage.group_id; + this.slot_name = options.storage.slot_name; + this.connections = options.connections; + this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; + + this.abortSignal = options.abort_signal; + + this.abortSignal.addEventListener('abort', () => { + // Wake up the queue if is waiting for items + this.nextItemQueued?.resolve(); + }); + } + + async getQualifiedTableNames( + writer: storage.BucketDataWriter, + db: pgwire.PgConnection, + tablePattern: TablePattern + ): Promise { + const schema = tablePattern.schema; + if (tablePattern.connectionTag != this.connections.connectionTag) { + return []; + } + + let tableRows: any[]; + const prefix = tablePattern.isWildcard ? 
tablePattern.tablePrefix : undefined; + + { + let query = ` + SELECT + c.oid AS relid, + c.relname AS table_name + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = $1 + AND c.relkind = 'r'`; + + if (tablePattern.isWildcard) { + query += ' AND c.relname LIKE $2'; + } else { + query += ' AND c.relname = $2'; + } + + const result = await db.query({ + statement: query, + params: [ + { type: 'varchar', value: schema }, + { type: 'varchar', value: tablePattern.tablePattern } + ] + }); + + tableRows = pgwire.pgwireRows(result); + } + + let result: storage.SourceTable[] = []; + + for (let row of tableRows) { + const name = row.table_name as string; + if (typeof row.relid != 'bigint') { + throw new ReplicationAssertionError(`Missing relid for ${name}`); + } + const relid = Number(row.relid as bigint); + + if (prefix && !name.startsWith(prefix)) { + continue; + } + + const rs = await db.query({ + statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, + params: [ + { type: 'varchar', value: PUBLICATION_NAME }, + { type: 'varchar', value: tablePattern.schema }, + { type: 'varchar', value: name } + ] + }); + if (rs.rows.length == 0) { + this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); + continue; + } + + try { + const result = await checkTableRls(db, relid); + if (!result.canRead) { + // We log the message, then continue anyway, since the check does not cover all cases. + this.logger.warn(result.message!); + } + } catch (e) { + // It's possible that we just don't have permission to access pg_roles - log the error and continue. + this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); + } + + const cresult = await getReplicationIdentityColumns(db, relid); + const columnTypesResult = await db.query({ + statement: `SELECT DISTINCT atttypid + FROM pg_attribute + WHERE attnum > 0 AND NOT attisdropped AND attrelid = $1`, + params: [{ type: 'int4', value: relid }] + }); + + const columnTypes = columnTypesResult.rows.map((row) => Number(row.decodeWithoutCustomTypes(0))); + + const resolveOptions = { + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: { + schema: schema, + name: name, + objectId: relid, + replicaIdColumns: cresult.replicationColumns + }, + pattern: tablePattern + }; + const resolvedResult = await writer.resolveTables(resolveOptions); + + // Ensure we have a description for custom types referenced in the table. 
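Because the snapshotter now resolves tables through the combined writer, a single physical table comes back as one SourceTable per sub-writer, and later per-row operations route back to the owning storage via the groupId carried on PostgresSourceTable. A simplified sketch of that routing idea, using local stand-in types rather than the real storage interfaces:

interface TableRefSketch {
  qualifiedName: string;
  groupId: number;
}

interface SubWriterSketch {
  groupId: number;
  save(table: TableRefSketch, row: Record<string, unknown>): Promise<void>;
}

class CombinedWriterSketch {
  constructor(private subWriters: SubWriterSketch[]) {}

  // Each resolved table remembers which sync-rules group created it...
  private forTable(table: TableRefSketch): SubWriterSketch {
    const sub = this.subWriters.find((w) => w.groupId === table.groupId);
    if (sub == null) {
      throw new Error(`No sub-writer for ${table.qualifiedName} (group ${table.groupId})`);
    }
    return sub;
  }

  // ...so one replicated row is written once per resolved table, landing in the
  // storage that owns that sync-rules version.
  async save(tables: TableRefSketch[], row: Record<string, unknown>): Promise<void> {
    for (const table of tables) {
      await this.forTable(table).save(table, row);
    }
  }
}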
+ await this.connections.types.fetchTypes(columnTypes); + + result.push(...resolvedResult); + + const dropTables = await writer.resolveTablesToDrop(resolveOptions); + // TODO: Do this in the replication loop, not when listing the tables + await writer.drop(dropTables); + } + return result; + } + + async checkSlot(): Promise { + await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); + await ensureStorageCompatibility(this.connections.pool, this.storage.factory); + + const slotName = this.slot_name; + + const status = await this.storage.getStatus(); + const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; + if (snapshotDone) { + // Snapshot is done, but we still need to check the replication slot status + this.logger.info(`Initial replication already done`); + } + + // Check if replication slot exists + const slot = pgwire.pgwireRows( + await this.connections.pool.query({ + // We specifically want wal_status and invalidation_reason, but it's not available on older versions, + // so we just query *. + statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }) + )[0]; + + // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. + // However, there were some edge cases where the query times out, repeating the query, ultimately + // causing high load on the source database and never recovering automatically. + // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those + // errors during streaming replication, which is a little more robust. + + // We can have: + // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). + // Theoretically we could handle this the same as (2). + // 2. needsInitialSync: true, no slot -> create new slot + // 3. needsInitialSync: true, valid slot -> resume initial sync + // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) + // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) + // 6. needsInitialSync: false, valid slot -> resume streaming replication + // The main advantage of MissingReplicationSlotError are: + // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while + // we do the reprocessing under a new slot name. + // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for + // the partial data to be cleared. + if (slot != null) { + // This checks that the slot is still valid + + // wal_status is present in postgres 13+ + // invalidation_reason is present in postgres 17+ + const lost = slot.wal_status == 'lost'; + if (lost) { + // Case 1 / 4 + throw new MissingReplicationSlotError( + `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 
'unknown'}` + ); + } + // Case 3 / 6 + return { + needsInitialSync: !snapshotDone, + needsNewSlot: false + }; + } else { + if (snapshotDone) { + // Case 5 + // This will create a new slot, while keeping the current sync rules active + throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); + } + // Case 2 + // This will clear data (if any) and re-create the same slot + return { needsInitialSync: true, needsNewSlot: true }; + } + } + + async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { + const results = await db.query({ + statement: `SELECT reltuples::bigint AS estimate + FROM pg_class + WHERE oid = $1::regclass`, + params: [{ value: table.qualifiedName, type: 'varchar' }] + }); + const count = results.rows[0]?.decodeWithoutCustomTypes(0); + return Number(count ?? -1n); + } + + public async setupSlot(db: pgwire.PgConnection, status: InitResult) { + // If anything here errors, the entire replication process is aborted, + // and all connections are closed, including this one. + const slotName = this.slot_name; + + if (status.needsNewSlot) { + // This happens when there is no existing replication slot, or if the + // existing one is unhealthy. + // In those cases, we have to start replication from scratch. + // If there is an existing healthy slot, we can skip this and continue + // initial replication where we left off. + await this.storage.clear({ signal: this.abortSignal }); + + await db.query({ + statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }); + + // We use the replication connection here, not a pool. + // The replication slot must be created before we start snapshotting tables. + const initReplicationConnection = await this.connections.replicationConnection(); + try { + await initReplicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); + } finally { + await initReplicationConnection.end(); + } + + this.logger.info(`Created replication slot ${slotName}`); + } + } + + async replicateTable(requestTable: SourceTable) { + const db = await this.connections.snapshotConnection(); + await using _ = { [Symbol.asyncDispose]: () => db.end() }; + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }); + + // Get fresh table info, in case it was updated while queuing + const table = await writer.getTable(requestTable); + if (table == null) { + return; + } + if (table.snapshotComplete) { + return; + } + await this.snapshotTableInTx(writer, db, table); + // This commit ensures we set keepalive_op. + // It may be better if that is automatically set when flushing. + const flushResults = await writer.flush(); + await writer.commit(ZERO_LSN); + + this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); + } + + async waitForInitialSnapshot() { + await this.initialSnapshotDone.promise; + } + + async replicationLoop() { + try { + if (this.queue.size == 0) { + // Special case where we start with no tables to snapshot + await this.markSnapshotDone(); + } + while (!this.abortSignal.aborted) { + const table = this.queue.values().next().value; + if (table == null) { + this.initialSnapshotDone.resolve(); + // There must be no await in between checking the queue above and creating this deferred promise, + // otherwise we may miss new items being queued. 
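The six cases enumerated in checkSlot above collapse into a small decision over two inputs: whether the snapshot is already done, and whether a usable replication slot still exists. A compact sketch of that decision logic, with plain local types standing in for the real error classes and InitResult:

type SlotStatusSketch = 'missing' | 'lost' | 'healthy';

interface SlotDecisionSketch {
  needsInitialSync: boolean;
  needsNewSlot: boolean;
}

// Mirrors cases 1-6 above: a lost slot always forces reprocessing under a new slot,
// and a missing slot is only recoverable while the snapshot is still pending.
function decideSlotAction(snapshotDone: boolean, slot: SlotStatusSketch): SlotDecisionSketch {
  if (slot === 'lost') {
    // Cases 1 / 4
    throw new Error('Replication slot invalidated; reprocess under a new slot');
  }
  if (slot === 'healthy') {
    // Cases 3 / 6: resume initial sync or streaming, depending on snapshot state.
    return { needsInitialSync: !snapshotDone, needsNewSlot: false };
  }
  if (snapshotDone) {
    // Case 5
    throw new Error('Replication slot missing; reprocess under a new slot');
  }
  // Case 2: clear any partial data and re-create the slot.
  return { needsInitialSync: true, needsNewSlot: true };
}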
+ this.nextItemQueued = pDefer(); + await this.nextItemQueued.promise; + this.nextItemQueued = null; + // At this point, either we have have a new item in the queue, or we are aborted. + continue; + } + + await this.replicateTable(table); + this.queue.delete(table); + if (this.queue.size == 0) { + await this.markSnapshotDone(); + } + } + throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason); + } catch (e) { + // If initial snapshot already completed, this has no effect + this.initialSnapshotDone.reject(e); + throw e; + } + } + + private async markSnapshotDone() { + const db = await this.connections.snapshotConnection(); + await using _ = { [Symbol.asyncDispose]: () => db.end() }; + + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true + }); + + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + const globalLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0); + await writer.markAllSnapshotDone(globalLsnNotBefore); + + /** + * Send a keepalive message after initial replication. + * In some edge cases we wait for a keepalive after the initial snapshot. + * If we don't explicitly check the contents of keepalive messages then a keepalive is detected + * rather quickly after initial replication - perhaps due to other WAL events. + * If we do explicitly check the contents of messages, we need an actual keepalive payload in order + * to advance the active sync rules LSN. + */ + await sendKeepAlive(db); + + // FIXME: Implement this again + // const lastOp = flushResults?.flushed_op; + // if (lastOp != null) { + // // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. + // // TODO: only run this after initial replication, not after each table. + // await this.storage.populatePersistentChecksumCache({ + // // No checkpoint yet, but we do have the opId. + // maxOpId: lastOp, + // signal: this.abortSignal + // }); + // } + } + + /** + * Start initial replication. + * + * If (partial) replication was done before on this slot, this clears the state + * and starts again from scratch. 
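The replication loop above uses p-defer so the snapshotter can sleep while its queue is empty and be woken either by queueTable or by an abort; as the comment in the loop notes, the deferred promise must be created before awaiting, or an item queued in between would be missed. A stripped-down sketch of that hand-off, assuming only the published p-defer API:

import pDefer, { DeferredPromise } from 'p-defer';

class WorkQueueSketch<T> {
  private queue = new Set<T>();
  private wakeUp: DeferredPromise<void> | null = null;

  constructor(private abortSignal: AbortSignal) {
    // An abort also resolves any pending deferred, so the consumer can observe it and exit.
    abortSignal.addEventListener('abort', () => this.wakeUp?.resolve());
  }

  add(item: T) {
    this.queue.add(item);
    // Wake the consumer if it is currently waiting for work.
    this.wakeUp?.resolve();
  }

  async *consume(): AsyncGenerator<T> {
    while (!this.abortSignal.aborted) {
      const next = this.queue.values().next().value;
      if (next == null) {
        // Create the deferred before awaiting; an add() in between is then still observed.
        this.wakeUp = pDefer<void>();
        await this.wakeUp.promise;
        this.wakeUp = null;
        continue;
      }
      yield next;
      this.queue.delete(next);
    }
  }
}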
+ */ + async queueSnapshotTables(db: pgwire.PgConnection) { + const sourceTables = this.sync_rules.getSourceTables(); + + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }); + + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, db, tablePattern); + // Pre-get counts + for (let table of tables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; + } + const count = await this.estimatedCountNumber(db, table); + table = await writer.updateTableProgress(table, { totalEstimatedCount: count }); + + this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + + this.queueTable(table); + } + } + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield toSyncRulesRow(row); + } + } + + private queueTable(table: storage.SourceTable) { + this.queue.add(table); + this.nextItemQueued?.resolve(); + } + + public async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { + await writer.markTableSnapshotRequired(table); + this.queueTable(table); + } + + public async snapshotTableInTx( + writer: storage.BucketDataWriter, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ): Promise { + // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. + // The data may change during the transaction, but that is compensated for in the streaming + // replication afterwards. + await db.query('BEGIN'); + try { + let tableLsnNotBefore: string; + await this.snapshotTable(writer, db, table, limited); + + // Get the current LSN. + // The data will only be consistent once incremental replication has passed that point. + // We have to get this LSN _after_ we have finished the table snapshot. + // + // There are basically two relevant LSNs here: + // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, + // but it is implicitly recorded in the replication slot. + // B: The LSN after the table snapshot is complete, which is what we get here. + // When we do the snapshot queries, the data that we get back for each chunk could match the state + // anywhere between A and B. To actually have a consistent state on our side, we need to: + // 1. Complete the snapshot. + // 2. Wait until logical replication has caught up with all the change between A and B. + // Calling `markSnapshotDone(LSN B)` covers that. + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + tableLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0); + // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. + await db.query('COMMIT'); + this.logger.info(`Snapshot complete for table ${table.qualifiedName}, resume at ${tableLsnNotBefore}`); + const [resultTable] = await writer.markTableSnapshotDone([table], tableLsnNotBefore); + return resultTable; + } catch (e) { + await db.query('ROLLBACK'); + throw e; + } + } + + private async snapshotTable( + writer: storage.BucketDataWriter, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ) { + let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; + let at = table.snapshotStatus?.replicatedCount ?? 
0; + let lastCountTime = 0; + let q: SnapshotQuery; + // We do streaming on two levels: + // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time. + // 2. Fine level: Stream chunks from each fetch call. + if (limited) { + q = new IdSnapshotQuery(db, table, limited); + } else if (ChunkedSnapshotQuery.supports(table)) { + // Single primary key - we can use the primary key for chunking + const orderByKey = table.replicaIdColumns[0]; + q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); + if (table.snapshotStatus?.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); + } + } else { + // Fallback case - query the entire table + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); + q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); + at = 0; + } + await q.initialize(); + + let hasRemainingData = true; + while (hasRemainingData) { + // Fetch 10k at a time. + // The balance here is between latency overhead per FETCH call, + // and not spending too much time on each FETCH call. + // We aim for a couple of seconds on each FETCH call. + const cursor = q.nextChunk(); + hasRemainingData = false; + // pgwire streams rows in chunks. + // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. + // There are typically 100-200 rows per chunk. + for await (let chunk of cursor) { + if (chunk.tag == 'RowDescription') { + continue; + } + + if (chunk.rows.length > 0) { + hasRemainingData = true; + } + + for (const rawRow of chunk.rows) { + const record = this.sync_rules.applyRowContext( + PostgresSnapshotter.decodeRow(rawRow, this.connections.types) + ); + + // This auto-flushes when the batch reaches its size limit + await writer.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) + }); + } + + at += chunk.rows.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(chunk.rows.length); + + this.touch(); + } + + // Important: flush before marking progress + await writer.flush(); + if (limited == null) { + let lastKey: Uint8Array | undefined; + if (q instanceof ChunkedSnapshotQuery) { + lastKey = q.getLastKeySerialized(); + } + if (lastCountTime < performance.now() - 10 * 60 * 1000) { + // Even though we're doing the snapshot inside a transaction, the transaction uses + // the default "Read Committed" isolation level. This means we can get new data + // within the transaction, so we re-estimate the count every 10 minutes when replicating + // large tables. 
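The chunked path above makes large table snapshots resumable by persisting the last primary-key value together with each flushed chunk, so a restart continues strictly after that key instead of from the start of the table. A rough sketch of the underlying keyset-pagination query shape, with a hypothetical query helper standing in for ChunkedSnapshotQuery and the pgwire plumbing:

interface RowSketch {
  id: string;
  [column: string]: unknown;
}

// Hypothetical: stands in for the real connection/cursor handling.
declare function query(sql: string, params: unknown[]): Promise<RowSketch[]>;

// Fetch one chunk at a time, ordered by the single primary-key column, starting
// strictly after the last key persisted by the previous run. Table name interpolation
// is acceptable here only because this is an illustration.
async function* chunkedSnapshot(table: string, chunkLength: number, resumeAfter?: string) {
  let lastKey = resumeAfter ?? null;
  while (true) {
    const rows =
      lastKey == null
        ? await query(`SELECT * FROM ${table} ORDER BY id LIMIT $1`, [chunkLength])
        : await query(`SELECT * FROM ${table} WHERE id > $1 ORDER BY id LIMIT $2`, [lastKey, chunkLength]);
    if (rows.length == 0) {
      break;
    }
    yield rows;
    // Persisting this key alongside the flushed rows is what makes the snapshot resumable.
    lastKey = rows[rows.length - 1].id;
  }
}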
+ totalEstimatedCount = await this.estimatedCountNumber(db, table); + lastCountTime = performance.now(); + } + table = await writer.updateTableProgress(table, { + lastKey: lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); + } + + if (this.abortSignal.aborted) { + // We only abort after flushing + throw new ReplicationAbortedError(`Table snapshot interrupted`, this.abortSignal.reason); + } + } + } + + private touch() { + container.probes.touch().catch((e) => { + this.logger.error(`Error touching probe`, e); + }); + } + + static decodeRow(row: pgwire.PgRow, types: PostgresTypeResolver): SqliteInputRow { + let result: SqliteInputRow = {}; + + row.raw.forEach((rawValue, i) => { + const column = row.columns[i]; + let mappedValue: SqliteInputValue; + + if (typeof rawValue == 'string') { + mappedValue = toSyncRulesValue(types.registry.decodeDatabaseValue(rawValue, column.typeOid), false, true); + } else { + // Binary format, expose as-is. + mappedValue = rawValue; + } + + result[column.name] = mappedValue; + }); + return result; + } +} diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 11b3f0d21..05ee68580 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -1,18 +1,15 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { container, - DatabaseConnectionError, logger as defaultLogger, - ErrorCode, Logger, ReplicationAbortedError, ReplicationAssertionError } from '@powersync/lib-services-framework'; import { - BucketStorageBatch, + BucketDataWriter, getUuidReplicaIdentityBson, MetricsEngine, - RelationCache, SaveUpdate, SourceEntityDescriptor, SourceTable, @@ -26,24 +23,15 @@ import { SqliteInputRow, SqliteInputValue, SqliteRow, - TablePattern, - ToastableSqliteRow, - toSyncRulesValue + ToastableSqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; -import { PostgresTypeResolver } from '../types/resolver.js'; import { PgManager } from './PgManager.js'; import { getPgOutputRelation, getRelId, referencedColumnTypeIds } from './PgRelation.js'; -import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js'; -import { - ChunkedSnapshotQuery, - IdSnapshotQuery, - MissingRow, - PrimaryKeyValue, - SimpleSnapshotQuery, - SnapshotQuery -} from './SnapshotQuery.js'; +import { PostgresSnapshotter } from './PostgresSnapshotter.js'; +import { ensureStorageCompatibility } from './replication-utils.js'; +import { IdSnapshotQuery, MissingRow, PrimaryKeyValue } from './SnapshotQuery.js'; export interface WalStreamOptions { logger?: Logger; @@ -62,13 +50,6 @@ export interface WalStreamOptions { snapshotChunkLength?: number; } -interface InitResult { - /** True if initial snapshot is not yet done. */ - needsInitialSync: boolean; - /** True if snapshot must be started from scratch with a new slot. 
*/ - needsNewSlot: boolean; -} - export const ZERO_LSN = '00000000/00000000'; export const PUBLICATION_NAME = 'powersync'; export const POSTGRES_DEFAULT_SCHEMA = 'public'; @@ -120,18 +101,15 @@ export class WalStream { private connections: PgManager; - private abort_signal: AbortSignal; + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; - private relationCache = new RelationCache((relation: number | SourceTable) => { - if (typeof relation == 'number') { - return relation; - } - return relation.objectId!; - }); + private initPromise: Promise | null = null; + private snapshotter: PostgresSnapshotter; - private startedStreaming = false; + public readonly relationCache = new Map(); - private snapshotChunkLength: number; + private startedStreaming = false; /** * Time of the oldest uncommitted change, according to the source db. @@ -144,20 +122,25 @@ export class WalStream { */ private isStartingReplication = true; - private initialSnapshotPromise: Promise | null = null; - constructor(options: WalStreamOptions) { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; - this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.sync_rules = options.storage.getHydratedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; - this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; - this.abort_signal = options.abort_signal; - this.abort_signal.addEventListener( + // We wrap in our own abort controller so we can trigger abort internally. + options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); + } + + this.snapshotter = new PostgresSnapshotter({ ...options, abort_signal: this.abortSignal }); + this.abortSignal.addEventListener( 'abort', () => { if (this.startedStreaming) { @@ -179,516 +162,71 @@ export class WalStream { } get stopped() { - return this.abort_signal.aborted; - } - - async getQualifiedTableNames( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - tablePattern: TablePattern - ): Promise { - const schema = tablePattern.schema; - if (tablePattern.connectionTag != this.connections.connectionTag) { - return []; - } - - let tableRows: any[]; - const prefix = tablePattern.isWildcard ? 
tablePattern.tablePrefix : undefined; - - { - let query = ` - SELECT - c.oid AS relid, - c.relname AS table_name, - (SELECT - json_agg(DISTINCT a.atttypid) - FROM pg_attribute a - WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) - AS column_types - FROM pg_class c - JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE n.nspname = $1 - AND c.relkind = 'r'`; - - if (tablePattern.isWildcard) { - query += ' AND c.relname LIKE $2'; - } else { - query += ' AND c.relname = $2'; - } - - const result = await db.query({ - statement: query, - params: [ - { type: 'varchar', value: schema }, - { type: 'varchar', value: tablePattern.tablePattern } - ] - }); - - tableRows = pgwire.pgwireRows(result); - } - - let result: storage.SourceTable[] = []; - - for (let row of tableRows) { - const name = row.table_name as string; - if (typeof row.relid != 'bigint') { - throw new ReplicationAssertionError(`Missing relid for ${name}`); - } - const relid = Number(row.relid as bigint); - - if (prefix && !name.startsWith(prefix)) { - continue; - } - - const rs = await db.query({ - statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, - params: [ - { type: 'varchar', value: PUBLICATION_NAME }, - { type: 'varchar', value: tablePattern.schema }, - { type: 'varchar', value: name } - ] - }); - if (rs.rows.length == 0) { - this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); - continue; - } - - try { - const result = await checkTableRls(db, relid); - if (!result.canRead) { - // We log the message, then continue anyway, since the check does not cover all cases. - this.logger.warn(result.message!); - } - } catch (e) { - // It's possible that we just don't have permission to access pg_roles - log the error and continue. - this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); - } - - const cresult = await getReplicationIdentityColumns(db, relid); - - const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); - const table = await this.handleRelation({ - batch, - descriptor: { - name, - schema, - objectId: relid, - replicaIdColumns: cresult.replicationColumns - } as SourceEntityDescriptor, - snapshot: false, - referencedTypeIds: columnTypes - }); - - result.push(table); - } - return result; + return this.abortSignal.aborted; } - async initSlot(): Promise { - await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); - await this.ensureStorageCompatibility(); - - const slotName = this.slot_name; - - const status = await this.storage.getStatus(); - const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; - if (snapshotDone) { - // Snapshot is done, but we still need to check the replication slot status - this.logger.info(`Initial replication already done`); - } + async handleRelation(options: { + writer: storage.BucketDataWriter; + descriptor: SourceEntityDescriptor; + snapshot: boolean; + referencedTypeIds: number[]; + }) { + const { writer, descriptor, snapshot, referencedTypeIds } = options; - // Check if replication slot exists - const slot = pgwire.pgwireRows( - await this.connections.pool.query({ - // We specifically want wal_status and invalidation_reason, but it's not available on older versions, - // so we just query *. 
- statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] - }) - )[0]; - - // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. - // However, there were some edge cases where the query times out, repeating the query, ultimately - // causing high load on the source database and never recovering automatically. - // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those - // errors during streaming replication, which is a little more robust. - - // We can have: - // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). - // Theoretically we could handle this the same as (2). - // 2. needsInitialSync: true, no slot -> create new slot - // 3. needsInitialSync: true, valid slot -> resume initial sync - // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) - // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) - // 6. needsInitialSync: false, valid slot -> resume streaming replication - // The main advantage of MissingReplicationSlotError are: - // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while - // we do the reprocessing under a new slot name. - // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for - // the partial data to be cleared. - if (slot != null) { - // This checks that the slot is still valid - - // wal_status is present in postgres 13+ - // invalidation_reason is present in postgres 17+ - const lost = slot.wal_status == 'lost'; - if (lost) { - // Case 1 / 4 - throw new MissingReplicationSlotError( - `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}` - ); - } - // Case 3 / 6 - return { - needsInitialSync: !snapshotDone, - needsNewSlot: false - }; - } else { - if (snapshotDone) { - // Case 5 - // This will create a new slot, while keeping the current sync rules active - throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); - } - // Case 2 - // This will clear data (if any) and re-create the same slot - return { needsInitialSync: true, needsNewSlot: true }; + if (!descriptor.objectId && typeof descriptor.objectId != 'number') { + throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); } - } - - async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { - const results = await db.query({ - statement: `SELECT reltuples::bigint AS estimate -FROM pg_class -WHERE oid = $1::regclass`, - params: [{ value: table.qualifiedName, type: 'varchar' }] + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: descriptor.schema, + name: descriptor.name }); - const row = results.rows[0]; - return Number(row?.decodeWithoutCustomTypes(0) ?? -1n); - } - /** - * Start initial replication. 
- * - * If (partial) replication was done before on this slot, this clears the state - * and starts again from scratch. - */ - async startInitialReplication(replicationConnection: pgwire.PgConnection, status: InitResult) { - // If anything here errors, the entire replication process is aborted, - // and all connections are closed, including this one. - const db = await this.connections.snapshotConnection(); - - const slotName = this.slot_name; - - if (status.needsNewSlot) { - // This happens when there is no existing replication slot, or if the - // existing one is unhealthy. - // In those cases, we have to start replication from scratch. - // If there is an existing healthy slot, we can skip this and continue - // initial replication where we left off. - await this.storage.clear({ signal: this.abort_signal }); - - await db.query({ - statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const resolvedTables = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + pattern }); - // We use the replication connection here, not a pool. - // The replication slot must be created before we start snapshotting tables. - await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); - - this.logger.info(`Created replication slot ${slotName}`); - } - - await this.initialReplication(db); - } - - async initialReplication(db: pgwire.PgConnection) { - const sourceTables = this.sync_rules.getSourceTables(); - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - let tablesWithStatus: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, db, tablePattern); - // Pre-get counts - for (let table of tables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - const count = await this.estimatedCountNumber(db, table); - table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); - this.relationCache.update(table); - tablesWithStatus.push(table); - - this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } - } - - for (let table of tablesWithStatus) { - await this.snapshotTableInTx(batch, db, table); - this.touch(); - } - - // Always commit the initial snapshot at zero. - // This makes sure we don't skip any changes applied before starting this snapshot, - // in the case of snapshot retries. - // We could alternatively commit at the replication slot LSN. - - // Get the current LSN for the snapshot. - // We could also use the LSN from the last table snapshot. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const noCommitBefore = rs.rows[0].decodeWithoutCustomTypes(0); - - await batch.markAllSnapshotDone(noCommitBefore); - await batch.commit(ZERO_LSN); - } - ); - /** - * Send a keepalive message after initial replication. - * In some edge cases we wait for a keepalive after the initial snapshot. 
- * If we don't explicitly check the contents of keepalive messages then a keepalive is detected - * rather quickly after initial replication - perhaps due to other WAL events. - * If we do explicitly check the contents of messages, we need an actual keepalive payload in order - * to advance the active sync rules LSN. - */ - await sendKeepAlive(db); - - const lastOp = flushResults?.flushed_op; - if (lastOp != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - await this.storage.populatePersistentChecksumCache({ - // No checkpoint yet, but we do have the opId. - maxOpId: lastOp, - signal: this.abort_signal - }); - } - } - - static decodeRow(row: pgwire.PgRow, types: PostgresTypeResolver): SqliteInputRow { - let result: SqliteInputRow = {}; - - row.raw.forEach((rawValue, i) => { - const column = row.columns[i]; - let mappedValue: SqliteInputValue; - - if (typeof rawValue == 'string') { - mappedValue = toSyncRulesValue(types.registry.decodeDatabaseValue(rawValue, column.typeOid), false, true); - } else { - // Binary format, expose as-is. - mappedValue = rawValue; - } - - result[column.name] = mappedValue; - }); - return result; - } - - private async snapshotTableInTx( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ): Promise { - // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. - // The data may change during the transaction, but that is compensated for in the streaming - // replication afterwards. - await db.query('BEGIN'); - try { - await this.snapshotTable(batch, db, table, limited); - - // Get the current LSN. - // The data will only be consistent once incremental replication has passed that point. - // We have to get this LSN _after_ we have finished the table snapshot. - // - // There are basically two relevant LSNs here: - // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, - // but it is implicitly recorded in the replication slot. - // B: The LSN after the table snapshot is complete, which is what we get here. - // When we do the snapshot queries, the data that we get back for each chunk could match the state - // anywhere between A and B. To actually have a consistent state on our side, we need to: - // 1. Complete the snapshot. - // 2. Wait until logical replication has caught up with all the change between A and B. - // Calling `markSnapshotDone(LSN B)` covers that. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const tableLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0); - // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. - await db.query('COMMIT'); - const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore); - this.relationCache.update(resultTable); - return resultTable; - } catch (e) { - await db.query('ROLLBACK'); - throw e; - } - } - - private async snapshotTable( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ) { - let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; - let at = table.snapshotStatus?.replicatedCount ?? 0; - let lastCountTime = 0; - let q: SnapshotQuery; - // We do streaming on two levels: - // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time. - // 2. Fine level: Stream chunks from each fetch call. 
- if (limited) { - q = new IdSnapshotQuery(db, table, limited); - } else if (ChunkedSnapshotQuery.supports(table)) { - // Single primary key - we can use the primary key for chunking - const orderByKey = table.replicaIdColumns[0]; - q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); - if (table.snapshotStatus?.lastKey != null) { - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` - ); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); - } - } else { - // Fallback case - query the entire table - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); - q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); - at = 0; - } - await q.initialize(); - - let hasRemainingData = true; - while (hasRemainingData) { - // Fetch 10k at a time. - // The balance here is between latency overhead per FETCH call, - // and not spending too much time on each FETCH call. - // We aim for a couple of seconds on each FETCH call. - const cursor = q.nextChunk(); - hasRemainingData = false; - // pgwire streams rows in chunks. - // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. - // There are typically 100-200 rows per chunk. - for await (let chunk of cursor) { - if (chunk.tag == 'RowDescription') { - continue; - } - - if (chunk.rows.length > 0) { - hasRemainingData = true; - } - - for (const rawRow of chunk.rows) { - const record = this.sync_rules.applyRowContext(WalStream.decodeRow(rawRow, this.connections.types)); - - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) - }); - } - - at += chunk.rows.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(chunk.rows.length); - - this.touch(); - } - - // Important: flush before marking progress - await batch.flush(); - if (limited == null) { - let lastKey: Uint8Array | undefined; - if (q instanceof ChunkedSnapshotQuery) { - lastKey = q.getLastKeySerialized(); - } - if (lastCountTime < performance.now() - 10 * 60 * 1000) { - // Even though we're doing the snapshot inside a transaction, the transaction uses - // the default "Read Committed" isolation level. This means we can get new data - // within the transaction, so we re-estimate the count every 10 minutes when replicating - // large tables. - totalEstimatedCount = await this.estimatedCountNumber(db, table); - lastCountTime = performance.now(); + // Ensure we have a description for custom types referenced in the table. + await this.connections.types.fetchTypes(referencedTypeIds); + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. 
+      for (let table of resolvedTables) { +        const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; +        if (shouldSnapshot) { +          this.logger.info(`New table: ${descriptor.schema}.${descriptor.name}`); +          await this.snapshotter.queueSnapshot(writer, table); +        } +      } -        table = await batch.updateTableProgress(table, { -          lastKey: lastKey, -          replicatedCount: at, -          totalEstimatedCount: totalEstimatedCount -        }); -        this.relationCache.update(table); - -        this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); -      } else { -        this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); -      } -      if (this.abort_signal.aborted) { -        // We only abort after flushing -        throw new ReplicationAbortedError(`Initial replication interrupted`); -      } +      allTables.push(...resolvedTables); }   -  } -  async handleRelation(options: { -    batch: storage.BucketStorageBatch; -    descriptor: SourceEntityDescriptor; -    snapshot: boolean; -    referencedTypeIds: number[]; -  }) { -    const { batch, descriptor, snapshot, referencedTypeIds } = options; - -    if (!descriptor.objectId && typeof descriptor.objectId != 'number') { -      throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); -    } -    const result = await this.storage.resolveTable({ -      group_id: this.group_id, +    const dropTables = await writer.resolveTablesToDrop({       connection_id: this.connection_id,       connection_tag: this.connections.connectionTag, -      entity_descriptor: descriptor, -      sync_rules: this.sync_rules +      entity_descriptor: descriptor     }); -    this.relationCache.update(result.table); -    // Drop conflicting tables. This includes for example renamed tables. -    await batch.drop(result.dropTables); - -    // Ensure we have a description for custom types referenced in the table. -    await this.connections.types.fetchTypes(referencedTypeIds); - -    // Snapshot if: -    // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) -    // 2. Snapshot is not already done, AND: -    // 3. The table is used in sync rules. -    const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - -    if (shouldSnapshot) { -      // Truncate this table, in case a previous snapshot was interrupted. -      await batch.truncate([result.table]); - -      // Start the snapshot inside a transaction. -      // We use a dedicated connection for this. -      const db = await this.connections.snapshotConnection(); -      try { -        const table = await this.snapshotTableInTx(batch, db, result.table); -        // After the table snapshot, we wait for replication to catch up. -        // To make sure there is actually something to replicate, we send a keepalive -        // message. -        await sendKeepAlive(db); -        return table; -      } finally { -        await db.end(); -      } +    if (dropTables.length > 0) { +      this.logger.info(`Dropping conflicting tables: ${dropTables.map((t) => t.qualifiedName).join(', ')}`); +      await writer.drop(dropTables); +    } -    return result.table; +    this.relationCache.set(descriptor.objectId, allTables); + +    return allTables;   }   /** @@ -699,7 +237,7 @@ WHERE oid = $1::regclass`,    * We handle this similar to an inline table snapshot, but limited to the specific    * set of rows.
*/ - private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) { + private async resnapshot(writer: BucketDataWriter, rows: MissingRow[]) { const byTable = new Map(); for (let row of rows) { const relId = row.table.objectId as number; // always a number for postgres @@ -712,8 +250,8 @@ WHERE oid = $1::regclass`, try { for (let rows of byTable.values()) { const table = rows[0].table; - await this.snapshotTableInTx( - batch, + await this.snapshotter.snapshotTableInTx( + writer, db, table, rows.map((r) => r.key) @@ -727,14 +265,14 @@ WHERE oid = $1::regclass`, } } - private getTable(relationId: number): storage.SourceTable { - const table = this.relationCache.get(relationId); - if (table == null) { + private getTable(relationId: number): storage.SourceTable[] { + const tables = this.relationCache.get(relationId); + if (tables == null) { // We should always receive a replication message before the relation is used. // If we can't find it, it's a bug. throw new ReplicationAssertionError(`Missing relation cache for ${relationId}`); } - return table; + return tables; } private syncRulesRecord(row: SqliteInputRow): SqliteRow; @@ -752,135 +290,177 @@ WHERE oid = $1::regclass`, } async writeChange( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, msg: pgwire.PgoutputMessage ): Promise { if (msg.lsn == null) { return null; } if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') { - const table = this.getTable(getRelId(msg.relation)); - if (!table.syncAny) { - this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`); - return null; - } + const tables = this.getTable(getRelId(msg.relation)); + const filtered = tables.filter((t) => t.syncAny); + + for (let table of filtered) { + if (msg.tag == 'insert') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + const baseRecord = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg)); + return await writer.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: baseRecord, + afterReplicaId: getUuidReplicaIdentityBson(baseRecord, table.replicaIdColumns) + }); + } else if (msg.tag == 'update') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + // "before" may be null if the replica id columns are unchanged + // It's fine to treat that the same as an insert. + const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)); + const after = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg)); + return await writer.save({ + tag: storage.SaveOperationTag.UPDATE, + sourceTable: table, + before: before, + beforeReplicaId: before ? 
getUuidReplicaIdentityBson(before, table.replicaIdColumns) : undefined, + after: after, + afterReplicaId: getUuidReplicaIdentityBson(after, table.replicaIdColumns) + }); + } else if (msg.tag == 'delete') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!); - if (msg.tag == 'insert') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - const baseRecord = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await batch.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: baseRecord, - afterReplicaId: getUuidReplicaIdentityBson(baseRecord, table.replicaIdColumns) - }); - } else if (msg.tag == 'update') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - // "before" may be null if the replica id columns are unchanged - // It's fine to treat that the same as an insert. - const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)); - const after = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await batch.save({ - tag: storage.SaveOperationTag.UPDATE, - sourceTable: table, - before: before, - beforeReplicaId: before ? getUuidReplicaIdentityBson(before, table.replicaIdColumns) : undefined, - after: after, - afterReplicaId: getUuidReplicaIdentityBson(after, table.replicaIdColumns) - }); - } else if (msg.tag == 'delete') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!); - - return await batch.save({ - tag: storage.SaveOperationTag.DELETE, - sourceTable: table, - before: before, - beforeReplicaId: getUuidReplicaIdentityBson(before, table.replicaIdColumns), - after: undefined, - afterReplicaId: undefined - }); + return await writer.save({ + tag: storage.SaveOperationTag.DELETE, + sourceTable: table, + before: before, + beforeReplicaId: getUuidReplicaIdentityBson(before, table.replicaIdColumns), + after: undefined, + afterReplicaId: undefined + }); + } } } else if (msg.tag == 'truncate') { let tables: storage.SourceTable[] = []; for (let relation of msg.relations) { - const table = this.getTable(getRelId(relation)); - tables.push(table); + const relTables = this.getTable(getRelId(relation)); + tables.push(...relTables); } - return await batch.truncate(tables); + return await writer.truncate(tables); } return null; } + /** + * Start replication loop, and continue until aborted or error. + */ async replicate() { + let streamPromise: Promise | null = null; + let loopPromise: Promise | null = null; try { - // If anything errors here, the entire replication process is halted, and - // all connections automatically closed, including this one. 
- this.initialSnapshotPromise = (async () => { - const initReplicationConnection = await this.connections.replicationConnection(); - await this.initReplication(initReplicationConnection); - await initReplicationConnection.end(); - })(); - - await this.initialSnapshotPromise; - - // At this point, the above connection has often timed out, so we start a new one - const streamReplicationConnection = await this.connections.replicationConnection(); - await this.streamChanges(streamReplicationConnection); - await streamReplicationConnection.end(); + this.initPromise = this.initReplication(); + await this.initPromise; + // These Promises are both expected to run until aborted or error. + streamPromise = this.streamChanges() + .then(() => { + throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); + // First, prioritize non-aborted errors + for (let result of results) { + if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { + throw result.reason; + } + } + // Then include aborted errors + for (let result of results) { + if (result.status == 'rejected') { + throw result.reason; + } + } + + // If we get here, both Promises completed successfully, which is unexpected. + throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); } catch (e) { await this.storage.reportError(e); throw e; + } finally { + // Just to make sure + this.abortController.abort(); } } /** - * After calling replicate(), call this to wait for the initial snapshot to complete. - * - * For tests only. + * For tests: Wait until the initial snapshot is complete. */ - async waitForInitialSnapshot() { - if (this.initialSnapshotPromise == null) { - throw new ReplicationAssertionError(`Initial snapshot not started yet`); + public async waitForInitialSnapshot() { + if (this.initPromise == null) { + throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } - return this.initialSnapshotPromise; + await this.initPromise; + + await this.snapshotter.waitForInitialSnapshot(); } - async initReplication(replicationConnection: pgwire.PgConnection) { - const result = await this.initSlot(); - if (result.needsInitialSync) { - await this.startInitialReplication(replicationConnection, result); + /** + * Initialize replication. + * Start replication loop, and continue until aborted, error or initial snapshot completed. 
+ */ + private async initReplication() { + const result = await this.snapshotter.checkSlot(); + const db = await this.connections.snapshotConnection(); + try { + await this.snapshotter.setupSlot(db, result); + if (result.needsInitialSync) { + await this.snapshotter.queueSnapshotTables(db); + } + } finally { + await db.end(); } } - async streamChanges(replicationConnection: pgwire.PgConnection) { + private async streamChanges() { + const streamReplicationConnection = await this.connections.replicationConnection(); try { - await this.streamChangesInternal(replicationConnection); + await this.streamChangesInternal(streamReplicationConnection); } catch (e) { if (isReplicationSlotInvalidError(e)) { throw new MissingReplicationSlotError(e.message, e); } throw e; + } finally { + await streamReplicationConnection.end(); } } private async streamChangesInternal(replicationConnection: pgwire.PgConnection) { // When changing any logic here, check /docs/wal-lsns.md. - const { createEmptyCheckpoints } = await this.ensureStorageCompatibility(); + + // Viewing the contents of logical messages emitted with `pg_logical_emit_message` + // is only supported on Postgres >= 14.0. + // https://www.postgresql.org/docs/14/protocol-logical-replication.html + const { createEmptyCheckpoints, exposesLogicalMessages } = await ensureStorageCompatibility( + this.connections.pool, + this.storage.factory + ); const replicationOptions: Record = { proto_version: '1', publication_names: PUBLICATION_NAME }; - - /** - * Viewing the contents of logical messages emitted with `pg_logical_emit_message` - * is only supported on Postgres >= 14.0. - * https://www.postgresql.org/docs/14/protocol-logical-replication.html - */ - const exposesLogicalMessages = await this.checkLogicalMessageSupport(); if (exposesLogicalMessages) { /** * Only add this option if the Postgres server supports it. @@ -922,137 +502,137 @@ WHERE oid = $1::regclass`, }); }; - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: false, - markRecordUnavailable - }, - async (batch) => { - // We don't handle any plain keepalive messages while we have transactions. - // While we have transactions, we use that to advance the position. - // Replication never starts in the middle of a transaction, so this starts as false. - let skipKeepalive = false; - let count = 0; - - for await (const chunk of replicationStream.pgoutputDecode()) { - this.touch(); - - if (this.abort_signal.aborted) { - break; - } + await using writer = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: false, + markRecordUnavailable + }); - // chunkLastLsn may come from normal messages in the chunk, - // or from a PrimaryKeepalive message. - const { messages, lastLsn: chunkLastLsn } = chunk; + // We don't handle any plain keepalive messages while we have transactions. + // While we have transactions, we use that to advance the position. + // Replication never starts in the middle of a transaction, so this starts as false. + let skipKeepalive = false; + let count = 0; + + for await (const chunk of replicationStream.pgoutputDecode()) { + this.touch(); + + if (this.abortSignal.aborted) { + break; + } + + // chunkLastLsn may come from normal messages in the chunk, + // or from a PrimaryKeepalive message. 
+ const { messages, lastLsn: chunkLastLsn } = chunk; + + /** + * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`. + * If we can't check the logical messages, we should assume a keepalive if we + * receive an empty array of messages in a replication event. + */ + const assumeKeepAlive = !exposesLogicalMessages; + let keepAliveDetected = false; + const lastCommit = messages.findLast((msg) => msg.tag == 'commit'); + + for (const msg of messages) { + if (msg.tag == 'relation') { + await this.handleRelation({ + writer, + descriptor: getPgOutputRelation(msg), + snapshot: true, + referencedTypeIds: referencedColumnTypeIds(msg) + }); + } else if (msg.tag == 'begin') { + // This may span multiple transactions in the same chunk, or even across chunks. + skipKeepalive = true; + if (this.oldestUncommittedChange == null) { + this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n)); + } + } else if (msg.tag == 'commit') { + this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); + if (msg == lastCommit) { + // Only commit if this is the last commit in the chunk. + // This effectively lets us batch multiple transactions within the same chunk + // into a single flush, increasing throughput for many small transactions. + skipKeepalive = false; + // flush() must be before the resnapshot check - that is + // typically what reports the resnapshot records. + await writer.flush({ oldestUncommittedChange: this.oldestUncommittedChange }); + // This _must_ be checked after the flush(), and before + // commit() or ack(). We never persist the resnapshot list, + // so we have to process it before marking our progress. + if (resnapshot.length > 0) { + await this.resnapshot(writer, resnapshot); + resnapshot = []; + } + const didCommit = await writer.commit(msg.lsn!, { + createEmptyCheckpoints, + oldestUncommittedChange: this.oldestUncommittedChange + }); + await this.ack(msg.lsn!, replicationStream); + if (didCommit) { + this.oldestUncommittedChange = null; + this.isStartingReplication = false; + } + } + } else { + if (count % 100 == 0) { + this.logger.info(`Replicating op ${count} ${msg.lsn}`); + } /** - * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`. - * If we can't check the logical messages, we should assume a keepalive if we - * receive an empty array of messages in a replication event. + * If we can see the contents of logical messages, then we can check if a keepalive + * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message. + * If we can't see the contents of logical messages, then we should assume a keepalive is required + * due to the default value of `assumeKeepalive`. */ - const assumeKeepAlive = !exposesLogicalMessages; - let keepAliveDetected = false; - const lastCommit = messages.findLast((msg) => msg.tag == 'commit'); - - for (const msg of messages) { - if (msg.tag == 'relation') { - await this.handleRelation({ - batch, - descriptor: getPgOutputRelation(msg), - snapshot: true, - referencedTypeIds: referencedColumnTypeIds(msg) - }); - } else if (msg.tag == 'begin') { - // This may span multiple transactions in the same chunk, or even across chunks. 
- skipKeepalive = true; - if (this.oldestUncommittedChange == null) { - this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n)); - } - } else if (msg.tag == 'commit') { - this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); - if (msg == lastCommit) { - // Only commit if this is the last commit in the chunk. - // This effectively lets us batch multiple transactions within the same chunk - // into a single flush, increasing throughput for many small transactions. - skipKeepalive = false; - // flush() must be before the resnapshot check - that is - // typically what reports the resnapshot records. - await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange }); - // This _must_ be checked after the flush(), and before - // commit() or ack(). We never persist the resnapshot list, - // so we have to process it before marking our progress. - if (resnapshot.length > 0) { - await this.resnapshot(batch, resnapshot); - resnapshot = []; - } - const didCommit = await batch.commit(msg.lsn!, { - createEmptyCheckpoints, - oldestUncommittedChange: this.oldestUncommittedChange - }); - await this.ack(msg.lsn!, replicationStream); - if (didCommit) { - this.oldestUncommittedChange = null; - this.isStartingReplication = false; - } - } - } else { - if (count % 100 == 0) { - this.logger.info(`Replicating op ${count} ${msg.lsn}`); - } - - /** - * If we can see the contents of logical messages, then we can check if a keepalive - * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message. - * If we can't see the contents of logical messages, then we should assume a keepalive is required - * due to the default value of `assumeKeepalive`. - */ - if (exposesLogicalMessages && isKeepAliveMessage(msg)) { - keepAliveDetected = true; - } - - count += 1; - const flushResult = await this.writeChange(batch, msg); - if (flushResult != null && resnapshot.length > 0) { - // If we have large transactions, we also need to flush the resnapshot list - // periodically. - // TODO: make sure this bit is actually triggered - await this.resnapshot(batch, resnapshot); - resnapshot = []; - } - } + if (exposesLogicalMessages && isKeepAliveMessage(msg)) { + keepAliveDetected = true; } - if (!skipKeepalive) { - if (assumeKeepAlive || keepAliveDetected) { - // Reset the detection flag. - keepAliveDetected = false; - - // In a transaction, we ack and commit according to the transaction progress. - // Outside transactions, we use the PrimaryKeepalive messages to advance progress. - // Big caveat: This _must not_ be used to skip individual messages, since this LSN - // may be in the middle of the next transaction. - // It must only be used to associate checkpoints with LSNs. - const didCommit = await batch.keepalive(chunkLastLsn); - if (didCommit) { - this.oldestUncommittedChange = null; - } + count += 1; + const flushResult = await this.writeChange(writer, msg); + if (flushResult != null && resnapshot.length > 0) { + // If we have large transactions, we also need to flush the resnapshot list + // periodically. + // TODO: make sure this bit is actually triggered + await this.resnapshot(writer, resnapshot); + resnapshot = []; + } + } + } - this.isStartingReplication = false; - } + if (!skipKeepalive) { + if (assumeKeepAlive || keepAliveDetected) { + // Reset the detection flag. + keepAliveDetected = false; + + // In a transaction, we ack and commit according to the transaction progress. 
+ // Outside transactions, we use the PrimaryKeepalive messages to advance progress. + // Big caveat: This _must not_ be used to skip individual messages, since this LSN + // may be in the middle of the next transaction. + // It must only be used to associate checkpoints with LSNs. - // We receive chunks with empty messages often (about each second). - // Acknowledging here progresses the slot past these and frees up resources. - await this.ack(chunkLastLsn, replicationStream); + const didCommit = await writer.keepalive(chunkLastLsn); + if (didCommit) { + this.oldestUncommittedChange = null; } - this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED).add(1); + this.isStartingReplication = false; } + + // We receive chunks with empty messages often (about each second). + // Acknowledging here progresses the slot past these and frees up resources. + await this.ack(chunkLastLsn, replicationStream); } - ); + + this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED).add(1); + } + + throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } async ack(lsn: string, replicationStream: pgwire.ReplicationStream) { @@ -1063,55 +643,6 @@ WHERE oid = $1::regclass`, replicationStream.ack(lsn); } - /** - * Ensures that the storage is compatible with the replication connection. - * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. - */ - protected async ensureStorageCompatibility(): Promise { - const supportsLogicalMessages = await this.checkLogicalMessageSupport(); - - const storageIdentifier = await this.storage.factory.getSystemIdentifier(); - if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { - return { - // Keep the same behaviour as before allowing Postgres storage. - createEmptyCheckpoints: true, - oldestUncommittedChange: null - }; - } - - const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); - /** - * Check if the same server is being used for both the sync bucket storage and the logical replication. - */ - const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(this.connections.pool); - - if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { - throw new DatabaseConnectionError( - ErrorCode.PSYNC_S1144, - `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, - new Error('Postgres version is below 14') - ); - } - - return { - /** - * Don't create empty checkpoints if the same Postgres database is used for the data source - * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. - */ - createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, - oldestUncommittedChange: null - }; - } - - /** - * Check if the replication connection Postgres server supports - * viewing the contents of logical replication messages. - */ - protected async checkLogicalMessageSupport() { - const version = await this.connections.getServerVersion(); - return version ? 
version.compareMain('14.0.0') >= 0 : false; - } - async getReplicationLagMillis(): Promise { if (this.oldestUncommittedChange == null) { if (this.isStartingReplication) { diff --git a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts index 340af22b9..737bb97c4 100644 --- a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts +++ b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts @@ -21,6 +21,10 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob this.connectionFactory = options.connectionFactory; } + public get storage() { + return this.options.storage; + } + /** * Postgres on RDS writes performs a WAL checkpoint every 5 minutes by default, which creates a new 64MB file. * diff --git a/modules/module-postgres/src/replication/replication-utils.ts b/modules/module-postgres/src/replication/replication-utils.ts index 1fe33bd0a..416f6675a 100644 --- a/modules/module-postgres/src/replication/replication-utils.ts +++ b/modules/module-postgres/src/replication/replication-utils.ts @@ -1,11 +1,18 @@ import * as pgwire from '@powersync/service-jpgwire'; import * as lib_postgres from '@powersync/lib-service-postgres'; -import { ErrorCode, logger, ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; -import { PatternResult, storage } from '@powersync/service-core'; +import { + DatabaseConnectionError, + ErrorCode, + logger, + ServiceAssertionError, + ServiceError +} from '@powersync/lib-services-framework'; +import { BucketStorageFactory, PatternResult, storage } from '@powersync/service-core'; import * as sync_rules from '@powersync/service-sync-rules'; import * as service_types from '@powersync/service-types'; import { ReplicationIdentity } from './PgRelation.js'; +import { getServerVersion } from '../utils/postgres_version.js'; export interface ReplicaIdentityResult { replicationColumns: storage.ColumnDescriptor[]; @@ -396,3 +403,57 @@ export async function cleanUpReplicationSlot(slotName: string, db: pgwire.PgClie params: [{ type: 'varchar', value: slotName }] }); } + +/** + * Ensures that the storage is compatible with the replication connection. + * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. + */ +export async function ensureStorageCompatibility( + db: pgwire.PgClient, + factory: BucketStorageFactory +): Promise { + const supportsLogicalMessages = await checkLogicalMessageSupport(db); + + const storageIdentifier = await factory.getSystemIdentifier(); + if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { + return { + // Keep the same behaviour as before allowing Postgres storage. + createEmptyCheckpoints: true, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; + } + + const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); + /** + * Check if the same server is being used for both the sync bucket storage and the logical replication. 
+ */ + const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(db); + + if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { + throw new DatabaseConnectionError( + ErrorCode.PSYNC_S1144, + `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, + new Error('Postgres version is below 14') + ); + } + + return { + /** + * Don't create empty checkpoints if the same Postgres database is used for the data source + * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. + */ + createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; +} + +/** + * Check if the replication connection Postgres server supports + * viewing the contents of logical replication messages. + */ +export async function checkLogicalMessageSupport(db: pgwire.PgClient) { + const version = await getServerVersion(db); + return version ? version.compareMain('14.0.0') >= 0 : false; +} diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 43a29ea28..d67f88784 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -4,6 +4,7 @@ import { populateData } from '../../dist/utils/populate_test_data.js'; import { env } from './env.js'; import { describeWithStorage, TEST_CONNECTION_OPTIONS } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; +import { bucketRequest } from '@powersync/service-core-tests'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { describeWithStorage({ timeout: 240_000 }, function (config) { @@ -44,8 +45,13 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operation_count); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operation_count); const perSecond = Math.round((operation_count / duration) * 1000); console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s. 
${used}MB heap`); }); @@ -89,9 +95,15 @@ function defineBatchTests(config: storage.TestStorageConfig) { await context.replicateSnapshot(); const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); + + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); const duration = Date.now() - start; - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operation_count); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operation_count); const perSecond = Math.round((operation_count / duration) * 1000); console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s.`); printMemoryUsage(); @@ -136,13 +148,19 @@ function defineBatchTests(config: storage.TestStorageConfig) { operationCount += perTransaction * 2; } + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const start = Date.now(); const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operationCount); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operationCount); const perSecond = Math.round((operationCount / duration) * 1000); // This number depends on the test machine, so we keep the test significantly // lower than expected numbers. @@ -158,8 +176,8 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint2 = await context.getCheckpoint({ timeout: 20_000 }); const truncateDuration = Date.now() - truncateStart; - const checksum2 = await context.storage!.getChecksums(checkpoint2, ['global[]']); - const truncateCount = checksum2.get('global[]')!.count - checksum.get('global[]')!.count; + const checksum2 = await context.storage!.getChecksums(checkpoint2, [request]); + const truncateCount = checksum2.get(request.bucket)!.count - checksum.get(request.bucket)!.count; expect(truncateCount).toEqual(numTransactions * perTransaction); const truncatePerSecond = Math.round((truncateCount / truncateDuration) * 1000); console.log(`Truncated ${truncateCount} ops in ${truncateDuration}ms ${truncatePerSecond} ops/s. 
${used}MB heap`); @@ -224,8 +242,13 @@ function defineBatchTests(config: storage.TestStorageConfig) { await context.replicateSnapshot(); const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual((numDocs + 2) * 4); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual((numDocs + 2) * 4); }); function printMemoryUsage() { diff --git a/modules/module-postgres/test/src/pg_test.test.ts b/modules/module-postgres/test/src/pg_test.test.ts index 0e4705a45..ff581d1ce 100644 --- a/modules/module-postgres/test/src/pg_test.test.ts +++ b/modules/module-postgres/test/src/pg_test.test.ts @@ -1,8 +1,8 @@ -import { WalStream } from '@module/replication/WalStream.js'; import { PostgresTypeResolver } from '@module/types/resolver.js'; import * as dns from 'node:dns'; import type { LookupFunction } from 'node:net'; +import { PostgresSnapshotter } from '@module/replication/PostgresSnapshotter.js'; import * as pgwire from '@powersync/service-jpgwire'; import { applyRowContext, @@ -725,5 +725,5 @@ async function interpretResults(db: pgwire.PgClient, results: pgwire.PgResult) { const typeCache = new PostgresTypeResolver(db); await typeCache.fetchTypesForSchema(); - return results.rows.map((row) => WalStream.decodeRow(row, typeCache)); + return results.rows.map((row) => PostgresSnapshotter.decodeRow(row, typeCache)); } diff --git a/modules/module-postgres/test/src/schema_changes.test.ts b/modules/module-postgres/test/src/schema_changes.test.ts index 4f9b75e38..a2418cf2c 100644 --- a/modules/module-postgres/test/src/schema_changes.test.ts +++ b/modules/module-postgres/test/src/schema_changes.test.ts @@ -55,23 +55,24 @@ function defineTests(config: storage.TestStorageConfig) { const reduced = reduceBucket(data).slice(1); expect(reduced.sort(compareIds)).toMatchObject([PUT_T3]); - // Initial inserts - expect(data.slice(0, 2)).toMatchObject([PUT_T1, PUT_T2]); - - // Truncate - order doesn't matter - expect(data.slice(2, 4).sort(compareIds)).toMatchObject([REMOVE_T1, REMOVE_T2]); - - expect(data.slice(4, 5)).toMatchObject([ - // Snapshot and/or replication insert - PUT_T3 - ]); - - if (data.length > 5) { - expect(data.slice(5)).toMatchObject([ - // Replicated insert (optional duplication) - PUT_T3 - ]); - } + // Actual operations may look like this, but is not stable: + // // Initial inserts + // expect(data.slice(0, 2)).toMatchObject([PUT_T1, PUT_T2]); + + // // Truncate - order doesn't matter + // expect(data.slice(2, 4).sort(compareIds)).toMatchObject([REMOVE_T1, REMOVE_T2]); + + // expect(data.slice(4, 5)).toMatchObject([ + // // Snapshot and/or replication insert + // PUT_T3 + // ]); + + // if (data.length > 5) { + // expect(data.slice(5)).toMatchObject([ + // // Replicated insert (optional duplication) + // PUT_T3 + // ]); + // } }); test('add table', async () => { @@ -646,7 +647,7 @@ config: { statement: `UPDATE test_data SET other = ROW(TRUE, 2)::composite;` } ); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([ putOp('test_data', { id: 't1' }), putOp('test_data', { id: 't1', other: '{"foo":1,"bar":2}' }) diff 
--git a/modules/module-postgres/test/src/slow_tests.test.ts b/modules/module-postgres/test/src/slow_tests.test.ts index 1d2e9a424..cf5b371c6 100644 --- a/modules/module-postgres/test/src/slow_tests.test.ts +++ b/modules/module-postgres/test/src/slow_tests.test.ts @@ -19,7 +19,9 @@ import { createCoreReplicationMetrics, initializeCoreReplicationMetrics, reduceBucket, - storage + settledPromise, + storage, + unsettledPromise } from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; @@ -39,7 +41,7 @@ function defineSlowTests(config: storage.TestStorageConfig) { let walStream: WalStream | undefined; let connections: PgManager | undefined; let abortController: AbortController | undefined; - let streamPromise: Promise | undefined; + let streamPromise: Promise> | undefined; beforeAll(async () => { createCoreReplicationMetrics(METRICS_HELPER.metricsEngine); @@ -50,7 +52,7 @@ function defineSlowTests(config: storage.TestStorageConfig) { // This cleans up, similar to WalStreamTestContext.dispose(). // These tests are a little more complex than what is supported by WalStreamTestContext. abortController?.abort(); - await streamPromise?.catch((_) => {}); + await streamPromise; streamPromise = undefined; connections?.destroy(); @@ -105,9 +107,11 @@ bucket_definitions: await pool.query(`ALTER TABLE test_data REPLICA IDENTITY FULL`); let abort = false; - streamPromise = walStream.replicate().finally(() => { - abort = true; - }); + streamPromise = settledPromise( + walStream.replicate().finally(() => { + abort = true; + }) + ); await walStream.waitForInitialSnapshot(); const start = Date.now(); @@ -298,7 +302,7 @@ bucket_definitions: } abortController.abort(); - await streamPromise.catch((e) => { + await unsettledPromise(streamPromise).catch((e) => { if (e instanceof ReplicationAbortedError) { // Ignore } else { @@ -361,7 +365,7 @@ bucket_definitions: // 3. Start replication, but don't wait for it let initialReplicationDone = false; - streamPromise = walStream.replicate(); + streamPromise = settledPromise(walStream.replicate()); walStream .waitForInitialSnapshot() .catch((_) => {}) @@ -409,7 +413,7 @@ bucket_definitions: } abortController.abort(); - await streamPromise.catch((e) => { + await unsettledPromise(streamPromise).catch((e) => { if (e instanceof ReplicationAbortedError) { // Ignore } else { @@ -479,7 +483,7 @@ bucket_definitions: // 3. Start replication, but don't wait for it let initialReplicationDone = false; - streamPromise = context.replicateSnapshot().finally(() => { + streamPromise = settledPromise(context.replicateSnapshot()).finally(() => { initialReplicationDone = true; }); @@ -501,7 +505,7 @@ bucket_definitions: await new Promise((resolve) => setTimeout(resolve, Math.random() * 10)); } - await streamPromise; + await unsettledPromise(streamPromise); // 5. Once initial replication is done, wait for the streaming changes to complete syncing. 
const data = await context.getBucketData('global[]', 0n); diff --git a/modules/module-postgres/test/src/wal_stream.test.ts b/modules/module-postgres/test/src/wal_stream.test.ts index 3a225ecec..89cacc72d 100644 --- a/modules/module-postgres/test/src/wal_stream.test.ts +++ b/modules/module-postgres/test/src/wal_stream.test.ts @@ -502,7 +502,7 @@ config: await context.initializeReplication(); await pool.query(`INSERT INTO test_data(id, description) VALUES ('t1', '2025-09-10 15:17:14+02')`); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([putOp('test_data', { id: 't1', description: '2025-09-10T13:17:14.000000Z' })]); }); @@ -534,7 +534,7 @@ config: `INSERT INTO test_data(id, description, ts) VALUES ('t2', ROW(TRUE, 2)::composite, '2025-11-17T09:12:00Z')` ); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([ putOp('test_data', { id: 't1', description: '{"foo":1,"bar":1}', ts: '2025-11-17T09:11:00.000000Z' }), putOp('test_data', { id: 't2', description: '{"foo":1,"bar":2}', ts: '2025-11-17T09:12:00.000000Z' }) @@ -561,7 +561,7 @@ config: await context.initializeReplication(); await pool.query(`INSERT INTO test_data(id) VALUES ('t1')`); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([putOp('test_data', { id: 't1' })]); }); diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index 96c49a441..c7ada271a 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -1,5 +1,6 @@ import { PgManager } from '@module/replication/PgManager.js'; import { PUBLICATION_NAME, WalStream, WalStreamOptions } from '@module/replication/WalStream.js'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; import { BucketStorageFactory, createCoreReplicationMetrics, @@ -11,11 +12,9 @@ import { SyncRulesBucketStorage, unsettledPromise } from '@powersync/service-core'; -import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as pgwire from '@powersync/service-jpgwire'; import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js'; -import { CustomTypeRegistry } from '@module/types/registry.js'; -import { ReplicationAbortedError } from '@powersync/lib-services-framework'; export class WalStreamTestContext implements AsyncDisposable { private _walStream?: WalStream; @@ -171,7 +170,8 @@ export class WalStreamTestContext implements AsyncDisposable { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); - const map = new Map(Object.entries(buckets)); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -183,8 +183,9 @@ export class WalStreamTestContext implements AsyncDisposable { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await 
this.getCheckpoint(options); - const map = new Map([[bucket, start]]); + let map = [bucketRequest(syncRules, bucket, start)]; let data: OplogEntry[] = []; while (true) { const batch = this.storage!.getBucketDataBatch(checkpoint, map); @@ -194,7 +195,7 @@ export class WalStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map.set(bucket, BigInt(batches[0]!.chunkData.next_after)); + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } @@ -207,8 +208,9 @@ export class WalStreamTestContext implements AsyncDisposable { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const { checkpoint } = await this.storage!.getCheckpoint(); - const map = new Map([[bucket, start]]); + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index e23c109e5..f6d64bf1d 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,60 +1,76 @@ -import { storage, utils } from '@powersync/service-core'; -import { GetQuerierOptions, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; -import { versionedHydrationState } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { BucketDataRequest, InternalOpId, JwtPayload, storage, utils } from '@powersync/service-core'; +import { + GetQuerierOptions, + RequestParameters, + SqlSyncRules, + versionedHydrationState +} from '@powersync/service-sync-rules'; import * as bson from 'bson'; +import { SOURCE } from '@powersync/service-sync-rules'; + export const ZERO_LSN = '0/0'; export const PARSE_OPTIONS: storage.ParseSyncRulesOptions = { defaultSchema: 'public' }; -export const BATCH_OPTIONS: storage.StartBatchOptions = { +export const BATCH_OPTIONS: storage.CreateWriterOptions = { ...PARSE_OPTIONS, zeroLSN: ZERO_LSN, storeCurrentData: true }; -export function testRules(content: string): storage.PersistedSyncRulesContent { - return { - id: 1, - sync_rules_content: content, - slot_name: 'test', - active: true, - last_checkpoint_lsn: '', - parsed(options) { - return { - id: 1, - sync_rules: SqlSyncRules.fromYaml(content, options), - slot_name: 'test', - hydratedSyncRules() { - return this.sync_rules.config.hydrate({ hydrationState: versionedHydrationState(1) }); - } - }; - }, - lock() { - throw new Error('Not implemented'); - } - }; -} - -export function makeTestTable( +export async function resolveTestTable( + writer: storage.BucketDataWriter, name: string, replicaIdColumns: string[] | undefined, - options: { tableIdStrings: boolean } + options: { tableIdStrings: boolean }, + idIndex: number = 1 ) { const relId = utils.hashData('table', name, (replicaIdColumns ?? ['id']).join(',')); - const id = - options.tableIdStrings == false ? new bson.ObjectId('6544e3899293153fa7b38331') : '6544e3899293153fa7b38331'; - return new storage.SourceTable({ - id: id, - connectionTag: storage.SourceTable.DEFAULT_TAG, - objectId: relId, + // Semi-hardcoded id for tests, to get consistent output. + // If the same test uses multiple tables, pass idIndex to get different ids. 
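Note on the test-utility changes above and the storage-test hunks that follow: makeTestTable() is replaced by resolveTestTable(), which resolves the table through the writer's row processor against the active sync rules (so the table name must match a table pattern in those rules, and a distinct idIndex should be passed when one test resolves several tables), and startBatch() callbacks are replaced by an explicit writer lifecycle. A condensed sketch of that lifecycle, copied from the patterns used repeatedly in the hunks below (bucketStorage, config and test_utils come from the enclosing test setup):

await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS);
const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config);

await writer.markAllSnapshotDone('1/1');
await writer.save({
  sourceTable: testTable,
  tag: storage.SaveOperationTag.INSERT,
  after: { id: 't1' },
  afterReplicaId: test_utils.rid('t1')
});

const result = await writer.flush(); // result carries flushed_op; tests fall back to 0n when nothing was flushed
await writer.commit('1/1');
const checkpoint = result!.flushed_op;
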
+ const idString = '6544e3899293153fa7b383' + (30 + idIndex).toString().padStart(2, '0'); + + const id = options.tableIdStrings == false ? new bson.ObjectId(idString) : idString; + let didGenerateId = false; + const patterns = writer.rowProcessor.getMatchingTablePatterns({ schema: 'public', name: name, - replicaIdColumns: (replicaIdColumns ?? ['id']).map((column) => ({ name: column, type: 'VARCHAR', typeId: 25 })), - snapshotComplete: true + connectionTag: storage.SourceTable.DEFAULT_TAG + }); + if (patterns.length == 0) { + throw new Error(`Table ${name} not found in sync rules`); + } else if (patterns.length > 1) { + throw new Error(`Multiple patterns match table ${name} - not supported in test`); + } + const pattern = patterns[0]; + const result = await writer.resolveTables({ + connection_id: 1, + connection_tag: storage.SourceTable.DEFAULT_TAG, + + entity_descriptor: { + name: name, + schema: 'public', + objectId: relId, + + replicaIdColumns: (replicaIdColumns ?? ['id']).map((column) => ({ name: column, type: 'VARCHAR', typeId: 25 })) + }, + pattern, + idGenerator: () => { + if (didGenerateId) { + throw new Error('idGenerator called multiple times - not supported in tests'); + } + didGenerateId = true; + return id; + } }); + const table = result[0]; + if (table == null) { + throw new Error(`Failed to resolve test table ${name}`); + } + return result[0]; } export function getBatchData( @@ -119,3 +135,63 @@ export function querierOptions(globalParameters: RequestParameters): GetQuerierO streams: {} }; } + +export function requestParameters( + jwtPayload: Record, + clientParameters?: Record +): RequestParameters { + return new RequestParameters(new JwtPayload(jwtPayload), clientParameters ?? {}); +} + +function isParsedSyncRules( + syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules +): syncRules is storage.PersistedSyncRules { + return (syncRules as storage.PersistedSyncRules).sync_rules !== undefined; +} + +/** + * Bucket names no longer purely depend on the sync rules. + * This converts a bucket name like "global[]" into the actual bucket name, for use in tests. + */ +export function bucketRequest( + syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules, + bucket?: string, + start?: InternalOpId | string | number +): BucketDataRequest { + const parsed = isParsedSyncRules(syncRules) ? syncRules : syncRules.parsed(PARSE_OPTIONS); + const hydrationState = parsed.hydrationState; + bucket ??= 'global[]'; + const definitionName = bucket.substring(0, bucket.indexOf('[')); + const parameters = bucket.substring(bucket.indexOf('[')); + const source = parsed.sync_rules.config.bucketDataSources.find((b) => b.uniqueName === definitionName); + + if (source == null) { + throw new Error(`Failed to find global bucket ${bucket}`); + } + const bucketName = hydrationState.getBucketSourceScope(source).bucketPrefix + parameters; + return { + bucket: bucketName, + start: BigInt(start ?? 0n), + source: source + }; +} + +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. 
+ */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index 64c00eb2d..3b26384cd 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -1,10 +1,10 @@ import { addChecksums, storage } from '@powersync/service-core'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; +import { bucketRequest } from '../test-utils/test-utils-index.js'; export function registerCompactTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); test('compacting (1)', async () => { await using factory = await generateStorageFactory(); @@ -16,46 +16,47 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + const request = bucketRequest(syncRules); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: test_utils.rid('t1') + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') }); + const result = await writer.flush(); + await writer.commit('1/1'); + const checkpoint = result!.flushed_op; + const request2 = bucketRequest(syncRules); - const batchBefore = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]])) - ); + const batchBefore = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request2])); const dataBefore = batchBefore.chunkData.data; - const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumBefore = await bucketStorage.getChecksums(checkpoint, [request2]); expect(dataBefore).toMatchObject([ { @@ -84,13 +85,11 @@ bucket_definitions: minChangeRatio: 0 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]])) - ); + const batchAfter = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataAfter = batchAfter.chunkData.data; - const checksumAfter = await bucketStorage.getChecksums(checkpoint, 
['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint, [request]); bucketStorage.clearChecksumCache(); - const checksumAfter2 = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumAfter2 = await bucketStorage.getChecksums(checkpoint, [request]); expect(batchAfter.targetOp).toEqual(3n); expect(dataAfter).toMatchObject([ @@ -108,8 +107,8 @@ bucket_definitions: } ]); - expect(checksumAfter.get('global[]')).toEqual(checksumBefore.get('global[]')); - expect(checksumAfter2.get('global[]')).toEqual(checksumBefore.get('global[]')); + expect(checksumAfter.get(request.bucket)).toEqual(checksumBefore.get(request.bucket)); + expect(checksumAfter2.get(request.bucket)).toEqual(checksumBefore.get(request.bucket)); test_utils.validateCompactedBucket(dataBefore, dataAfter); }); @@ -124,55 +123,55 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: test_utils.rid('t1') + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: test_utils.rid('t1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') + }); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: test_utils.rid('t1') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') }); + const result = await writer.flush(); + await writer.commit('1/1'); + const checkpoint = result!.flushed_op; + const request = bucketRequest(syncRules); - const batchBefore = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]])) - ); + const batchBefore = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataBefore = batchBefore.chunkData.data; - const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumBefore = await bucketStorage.getChecksums(checkpoint, [request]); // op_id sequence depends on the storage implementation expect(dataBefore).toMatchObject([ @@ -202,12 +201,10 @@ bucket_definitions: minChangeRatio: 0 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]])) - ); + const batchAfter = await 
test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataAfter = batchAfter.chunkData.data; bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint, [request]); expect(batchAfter.targetOp).toBeLessThanOrEqual(checkpoint); expect(dataAfter).toMatchObject([ @@ -224,8 +221,8 @@ bucket_definitions: op: 'PUT' } ]); - expect(checksumAfter.get('global[]')).toEqual({ - ...checksumBefore.get('global[]'), + expect(checksumAfter.get(request.bucket)).toEqual({ + ...checksumBefore.get(request.bucket), count: 2 }); @@ -242,53 +239,54 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + const result = await writer.flush(); + await writer.commit('1/1'); + const checkpoint1 = result!.flushed_op; - await bucketStorage.getChecksums(checkpoint1, ['global[]']); - - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't2' - }, - beforeReplicaId: 't2' - }); - await batch.commit('2/1'); + const request = bucketRequest(syncRules); + await bucketStorage.getChecksums(checkpoint1, [request]); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't2' + }, + beforeReplicaId: 't2' }); + const result2 = await writer.flush(); + await writer.commit('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.compact({ @@ -299,20 +297,18 @@ bucket_definitions: minChangeRatio: 0 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, new Map([['global[]', 0n]])) - ); + const batchAfter = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint2, [request])); const dataAfter = batchAfter.chunkData.data; await bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); expect(dataAfter).toMatchObject([ { op: 'CLEAR' } ]); - expect(checksumAfter.get('global[]')).toEqual({ - bucket: 'global[]', + 
expect(checksumAfter.get(request.bucket)).toEqual({ + bucket: request.bucket, count: 1, checksum: dataAfter[0].checksum }); @@ -331,77 +327,77 @@ bucket_definitions: - select * from test where b = bucket.b` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + /** + * Repeatedly create operations which fall into different buckets. + * The bucket operations are purposely interleaved as the op_id increases. + * A large amount of operations are created here. + * The configured window of compacting operations is 100. This means the initial window will + * contain operations from multiple buckets. + */ + for (let count = 0; count < 100; count++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + b: 'b1', + value: 'start' + }, + afterReplicaId: test_utils.rid('t1') + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - /** - * Repeatedly create operations which fall into different buckets. - * The bucket operations are purposely interleaved as the op_id increases. - * A large amount of operations are created here. - * The configured window of compacting operations is 100. This means the initial window will - * contain operations from multiple buckets. - */ - for (let count = 0; count < 100; count++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - b: 'b1', - value: 'start' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - b: 'b1', - value: 'intermediate' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - b: 'b2', - value: 'start' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - b: 'b1', - value: 'final' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - b: 'b2', - value: 'final' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.commit('1/1'); - } - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + b: 'b1', + value: 'intermediate' + }, + afterReplicaId: test_utils.rid('t1') + }); - const checkpoint = result!.flushed_op; + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + b: 'b2', + value: 'start' + }, + afterReplicaId: test_utils.rid('t2') + }); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + b: 'b1', + value: 'final' + }, + afterReplicaId: test_utils.rid('t1') + }); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + b: 'b2', + value: 'final' + }, + afterReplicaId: test_utils.rid('t2') + }); + + await writer.commit('1/1'); + } + + const checkpoint = (await bucketStorage.getCheckpoint()).checkpoint; await bucketStorage.compact({ clearBatchLimit: 
100, @@ -411,15 +407,9 @@ bucket_definitions: minChangeRatio: 0 }); - const batchAfter = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - new Map([ - ['grouped["b1"]', 0n], - ['grouped["b2"]', 0n] - ]) - ) - ); + const groupedBuckets = ['b1', 'b2']; + const groupedRequests = groupedBuckets.map((bucket) => bucketRequest(syncRules, `grouped["${bucket}"]`, 0n)); + const batchAfter = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, groupedRequests)); const dataAfter = batchAfter.flatMap((b) => b.chunkData.data); // The op_ids will vary between MongoDB and Postgres storage @@ -459,39 +449,39 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + await writer.commit('1/1'); + await bucketStorage.compact({ clearBatchLimit: 2, moveBatchLimit: 1, @@ -500,22 +490,22 @@ bucket_definitions: minChangeRatio: 0 }); - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't2' - }, - beforeReplicaId: 't2' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't2' + }, + beforeReplicaId: 't2' }); + const result2 = await writer.flush(); + await writer.commit('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(checksumAfter.get('global[]')).toMatchObject({ - bucket: 'global[]', + const request = bucketRequest(syncRules); + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); + expect(checksumAfter.get(request.bucket)).toMatchObject({ + bucket: request.bucket, count: 4 }); }); @@ -530,43 +520,44 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + const request = bucketRequest(syncRules); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 
't1' + }, + afterReplicaId: 't1' + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1' + }, + afterReplicaId: 't1' }); + const result = await writer.flush(); + await writer.commit('1/1'); + // Get checksums here just to populate the cache - await bucketStorage.getChecksums(result!.flushed_op, ['global[]']); - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('2/1'); + await bucketStorage.getChecksums(result!.flushed_op, [request]); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + const result2 = await writer.flush(); + await writer.commit('2/1'); await bucketStorage.compact({ clearBatchLimit: 20, @@ -577,9 +568,9 @@ bucket_definitions: }); const checkpoint2 = result2!.flushed_op; - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(checksumAfter.get('global[]')).toMatchObject({ - bucket: 'global[]', + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); + expect(checksumAfter.get(request.bucket)).toMatchObject({ + bucket: request.bucket, count: 1 }); }); diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index d597f2cba..c991b99f2 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -33,18 +33,15 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const writeCheckpoint = await bucketStorage.createManagedWriteCheckpoint({ heads: { '1': '5/0' }, user_id: 'user1' }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -71,9 +68,8 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(r.persisted_sync_rules!); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -81,9 +77,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) 
[Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -104,9 +98,7 @@ bucket_definitions: // We have to trigger a new keepalive after the checkpoint, at least to cover postgres storage. // This is what is effetively triggered with RouteAPI.createReplicationHead(). // MongoDB storage doesn't explicitly need this anymore. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('6/0'); - }); + await writer.keepalive('6/0'); let result2 = await iter.next(); if (result2.value?.base?.lsn == '5/0') { @@ -138,9 +130,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -148,14 +139,12 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.addCustomWriteCheckpoint({ - checkpoint: 5n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('5/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 5n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -182,9 +171,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -192,17 +180,15 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Flush to clear state - await batch.flush(); + // Flush to clear state + await writer.flush(); - await batch.addCustomWriteCheckpoint({ - checkpoint: 5n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('5/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 5n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -229,9 +215,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => 
abortController.abort()); @@ -239,9 +224,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -254,14 +237,12 @@ bucket_definitions: } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.addCustomWriteCheckpoint({ - checkpoint: 6n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('6/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 6n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('6/0'); let result2 = await iter.next(); expect(result2).toMatchObject({ @@ -275,14 +256,12 @@ bucket_definitions: } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.addCustomWriteCheckpoint({ - checkpoint: 7n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('7/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 7n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('7/0'); let result3 = await iter.next(); expect(result3).toMatchObject({ diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index 5ba42456d..247f67339 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -1,7 +1,7 @@ import { BucketDataBatchOptions, getUuidReplicaIdentityBson, OplogEntry, storage } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; - +import { bucketRequest } from '../test-utils/test-utils-index.js'; /** * Normalize data from OplogEntries for comparison in tests. 
* Tests typically expect the stringified result @@ -25,7 +25,7 @@ const normalizeOplogData = (data: OplogEntry['data']) => { */ export function registerDataStorageDataTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); + test('removing row', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -34,34 +34,34 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const request = bucketRequest(syncRules); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -78,10 +78,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c2) & 0xffffffff, count: 2 } @@ -96,41 +96,37 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commit('0/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' 
- }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commit('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const request = bucketRequest(syncRules); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -143,10 +139,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -162,45 +158,41 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commit('0/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1' - }, - after: { - id: 'test1', - description: 'test1' - }, - beforeReplicaId: test_utils.rid('test1'), - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1' + }, + after: { + id: 'test1', + description: 'test1' + }, + beforeReplicaId: test_utils.rid('test1'), + afterReplicaId: test_utils.rid('test1') }); + await writer.commit('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -213,10 +205,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -231,34 +223,34 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` 
}); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -271,10 +263,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -289,49 +281,48 @@ bucket_definitions: global: data: - SELECT client_id as id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + client_id: 'client1a', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + client_id: 'client1b', + description: 'test1b' + }, + afterReplicaId: test_utils.rid('test1') + }); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - client_id: 'client1a', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - client_id: 'client1b', - description: 'test1b' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2', - client_id: 'client2', - description: 'test2' - }, - afterReplicaId: test_utils.rid('test2') - }); - - await batch.commit('1/1'); + await 
writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2', + client_id: 'client2', + description: 'test2' + }, + afterReplicaId: test_utils.rid('test2') }); + + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -355,50 +346,44 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.flush(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commit('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.flush(); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -415,10 +400,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c2) & 0xffffffff, count: 2 } @@ -433,94 +418,89 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - 
await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.flush(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.commit('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.commit('2/1'); + const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { @@ -540,10 +520,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await 
bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c1 + c1 + c2) & 0xffffffff, count: 4 } @@ -566,123 +546,119 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2', - description: 'test2a' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2', + description: 'test2a' + }, + afterReplicaId: test_utils.rid('test2') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 0n; // Test batch - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - // b - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1b' - }, - afterReplicaId: test_utils.rid('test1') - }); + // b + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1b' + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1' - }, - beforeReplicaId: test_utils.rid('test1'), - after: { - id: 'test2', - description: 'test2b' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1' + }, + beforeReplicaId: test_utils.rid('test1'), + after: { + id: 'test2', + description: 'test2b' + }, + afterReplicaId: test_utils.rid('test2') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test2' - }, - beforeReplicaId: test_utils.rid('test2'), - after: { - id: 'test3', - description: 'test3b' - }, + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test2' + }, + beforeReplicaId: test_utils.rid('test2'), + after: { + id: 'test3', + description: 'test3b' + }, - afterReplicaId: test_utils.rid('test3') - }); + afterReplicaId: test_utils.rid('test3') + }); - // c - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test2', - description: 'test2c' - }, - afterReplicaId: test_utils.rid('test2') - }); + // c + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test2', + 
description: 'test2c' + }, + afterReplicaId: test_utils.rid('test2') + }); - // d - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test4', - description: 'test4d' - }, - afterReplicaId: test_utils.rid('test4') - }); + // d + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test4', + description: 'test4d' + }, + afterReplicaId: test_utils.rid('test4') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test4' - }, - beforeReplicaId: test_utils.rid('test4'), - after: { - id: 'test5', - description: 'test5d' - }, - afterReplicaId: test_utils.rid('test5') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test4' + }, + beforeReplicaId: test_utils.rid('test4'), + after: { + id: 'test5', + description: 'test5d' + }, + afterReplicaId: test_utils.rid('test5') }); + const result2 = await writer.flush(); const checkpoint2 = result2!.flushed_op; + const request = bucketRequest(syncRules, 'global[]', checkpoint1); - const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, new Map([['global[]', checkpoint1]])) - ); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint2, [request])); const data = batch[0].chunkData.data.map((d) => { return { @@ -725,64 +701,62 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 
0n; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Unchanged, but has a before id - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: { - id: 'test1', - description: 'test1b' - }, - afterReplicaId: rid2('test1', 'test1b') - }); + // Unchanged, but has a before id + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: { + id: 'test1', + description: 'test1b' + }, + afterReplicaId: rid2('test1', 'test1b') }); + await writer.flush(); - const result3 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Delete - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1', - description: 'test1b' - }, - beforeReplicaId: rid2('test1', 'test1b'), - after: undefined - }); + // Delete + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1', + description: 'test1b' + }, + beforeReplicaId: rid2('test1', 'test1b'), + after: undefined }); + const result3 = await writer.flush(); const checkpoint3 = result3!.flushed_op; + const request = bucketRequest(syncRules); const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, new Map([['global[]', checkpoint1]])) + bucketStorage.getBucketDataBatch(checkpoint3, [{ ...request, start: checkpoint1 }]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -834,64 +808,62 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 
0n; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Unchanged, but has a before id - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + // Unchanged, but has a before id + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + await writer.flush(); - const result3 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Delete - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: undefined - }); + // Delete + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: undefined }); + const result3 = await writer.flush(); const checkpoint3 = result3!.flushed_op; + const request = bucketRequest(syncRules); const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, new Map([['global[]', checkpoint1]])) + bucketStorage.getBucketDataBatch(checkpoint3, [{ ...request, start: checkpoint1 }]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -933,69 +905,66 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + const largeDescription = '0123456789'.repeat(12_000_00); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - const largeDescription = '0123456789'.repeat(12_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); + // Large enough to split the returned batch + await 
writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') }); + await writer.commit('1/1'); + const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); const options: storage.BucketDataBatchOptions = { chunkLimitBytes: 16 * 1024 * 1024 }; - const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options) - ); + const batch1 = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request], options)); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, { op_id: '2', op: 'PUT', object_id: 'large1', checksum: 454746904 } @@ -1009,7 +978,7 @@ bucket_definitions: const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]), + [{ ...request, start: BigInt(batch1[0].chunkData.next_after) }], options ) ); @@ -1026,7 +995,7 @@ bucket_definitions: const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]), + [{ ...request, start: BigInt(batch2[0].chunkData.next_after) }], options ) ); @@ -1043,34 +1012,32 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - for (let i = 1; i <= 6; i++) { - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `test${i}`, - description: `test${i}` - }, - afterReplicaId: `test${i}` - }); - } + for (let i = 1; i <= 6; i++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: `test${i}`, + description: `test${i}` + }, + afterReplicaId: `test${i}` + }); + } - await batch.commit('1/1'); - }); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch1 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), { limit: 4 }) - ); + const batch1 = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request], { limit: 4 })); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, @@ -1086,7 +1053,7 @@ bucket_definitions: }); const batch2 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', BigInt(batch1.chunkData.next_after)]]), { + bucketStorage.getBucketDataBatch(checkpoint, [{ ...request, start: BigInt(batch1.chunkData.next_after) }], { limit: 4 }) ); @@ -1102,7 +1069,7 @@ bucket_definitions: }); const batch3 = await test_utils.fromAsync( - 
bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', BigInt(batch2.chunkData.next_after)]]), { + bucketStorage.getBucketDataBatch(checkpoint, [{ ...request, start: BigInt(batch2.chunkData.next_after) }], { limit: 4 }) ); @@ -1126,46 +1093,41 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - for (let i = 1; i <= 10; i++) { - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `test${i}`, - description: `test${i}`, - bucket: i == 1 ? 'global1' : 'global2' - }, - afterReplicaId: `test${i}` - }); - } - - await batch.commit('1/1'); - }); + await writer.markAllSnapshotDone('1/1'); + + for (let i = 1; i <= 10; i++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: `test${i}`, + description: `test${i}`, + bucket: i == 1 ? 'global1' : 'global2' + }, + afterReplicaId: `test${i}` + }); + } + + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); - return await test_utils.fromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - new Map([ - ['global1[]', 0n], - ['global2[]', 0n] - ]), - options - ) + const global1Request = bucketRequest(syncRules, 'global1[]', 0n); + const global2Request = bucketRequest(syncRules, 'global2[]', 0n); + const batch = await test_utils.fromAsync( + bucketStorage.getBucketDataBatch(checkpoint, [global1Request, global2Request], options) ); + return { batch, global1Request, global2Request }; }; test('batch has_more (1)', async () => { - const batch = await setup({ limit: 5 }); + const { batch, global1Request, global2Request } = await setup({ limit: 5 }); expect(batch.length).toEqual(2); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); + expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -1192,11 +1154,11 @@ bucket_definitions: }); test('batch has_more (2)', async () => { - const batch = await setup({ limit: 11 }); + const { batch, global1Request, global2Request } = await setup({ limit: 11 }); expect(batch.length).toEqual(2); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); + expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -1229,12 +1191,12 @@ bucket_definitions: test('batch has_more (3)', async () => { // 50 bytes is more than 1 row, less than 2 rows - const batch = await setup({ limit: 3, chunkLimitBytes: 50 }); + const { batch, global1Request, global2Request } = await setup({ limit: 3, chunkLimitBytes: 50 }); expect(batch.length).toEqual(3); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); - expect(batch[2].chunkData.bucket).toEqual('global2[]'); + 
expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); + expect(batch[2].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -1278,10 +1240,9 @@ bucket_definitions: const r = await f.configureSyncRules({ content: 'bucket_definitions: {}', validate: false }); const storage = f.getInstance(r.persisted_sync_rules!); - await storage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/0'); - await batch.keepalive('1/0'); - }); + await using writer = await storage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/0'); + await writer.keepalive('1/0'); const metrics2 = await f.getStorageMetrics(); expect(metrics2.operations_size_bytes).toBeLessThanOrEqual(20_000); @@ -1304,36 +1265,34 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id'], config); - const sourceTableIgnore = test_utils.makeTestTable('test_ignore', ['id'], config); - - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - // This saves a record to current_data, but not bucket_data. - // This causes a checkpoint to be created without increasing the op_id sequence. - await batch.save({ - sourceTable: sourceTableIgnore, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, 1); + const sourceTableIgnore = await test_utils.resolveTestTable(writer, 'test_ignore', ['id'], config, 2); + + await writer.markAllSnapshotDone('1/1'); + // This saves a record to current_data, but not bucket_data. + // This causes a checkpoint to be created without increasing the op_id sequence. + await writer.save({ + sourceTable: sourceTableIgnore, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + const result1 = await writer.flush(); const checkpoint1 = result1!.flushed_op; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2' + }, + afterReplicaId: test_utils.rid('test2') }); + const result2 = await writer.flush(); const checkpoint2 = result2!.flushed_op; // we expect 0n and 1n, or 1n and 2n. 
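// The hunks above and below all apply the same mechanical migration: the startBatch()
// callback pattern is replaced by an explicitly managed writer. A minimal sketch of the
// new shape, using the names that appear in the surrounding hunks (createWriter,
// resolveTestTable, bucketRequest); the signatures are inferred from this diff and the
// sketch assumes the usual test scope (factory, syncRules, storage, test_utils, config),
// so treat it as illustrative rather than authoritative:
const bucketStorage = factory.getInstance(syncRules);
await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS);
const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config);

await writer.markAllSnapshotDone('1/1');
await writer.save({
  sourceTable: testTable,
  tag: storage.SaveOperationTag.INSERT,
  after: { id: 'test1', description: 'test1' },
  afterReplicaId: test_utils.rid('test1')
});
// commit()/keepalive() are called on the writer directly instead of on the callback's
// batch argument; flush() returns what the startBatch() callback used to return.
await writer.commit('1/1');

// Reads switch from bucket-name maps to request descriptors built via bucketRequest():
const { checkpoint } = await bucketStorage.getCheckpoint();
const request = bucketRequest(syncRules); // previously: new Map([['global[]', 0n]])
const data = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request]));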
@@ -1348,30 +1307,30 @@ bucket_definitions: global: data: - SELECT client_id as id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; - expect(checksums).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); - const checksums2 = [...(await bucketStorage.getChecksums(checkpoint + 1n, ['global[]'])).values()]; - expect(checksums2).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; + expect(checksums).toEqual([{ bucket: request.bucket, checksum: 1917136889, count: 1 }]); + const checksums2 = [...(await bucketStorage.getChecksums(checkpoint + 1n, [request])).values()]; + expect(checksums2).toEqual([{ bucket: request.bucket, checksum: 1917136889, count: 1 }]); }); testChecksumBatching(config); @@ -1384,31 +1343,29 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.commit('1/1'); - - const cp1 = await bucketStorage.getCheckpoint(); - expect(cp1.lsn).toEqual('1/1'); - - await batch.commit('2/1', { createEmptyCheckpoints: true }); - const cp2 = await bucketStorage.getCheckpoint(); - expect(cp2.lsn).toEqual('2/1'); - - await batch.keepalive('3/1'); - const cp3 = await bucketStorage.getCheckpoint(); - expect(cp3.lsn).toEqual('3/1'); - - // For the last one, we skip creating empty checkpoints - // This means the LSN stays at 3/1. - await batch.commit('4/1', { createEmptyCheckpoints: false }); - const cp4 = await bucketStorage.getCheckpoint(); - expect(cp4.lsn).toEqual('3/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); + await writer.commit('1/1'); + + const cp1 = await bucketStorage.getCheckpoint(); + expect(cp1.lsn).toEqual('1/1'); + + await writer.commit('2/1', { createEmptyCheckpoints: true }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('2/1'); + + await writer.keepalive('3/1'); + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('3/1'); + + // For the last one, we skip creating empty checkpoints + // This means the LSN stays at 3/1. 
+ await writer.commit('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('3/1'); }); test('empty checkpoints (2)', async () => { @@ -1422,40 +1379,38 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer1 = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await using writer2 = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer2, 'test', ['id'], config); + + // We simulate two concurrent batches, but sequential calls are enough for this test. + await writer1.markAllSnapshotDone('1/1'); + await writer1.commit('1/1'); + + await writer1.commit('2/1', { createEmptyCheckpoints: false }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped + + await writer2.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + // This simulates what happens on a snapshot processor. + // This may later change to a flush() rather than commit(). + await writer2.commit(test_utils.BATCH_OPTIONS.zeroLSN); - const sourceTable = TEST_TABLE; - // We simulate two concurrent batches, but nesting is the easiest way to do this. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch1) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch2) => { - await batch1.markAllSnapshotDone('1/1'); - await batch1.commit('1/1'); - - await batch1.commit('2/1', { createEmptyCheckpoints: false }); - const cp2 = await bucketStorage.getCheckpoint(); - expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped - - await batch2.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - // This simulates what happens on a snapshot processor. - // This may later change to a flush() rather than commit(). 
- await batch2.commit(test_utils.BATCH_OPTIONS.zeroLSN); - - const cp3 = await bucketStorage.getCheckpoint(); - expect(cp3.lsn).toEqual('1/1'); // Still unchanged + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('1/1'); // Still unchanged - // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} - await batch1.commit('4/1', { createEmptyCheckpoints: false }); - const cp4 = await bucketStorage.getCheckpoint(); - expect(cp4.lsn).toEqual('4/1'); - }); - }); + // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} + await writer1.commit('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('4/1'); }); test('deleting while streaming', async () => { @@ -1466,47 +1421,49 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using snapshotWriter = await bucketStorage.createWriter({ + ...test_utils.BATCH_OPTIONS, + skipExistingRows: true + }); + await using streamingWriter = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const snapshotTable = await test_utils.resolveTestTable(snapshotWriter, 'test', ['id'], config, 1); + const streamingTable = await test_utils.resolveTestTable(streamingWriter, 'test', ['id'], config, 1); - const sourceTable = TEST_TABLE; - // We simulate two concurrent batches, and nesting is the easiest way to do this. + // We simulate two concurrent batches; separate writers are enough for this test. // For this test, we assume that we start with a row "test1", which is picked up by a snapshot // query, right before the delete is streamed. But the snapshot query is only persisted _after_ // the delete is streamed, and we need to ensure that the streamed delete takes precedence. 
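// For reference, the ordering exercised by the rewritten test below is:
//   1. streamingWriter deletes 'test1' and commits at LSN '2/1';
//   2. snapshotWriter (created with skipExistingRows: true) then persists the stale snapshot
//      INSERT of 'test1', marks the snapshot done at '3/1', and commits at '1/1';
//   3. streamingWriter advances the checkpoint with keepalive('3/1').
// The assertions that follow check that the checkpoint LSN is '3/1' and that the bucket is
// empty at that checkpoint, i.e. the late-arriving snapshot write does not resurrect the row
// removed by the streamed delete.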
- await bucketStorage.startBatch({ ...test_utils.BATCH_OPTIONS, skipExistingRows: true }, async (snapshotBatch) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (streamingBatch) => { - streamingBatch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1' - }, - beforeReplicaId: test_utils.rid('test1') - }); - await streamingBatch.commit('2/1'); - - await snapshotBatch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await snapshotBatch.markAllSnapshotDone('3/1'); - await snapshotBatch.commit('1/1'); + await streamingWriter.save({ + sourceTable: streamingTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1' + }, + beforeReplicaId: test_utils.rid('test1') + }); + await streamingWriter.commit('2/1'); - await streamingBatch.keepalive('3/1'); - }); + await snapshotWriter.save({ + sourceTable: snapshotTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') }); + await snapshotWriter.markAllSnapshotDone('3/1'); + await snapshotWriter.commit('1/1'); + + await streamingWriter.keepalive('3/1'); const cp = await bucketStorage.getCheckpoint(); expect(cp.lsn).toEqual('3/1'); const data = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(cp.checkpoint, new Map([['global[]', 0n]])) + bucketStorage.getBucketDataBatch(cp.checkpoint, [bucketRequest(syncRules)]) ); expect(data).toEqual([]); @@ -1531,38 +1488,40 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - for (let u of ['u1', 'u2', 'u3', 'u4']) { - for (let t of ['t1', 't2', 't3', 't4']) { - const id = `${t}_${u}`; - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id, - description: `${t} description`, - user_id: u - }, - afterReplicaId: test_utils.rid(id) - }); - } + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + for (let u of ['u1', 'u2', 'u3', 'u4']) { + for (let t of ['t1', 't2', 't3', 't4']) { + const id = `${t}_${u}`; + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id, + description: `${t} description`, + user_id: u + }, + afterReplicaId: test_utils.rid(id) + }); } - await batch.commit('1/1'); - }); + } + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); bucketStorage.clearChecksumCache(); - const buckets = ['user["u1"]', 'user["u2"]', 'user["u3"]', 'user["u4"]']; - const checksums = [...(await bucketStorage.getChecksums(checkpoint, buckets)).values()]; + const users = ['u1', 'u2', 'u3', 'u4']; + const expectedChecksums = [346204588, 5261081, 134760718, -302639724]; + const bucketRequests = users.map((user) => bucketRequest(syncRules, `user["${user}"]`)); + const checksums = [...(await bucketStorage.getChecksums(checkpoint, bucketRequests)).values()]; checksums.sort((a, b) => a.bucket.localeCompare(b.bucket)); - expect(checksums).toEqual([ - { bucket: 'user["u1"]', count: 4, checksum: 346204588 }, - { bucket: 'user["u2"]', count: 4, checksum: 5261081 }, 
- { bucket: 'user["u3"]', count: 4, checksum: 134760718 }, - { bucket: 'user["u4"]', count: 4, checksum: -302639724 } - ]); + const expected = bucketRequests.map((request, index) => ({ + bucket: request.bucket, + count: 4, + checksum: expectedChecksums[index] + })); + expected.sort((a, b) => a.bucket.localeCompare(b.bucket)); + expect(checksums).toEqual(expected); }); } diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 9281a15f9..d2ced62a1 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -1,6 +1,5 @@ -import { JwtPayload, storage } from '@powersync/service-core'; -import { RequestParameters, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; -import { ParameterLookupScope } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { storage } from '@powersync/service-core'; +import { ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; @@ -16,8 +15,6 @@ import * as test_utils from '../test-utils/test-utils-index.js'; */ export function registerDataStorageParameterTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - const MYBUCKET_1: ParameterLookupScope = { lookupName: 'mybucket', queryId: '1' }; test('save and load parameters', async () => { await using factory = await generateStorageFactory(); @@ -31,44 +28,44 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + id1: 'user3', + id2: 'user4', + group_id: 'group2a' + }, + afterReplicaId: test_utils.rid('t2') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - id1: 'user3', - id2: 'user4', - group_id: 'group2a' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - id1: 'user1', - id2: 'user2', - group_id: 'group1a' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + id1: 'user1', + id2: 'user2', + group_id: 'group1a' + }, + afterReplicaId: test_utils.rid('t1') }); + await writer.commit('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); - expect(parameters).toEqual([ - { - group_id: 'group1a' - } - ]); + + const parameters = test_utils.requestParameters({ sub: 'user1' }); + const querier = 
hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const parameter_sets = await querier.queryDynamicBucketDescriptions(checkpoint); + expect(parameter_sets).toMatchObject([{ bucket: expect.stringMatching(/"group1a"/) }]); }); test('it should use the latest version', async () => { @@ -83,47 +80,50 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - group_id: 'group1' - }, - afterReplicaId: test_utils.rid('user1') - }); - await batch.commit('1/1'); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + group_id: 'group1' + }, + afterReplicaId: test_utils.rid('user1') }); + await writer.commit('1/1'); const checkpoint1 = await bucketStorage.getCheckpoint(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - group_id: 'group2' - }, - afterReplicaId: test_utils.rid('user1') - }); - await batch.commit('1/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + group_id: 'group2' + }, + afterReplicaId: test_utils.rid('user1') }); + await writer.commit('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint2.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); - expect(parameters).toEqual([ + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(test_utils.requestParameters({ sub: 'user1' })) + ).querier; + + const buckets1 = await querier.queryDynamicBucketDescriptions(checkpoint2); + expect(buckets1).toMatchObject([ { - group_id: 'group2' + bucket: expect.stringMatching(/"group2"/) } ]); // Use the checkpoint to get older data if relevant - const parameters2 = await checkpoint1.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); - expect(parameters2).toEqual([ + const buckets2 = await querier.queryDynamicBucketDescriptions(checkpoint1); + expect(buckets2).toMatchObject([ { - group_id: 'group1' + bucket: expect.stringMatching(/"group1"/) } ]); }); @@ -143,63 +143,63 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const table = test_utils.makeTestTable('todos', ['id', 'list_id'], config); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - // Create two todos which initially belong to different lists - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'todo1', - list_id: 'list1' - }, - afterReplicaId: test_utils.rid('todo1') - }); - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'todo2', - list_id: 'list2' - }, - afterReplicaId: test_utils.rid('todo2') - }); - - await batch.commit('1/1'); + await using writer = await 
bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + const table = await test_utils.resolveTestTable(writer, 'todos', ['id', 'list_id'], config); + + await writer.markAllSnapshotDone('1/1'); + // Create two todos which initially belong to different lists + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'todo1', + list_id: 'list1' + }, + afterReplicaId: test_utils.rid('todo1') + }); + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'todo2', + list_id: 'list2' + }, + afterReplicaId: test_utils.rid('todo2') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Update the second todo item to now belong to list 1 - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'todo2', - list_id: 'list1' - }, - afterReplicaId: test_utils.rid('todo2') - }); + await writer.commit('1/1'); - await batch.commit('1/1'); + // Update the second todo item to now belong to list 1 + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'todo2', + list_id: 'list1' + }, + afterReplicaId: test_utils.rid('todo2') }); + await writer.commit('1/1'); + // We specifically request the todo_ids for both lists. // There removal operation for the association of `list2`::`todo2` should not interfere with the new // association of `list1`::`todo2` + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { list_id: ['list1', 'list2'] } }) + ) + ).querier; const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, ['list1']), - ScopedParameterLookup.direct(MYBUCKET_1, ['list2']) - ]); + const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); - expect(parameters.sort((a, b) => (a.todo_id as string).localeCompare(b.todo_id as string))).toEqual([ + expect(buckets.sort((a, b) => a.bucket.localeCompare(b.bucket))).toMatchObject([ { - todo_id: 'todo1' + bucket: expect.stringMatching(/"todo1"/) }, { - todo_id: 'todo2' + bucket: expect.stringMatching(/"todo2"/) } ]); }); @@ -216,39 +216,51 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - group_id: 'group1', - n1: 314n, - f2: 314, - f3: 3.14 - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + group_id: 'group1', + n1: 314n, + f2: 314, + f3: 3.14 + }, + afterReplicaId: test_utils.rid('t1') }); - const TEST_PARAMS = { group_id: 'group1' }; + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); - const parameters1 = await checkpoint.getParameterSets([ - 
ScopedParameterLookup.direct(MYBUCKET_1, [314n, 314, 3.14]) - ]); - expect(parameters1).toEqual([TEST_PARAMS]); - const parameters2 = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, [314, 314n, 3.14]) - ]); - expect(parameters2).toEqual([TEST_PARAMS]); - const parameters3 = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, [314n, 314, 3])]); - expect(parameters3).toEqual([]); + const querier1 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 } }) + ) + ).querier; + const buckets1 = await querier1.queryDynamicBucketDescriptions(checkpoint); + expect(buckets1).toMatchObject([{ bucket: expect.stringMatching(/"group1"/), definition: 'mybucket' }]); + + const querier2 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }) + ) + ).querier; + const buckets2 = await querier2.queryDynamicBucketDescriptions(checkpoint); + expect(buckets2).toMatchObject([{ bucket: expect.stringMatching(/"group1"/), definition: 'mybucket' }]); + + const querier3 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }) + ) + ).querier; + const buckets3 = await querier3.queryDynamicBucketDescriptions(checkpoint); + expect(buckets3).toEqual([]); }); test('save and load parameters with large numbers', async () => { @@ -267,49 +279,53 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + group_id: 'group1', + n1: 1152921504606846976n // 2^60 + }, + afterReplicaId: test_utils.rid('t1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - group_id: 'group1', - n1: 1152921504606846976n // 2^60 - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - group_id: 'group1', - // Simulate a TOAST value, even though it can't happen for values like this - // in practice. - n1: undefined - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + group_id: 'group1', + // Simulate a TOAST value, even though it can't happen for values like this + // in practice. 
+ n1: undefined + }, + afterReplicaId: test_utils.rid('t1') }); - const TEST_PARAMS = { group_id: 'group1' }; + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); - - const parameters1 = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, [1152921504606846976n]) + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }) + ) + ).querier; + const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); + expect(buckets.map(test_utils.removeSourceSymbol)).toMatchObject([ + { + bucket: expect.stringMatching(/"group1"/), + definition: 'mybucket' + } ]); - expect(parameters1).toEqual([TEST_PARAMS]); }); test('save and load parameters with workspaceId', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', ['id'], config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -321,45 +337,49 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace1', - userId: 'u1' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - await batch.commit('1/1'); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + userId: 'u1' + }, + afterReplicaId: test_utils.rid('workspace1') }); + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'u1' }), {}); + const parameters = test_utils.requestParameters({ sub: 'u1' }); - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const buckets = await querier.queryDynamicBucketDescriptions({ async getParameterSets(lookups) { - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, ['u1'])]); + // Lookups are not stable anymore + // expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, ['u1'])]); const parameter_sets = await checkpoint.getParameterSets(lookups); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }]); return parameter_sets; } }); - expect(buckets).toEqual([ - { bucket: 'by_workspace["workspace1"]', priority: 3, definition: 'by_workspace', inclusion_reasons: ['default'] } - ]); + const cleanedBuckets = buckets.map(test_utils.removeSourceSymbol); + expect(cleanedBuckets).toHaveLength(1); + expect(cleanedBuckets[0]).toMatchObject({ + priority: 3, + definition: 'by_workspace', + inclusion_reasons: ['default'] + }); + expect(cleanedBuckets[0].bucket.endsWith('["workspace1"]')).toBe(true); }); test('save and load parameters 
with dynamic global buckets', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -371,55 +391,56 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace1', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace2') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace3', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace3') - }); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace2') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace3', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace3') }); + await writer.commit('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'unknown' }), {}); + const parameters = test_utils.requestParameters({ sub: 'unknown' }); - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const buckets = await querier.queryDynamicBucketDescriptions({ async getParameterSets(lookups) { - expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_public_workspace', queryId: '1' }, []) - ]); + // Lookups are not stable anymore + // expect(lookups).toEqual([ + // ScopedParameterLookup.direct({ lookupName: 'by_public_workspace', queryId: '1' }, []) + // ]); const parameter_sets = await checkpoint.getParameterSets(lookups); parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); @@ -427,26 +448,20 @@ bucket_definitions: return parameter_sets; } }); - buckets.sort((a, b) => a.bucket.localeCompare(b.bucket)); - expect(buckets).toEqual([ - { - bucket: 'by_public_workspace["workspace1"]', - priority: 3, - definition: 'by_public_workspace', - inclusion_reasons: ['default'] - }, - { - bucket: 'by_public_workspace["workspace3"]', + const cleanedBuckets = buckets.map(test_utils.removeSourceSymbol); + 
expect(cleanedBuckets).toHaveLength(2); + for (const bucket of cleanedBuckets) { + expect(bucket).toMatchObject({ priority: 3, definition: 'by_public_workspace', inclusion_reasons: ['default'] - } - ]); + }); + } + const bucketSuffixes = cleanedBuckets.map((bucket) => bucket.bucket.slice(bucket.bucket.indexOf('['))).sort(); + expect(bucketSuffixes).toEqual(['["workspace1"]', '["workspace3"]']); }); test('multiple parameter queries', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -460,62 +475,62 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace1', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace2') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace3', - user_id: 'u1', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace3') - }); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace2') + }); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace4', - user_id: 'u2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace4') - }); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace3', + user_id: 'u1', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace3') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace4', + user_id: 'u2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace4') }); + await writer.commit('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'u1' }), {}); + const parameters = test_utils.requestParameters({ sub: 'u1' }); // Test intermediate values - could be moved to sync_rules.test.ts - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; // Test final values - the 
important part const foundLookups: ScopedParameterLookup[] = []; @@ -530,15 +545,16 @@ bucket_definitions: } }) ).map((e) => e.bucket); - expect(foundLookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, []), - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '2' }, ['u1']) - ]); + // Lookups are not stable anymore + // expect(foundLookups).toEqual([ + // ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, []), + // ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '2' }, ['u1']) + // ]); parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }, { workspace_id: 'workspace3' }]); - buckets.sort(); - expect(buckets).toEqual(['by_workspace["workspace1"]', 'by_workspace["workspace3"]']); + const bucketSuffixes = buckets.map((bucket) => bucket.slice(bucket.indexOf('['))).sort(); + expect(bucketSuffixes).toEqual(['["workspace1"]', '["workspace3"]']); }); test('truncate parameters', async () => { @@ -553,27 +569,32 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - id1: 'user3', - id2: 'user4', - group_id: 'group2a' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.truncate([TEST_TABLE]); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + id1: 'user3', + id2: 'user4', + group_id: 'group2a' + }, + afterReplicaId: test_utils.rid('t2') }); + await writer.truncate([testTable]); + await writer.flush(); + const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(test_utils.requestParameters({ sub: 'user1' })) + ).querier; + const parameters = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameters).toEqual([]); }); @@ -591,11 +612,11 @@ bucket_definitions: }); const syncBucketStorage = bucketStorageFactory.getInstance(syncRules); - const parsedSchema1 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema1 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'public' }); - const parsedSchema2 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema2 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'public' }); @@ -603,7 +624,7 @@ bucket_definitions: expect(parsedSchema2).equals(parsedSchema1); expect(parsedSchema1.getSourceTables()[0].schema).equals('public'); - const parsedSchema3 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema3 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'databasename' }); diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index 609b6f6fd..cde9e710a 100644 --- 
a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -6,8 +6,6 @@ import * as test_utils from '../test-utils/test-utils-index.js'; export function registerParameterCompactTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - test('compacting parameters', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -19,60 +17,58 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' }); - const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1' }, ['t1']); + await writer.commit('1/1'); + + const lookup = ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, ['t1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); expect(parameters1).toEqual([{ id: 't1' }]); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 't1' - }, - beforeReplicaId: 't1', - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 't1' + }, + beforeReplicaId: 't1', + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('1/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + await writer.commit('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); const parameters2 = await checkpoint2.getParameterSets([lookup]); expect(parameters2).toEqual([]); @@ -103,59 +99,55 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - uid: 'u1' - }, - 
afterReplicaId: 't1' - }); - // Interleave with another operation, to evict the other cache entry when compacting. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - uid: 'u1' - }, - afterReplicaId: 't2' - }); - - await batch.commit('1/1'); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + uid: 'u1' + }, + afterReplicaId: 't1' }); + // Interleave with another operation, to evict the other cache entry when compacting. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + uid: 'u1' + }, + afterReplicaId: 't2' + }); + + await writer.commit('1/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1', - uid: 'u1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1', + uid: 'u1' + }, + beforeReplicaId: 't1' }); + await writer.commit('2/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - uid: 'u2' - }, - afterReplicaId: 't2' - }); - await batch.commit('3/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + uid: 'u2' + }, + afterReplicaId: 't2' }); + await writer.commit('3/1'); - const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1' }, ['u1']); + const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['u1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index 7c1531418..575544d09 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -46,8 +46,6 @@ export function registerSyncTests(config: storage.TestStorageConfig) { maxDataFetchConcurrency: 2 }); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - test('sync global data', async () => { await using f = await factory(); @@ -56,37 +54,37 @@ export function registerSyncTests(config: storage.TestStorageConfig) { }); const bucketStorage = f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); + await writer.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - }, - afterReplicaId: 't2' - }); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + 
}); - await batch.commit('0/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commit('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage: bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -119,36 +117,36 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'earlier', - description: 'Test 2' - }, - afterReplicaId: 'earlier' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'earlier', + description: 'Test 2' + }, + afterReplicaId: 'earlier' }); + await writer.commit('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -181,38 +179,38 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. 
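+        // The 10k rows exist purely to make the low-priority phase slow enough to interrupt:
+        // the test later requires that at least 1000 of them sync before the interruption lands.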
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'High priority row' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 10_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'High priority row' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 10_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -235,20 +233,18 @@ bucket_definitions: if (sentCheckpoints == 1) { // Save new data to interrupt the low-priority sync. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add another high-priority row. This should interrupt the long-running low-priority sync. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'Another high-priority row' - }, - afterReplicaId: 'highprio2' - }); - - await batch.commit('0/2'); + // Add another high-priority row. This should interrupt the long-running low-priority sync. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'Another high-priority row' + }, + afterReplicaId: 'highprio2' }); + + await writer.commit('0/2'); } else { // Low-priority sync from the first checkpoint was interrupted. This should not happen before // 1000 low-priority items were synchronized. @@ -292,38 +288,38 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. 
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'user_one' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 10_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'user_one' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 10_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -351,20 +347,18 @@ bucket_definitions: if (typeof next === 'object' && next !== null) { if ('partial_checkpoint_complete' in next) { if (sentCheckpoints == 1) { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add a high-priority row that doesn't affect this sync stream. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'user_two' - }, - afterReplicaId: 'highprio2' - }); - - await batch.commit('0/2'); + // Add a high-priority row that doesn't affect this sync stream. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'user_two' + }, + afterReplicaId: 'highprio2' }); + + await writer.commit('0/2'); } else { expect(sentCheckpoints).toBe(2); expect(sentRows).toBe(10002); @@ -385,20 +379,18 @@ bucket_definitions: if (completedCheckpoints == 1) { expect(sentRows).toBe(10001); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add a high-priority row that affects this sync stream. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio3', - description: 'user_one' - }, - afterReplicaId: 'highprio3' - }); - - await batch.commit('0/3'); + // Add a high-priority row that affects this sync stream. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio3', + description: 'user_one' + }, + afterReplicaId: 'highprio3' }); + + await writer.commit('0/3'); } } } @@ -434,38 +426,38 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. 
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'High priority row' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 2_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'High priority row' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 2_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -497,31 +489,29 @@ bucket_definitions: if (sentRows == 1001) { // Save new data to interrupt the low-priority sync. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add another high-priority row. This should interrupt the long-running low-priority sync. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'Another high-priority row' - }, - afterReplicaId: 'highprio2' - }); - - // Also add a low-priority row - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: '2001', - description: 'Another low-priority row' - }, - afterReplicaId: '2001' - }); - - await batch.commit('0/2'); + // Add another high-priority row. This should interrupt the long-running low-priority sync. 
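+            // With the shared writer (no startBatch callback anymore), the rows saved below only
+            // become visible to the running stream once commit('0/2') creates the next checkpoint.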
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'Another high-priority row' + }, + afterReplicaId: 'highprio2' + }); + + // Also add a low-priority row + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: '2001', + description: 'Another low-priority row' + }, + afterReplicaId: '2001' }); + + await writer.commit('0/2'); } if (sentRows >= 1000 && sentRows <= 2001) { @@ -564,25 +554,25 @@ bucket_definitions: content: BASIC_SYNC_RULES }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'sync' - }, - afterReplicaId: 't1' - }); - await batch.commit('0/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'sync' + }, + afterReplicaId: 't1' }); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -608,9 +598,7 @@ bucket_definitions: if (receivedCompletions == 1) { // Trigger an empty bucket update. await bucketStorage.createManagedWriteCheckpoint({ user_id: '', heads: { '1': '1/0' } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.commit('1/0'); - }); + await writer.commit('1/0'); } else { break; } @@ -622,34 +610,34 @@ bucket_definitions: }); test('sync legacy non-raw data', async () => { - const f = await factory(); + await using f = await factory(); const syncRules = await f.updateSyncRules({ content: BASIC_SYNC_RULES }); const bucketStorage = await f.getInstance(syncRules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test\n"string"', - large_num: 12345678901234567890n - }, - afterReplicaId: 't1' - }); - - await batch.commit('0/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test\n"string"', + large_num: 12345678901234567890n + }, + afterReplicaId: 't1' }); + await writer.commit('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -678,7 +666,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: 
bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -701,16 +689,16 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -727,36 +715,32 @@ bucket_definitions: expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' }); - expect(await getCheckpointLines(iter)).toMatchSnapshot(); + await writer.commit('0/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - }, - afterReplicaId: 't2' - }); + expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await batch.commit('0/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commit('0/2'); + expect(await getCheckpointLines(iter)).toMatchSnapshot(); }); @@ -772,20 +756,18 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -806,24 +788,24 @@ bucket_definitions: expect(checkpoint1).toMatchSnapshot(); // Add user - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, 
+ afterReplicaId: 'user1' }); + await writer.commit('0/1'); + const checkpoint2 = await getCheckpointLines(iter); + + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); @@ -839,30 +821,28 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); - - await batch.commit('0/1'); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'user1' }); + await writer.commit('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -878,30 +858,28 @@ bucket_definitions: }); const checkpoint1 = await getCheckpointLines(iter); - expect((checkpoint1[0] as StreamingSyncCheckpoint).checkpoint?.buckets?.map((b) => b.bucket)).toEqual([ - 'by_user["user1"]' - ]); - expect(checkpoint1).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: listsTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'list1', - user_id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'list1' - }); + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); + expect((checkpoint1[0] as StreamingSyncCheckpoint).checkpoint?.buckets?.map((b) => b.bucket)).toEqual([bucket]); + expect(checkpoint1).toMatchSnapshot(); - await batch.commit('0/1'); + await writer.save({ + sourceTable: listsTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'list1', + user_id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'list1' }); + await writer.commit('0/1'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); @@ -917,20 +895,18 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); // 
Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -948,36 +924,36 @@ bucket_definitions: // Initial empty checkpoint expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: listsTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'list1', - user_id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'list1' - }); - - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: listsTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'list1', + user_id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'list1' + }); - await batch.commit('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'user1' }); + await writer.commit('0/1'); + + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); @@ -989,18 +965,17 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepalive('0/0'); const exp = Date.now() / 1000 + 0.1; const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1035,36 +1010,36 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - 
}, - afterReplicaId: 't2' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commit('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1092,31 +1067,29 @@ bucket_definitions: // Now we save additional data AND compact before continuing. // This invalidates the checkpoint we've received above. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - description: 'Test 1b' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - description: 'Test 2b' - }, - afterReplicaId: 't2' - }); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + description: 'Test 1b' + }, + afterReplicaId: 't1' + }); - await batch.commit('0/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + description: 'Test 2b' + }, + afterReplicaId: 't2' }); + await writer.commit('0/2'); + await bucketStorage.compact({ minBucketChanges: 1, minChangeRatio: 0 @@ -1180,12 +1153,11 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // <= the managed write checkpoint LSN below - await batch.commit('0/1'); - }); + await writer.markAllSnapshotDone('0/1'); + // <= the managed write checkpoint LSN below + await writer.commit('0/1'); const checkpoint = await bucketStorage.createManagedWriteCheckpoint({ user_id: 'test', @@ -1195,7 +1167,7 @@ bucket_definitions: const params: sync.SyncStreamParameters = { syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1217,11 +1189,9 @@ bucket_definitions: }) }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // must be >= the managed write checkpoint LSN - await batch.commit('1/0'); - }); + await writer.markAllSnapshotDone('0/1'); + // must be >= the managed write checkpoint LSN + await writer.commit('1/0'); // At this point the LSN has advanced, so the write checkpoint should be // included in the next checkpoint message. @@ -1235,9 +1205,12 @@ bucket_definitions: }); }); - test('encodes sync rules id in buckes for streams', async () => { + test('encodes sync rules id in buckets for streams', async () => { await using f = await factory(); - const rules = ` + // This test relies making an actual update to sync rules to test the different bucket names. + // The actual naming scheme may change, as long as the two buckets have different names. 
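+    // (Illustration, not a guarantee: the bucket name is expected to encode something tied to the
+    // deployed sync rules iteration - hence the redeploy - so the two iterations should produce
+    // different names even for equivalent streams.)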
+ const rules = [ + ` streams: test: auto_subscribe: true @@ -1245,32 +1218,43 @@ streams: config: edition: 2 -`; +`, + ` +streams: + test2: + auto_subscribe: true + query: SELECT * FROM test WHERE 1; + +config: + edition: 2 +` + ]; for (let i = 0; i < 2; i++) { const syncRules = await f.updateSyncRules({ - content: rules + content: rules[i] }); const bucketStorage = f.getInstance(syncRules); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, i + 1); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - await batch.commit('0/1'); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' }); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, bucketStorage: bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, diff --git a/packages/service-core/src/replication/AbstractReplicationJob.ts b/packages/service-core/src/replication/AbstractReplicationJob.ts index 02f03bad5..63b053da9 100644 --- a/packages/service-core/src/replication/AbstractReplicationJob.ts +++ b/packages/service-core/src/replication/AbstractReplicationJob.ts @@ -54,7 +54,7 @@ export abstract class AbstractReplicationJob { * Safely stop the replication process */ public async stop(): Promise { - this.logger.info(`Stopping replication job for sync rule iteration: ${this.storage.group_id}`); + this.logger.info(`Stopping replication job for sync rule iteration: ${this.id}`); this.abortController.abort(); await this.isReplicatingPromise; } @@ -63,10 +63,6 @@ export abstract class AbstractReplicationJob { return this.options.id; } - public get storage() { - return this.options.storage; - } - protected get lock() { return this.options.lock; } diff --git a/packages/service-core/src/replication/AbstractReplicator.ts b/packages/service-core/src/replication/AbstractReplicator.ts index 8331a6c27..ff255727d 100644 --- a/packages/service-core/src/replication/AbstractReplicator.ts +++ b/packages/service-core/src/replication/AbstractReplicator.ts @@ -159,6 +159,8 @@ export abstract class AbstractReplicator return; } - const syncRules = bucketStorage.getParsedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); + const syncRules = bucketStorage.getHydratedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); const removeStopHandler = routerEngine.addStopHandler(() => { closeReason ??= 'process shutdown'; diff --git a/packages/service-core/src/routes/endpoints/sync-stream.ts b/packages/service-core/src/routes/endpoints/sync-stream.ts index cddb73555..784a0b4c0 100644 --- a/packages/service-core/src/routes/endpoints/sync-stream.ts +++ b/packages/service-core/src/routes/endpoints/sync-stream.ts @@ -72,7 +72,7 @@ export const syncStreamed = routeDefinition({ }); } - const syncRules = bucketStorage.getParsedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); + const syncRules = 
bucketStorage.getHydratedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); const controller = new AbortController(); const tracker = new sync.RequestTracker(metricsEngine); diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketDataWriter.ts similarity index 75% rename from packages/service-core/src/storage/BucketStorageBatch.ts rename to packages/service-core/src/storage/BucketDataWriter.ts index e35a83395..0c7479200 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketDataWriter.ts @@ -1,17 +1,62 @@ import { ObserverClient } from '@powersync/lib-services-framework'; -import { EvaluatedParameters, EvaluatedRow, SqliteRow, ToastableSqliteRow } from '@powersync/service-sync-rules'; +import { + EvaluatedParameters, + EvaluatedRow, + RowProcessor, + SqliteRow, + ToastableSqliteRow +} from '@powersync/service-sync-rules'; import { BSON } from 'bson'; +import { InternalOpId } from '../util/utils.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; -import { BatchedCustomWriteCheckpointOptions } from './storage-index.js'; -import { InternalOpId } from '../util/utils.js'; +import { + BatchedCustomWriteCheckpointOptions, + ResolveTablesOptions, + ResolveTablesResult, + ResolveTableToDropsOptions +} from './storage-index.js'; export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { createEmptyCheckpoints: true, oldestUncommittedChange: null }; -export interface BucketStorageBatch extends ObserverClient, AsyncDisposable { +export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable { + readonly rowProcessor: RowProcessor; + + /** + * Perform a keepalive on every replication stream. + */ + keepalive(lsn: string): Promise; + + /** + * Performs a commit on every replication stream. + */ + commit(lsn: string, options?: BucketBatchCommitOptions): Promise; + + /** + * Set resume LSN on every replication stream. + */ + setResumeLsn(lsn: string): Promise; + + /** + * Resolve a table, keeping track of it internally. + */ + resolveTables(options: ResolveTablesOptions): Promise; + getTable(ref: SourceTable): Promise; + + /** + * Given a replicated table, return a list of tables that should be dropped due to conflicts. + * + * This can be due to renames, or replica id changes. + */ + resolveTablesToDrop(options: ResolveTableToDropsOptions): Promise; +} + +export interface BucketDataWriterBase { + readonly resumeFromLsn: string | null; + /** * Save an op, and potentially flush. * @@ -41,6 +86,30 @@ export interface BucketStorageBatch extends ObserverClient; + markTableSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn?: string): Promise; + markTableSnapshotRequired(table: SourceTable): Promise; + markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; + + updateTableProgress(table: SourceTable, progress: Partial): Promise; + + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. + */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void; +} + +/** + * @deprecated Use BucketDataWriter instead. + */ +export interface BucketStorageBatch + extends ObserverClient, + AsyncDisposable, + BucketDataWriterBase { + /** + * Alias for [Symbol.asyncDispose] + */ + dispose(): Promise; + /** * Flush and commit any saved ops. 
This creates a new checkpoint by default. * @@ -83,12 +152,6 @@ export interface BucketStorageBatch extends ObserverClient; - markTableSnapshotRequired(table: SourceTable): Promise; - markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; - - updateTableProgress(table: SourceTable, progress: Partial): Promise; - /** * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. */ diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts index 87299fc05..98b03bf3f 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -1,9 +1,10 @@ -import { ObserverClient } from '@powersync/lib-services-framework'; +import { Logger, ObserverClient } from '@powersync/lib-services-framework'; +import { BucketDataWriter, SaveUpdate } from './BucketDataWriter.js'; import { ParseSyncRulesOptions, PersistedSyncRules, PersistedSyncRulesContent } from './PersistedSyncRulesContent.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { ReplicationLock } from './ReplicationLock.js'; -import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; import { ReportStorage } from './ReportStorage.js'; +import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; /** * Represents a configured storage provider. @@ -26,6 +27,8 @@ export interface BucketStorageFactory extends ObserverClient; + /** * Deploy new sync rules. */ @@ -171,3 +174,35 @@ export interface TestStorageConfig { factory: TestStorageFactory; tableIdStrings: boolean; } + +export interface CreateWriterOptions extends ParseSyncRulesOptions { + zeroLSN: string; + /** + * Whether or not to store a copy of the current data. + * + * This is needed if we need to apply partial updates, for example + * when we get TOAST values from Postgres. + * + * This is not needed when we get the full document from the source + * database, for example from MongoDB. + */ + storeCurrentData: boolean; + + /** + * Set to true for initial replication. + * + * This will avoid creating new operations for rows previously replicated. + */ + skipExistingRows?: boolean; + + /** + * Callback called if we streamed an update to a record that we don't have yet. + * + * This is expected to happen in some initial replication edge cases, only if storeCurrentData = true. 
+ */ + markRecordUnavailable?: BucketStorageMarkRecordUnavailable; + + logger?: Logger; +} + +export type BucketStorageMarkRecordUnavailable = (record: SaveUpdate) => void; diff --git a/packages/service-core/src/storage/ChecksumCache.ts b/packages/service-core/src/storage/ChecksumCache.ts index 2e5ac1c22..e315d61e8 100644 --- a/packages/service-core/src/storage/ChecksumCache.ts +++ b/packages/service-core/src/storage/ChecksumCache.ts @@ -2,6 +2,8 @@ import { OrderedSet } from '@js-sdsl/ordered-set'; import { LRUCache } from 'lru-cache/min'; import { BucketChecksum } from '../util/protocol-types.js'; import { addBucketChecksums, ChecksumMap, InternalOpId, PartialChecksum } from '../util/utils.js'; +import { BucketChecksumRequest } from './SyncRulesBucketStorage.js'; +import { BucketDataSource } from '@powersync/service-sync-rules'; interface ChecksumFetchContext { fetch(bucket: string): Promise; @@ -10,6 +12,7 @@ interface ChecksumFetchContext { export interface FetchPartialBucketChecksum { bucket: string; + source: BucketDataSource; start?: InternalOpId; end: InternalOpId; } @@ -113,10 +116,10 @@ export class ChecksumCache { this.bucketCheckpoints.clear(); } - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { const checksums = await this.getChecksumMap(checkpoint, buckets); // Return results in the same order as the request - return buckets.map((bucket) => checksums.get(bucket)!); + return buckets.map((bucket) => checksums.get(bucket.bucket)!); } /** @@ -126,7 +129,7 @@ export class ChecksumCache { * * @returns a Map with exactly one entry for each bucket requested */ - async getChecksumMap(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksumMap(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { // Buckets that don't have a cached checksum for this checkpoint yet let toFetch = new Set(); @@ -163,20 +166,21 @@ export class ChecksumCache { // One promise to await to ensure all fetch requests completed. let settledPromise: Promise[]> | null = null; + let sourceMap = new Map(); try { // Individual cache fetch promises let cacheFetchPromises: Promise[] = []; for (let bucket of buckets) { - const cacheKey = makeCacheKey(checkpoint, bucket); + const cacheKey = makeCacheKey(checkpoint, bucket.bucket); let status: LRUCache.Status = {}; const p = this.cache.fetch(cacheKey, { context: context, status: status }).then((checksums) => { if (checksums == null) { // Should never happen throw new Error(`Failed to get checksums for ${cacheKey}`); } - finalResults.set(bucket, checksums); + finalResults.set(bucket.bucket, checksums); }); cacheFetchPromises.push(p); if (status.fetch == 'hit' || status.fetch == 'inflight') { @@ -185,7 +189,8 @@ export class ChecksumCache { // In either case, we don't need to fetch a new checksum. } else { // We need a new request for this checksum. - toFetch.add(bucket); + toFetch.add(bucket.bucket); + sourceMap.set(bucket.bucket, bucket.source); } } // We do this directly after creating the promises, otherwise @@ -220,6 +225,7 @@ export class ChecksumCache { // Partial checksum found - make a partial checksum request bucketRequest = { bucket, + source: sourceMap.get(bucket)!, start: cp, end: checkpoint }; @@ -240,7 +246,8 @@ export class ChecksumCache { // No partial checksum found - make a new full checksum request bucketRequest = { bucket, - end: checkpoint + end: checkpoint, + source: sourceMap.get(bucket)! 
}; add.set(bucket, { bucket, diff --git a/packages/service-core/src/storage/PersistedSyncRulesContent.ts b/packages/service-core/src/storage/PersistedSyncRulesContent.ts index b8b40f7f9..13e42e21b 100644 --- a/packages/service-core/src/storage/PersistedSyncRulesContent.ts +++ b/packages/service-core/src/storage/PersistedSyncRulesContent.ts @@ -1,4 +1,4 @@ -import { HydratedSyncRules, SyncConfig, SyncConfigWithErrors } from '@powersync/service-sync-rules'; +import { HydratedSyncRules, HydrationState, SyncConfigWithErrors } from '@powersync/service-sync-rules'; import { ReplicationLock } from './ReplicationLock.js'; export interface ParseSyncRulesOptions { @@ -30,6 +30,10 @@ export interface PersistedSyncRules { readonly id: number; readonly sync_rules: SyncConfigWithErrors; readonly slot_name: string; + /** + * For testing only. + */ + readonly hydrationState: HydrationState; hydratedSyncRules(): HydratedSyncRules; } diff --git a/packages/service-core/src/storage/ReplicationEventPayload.ts b/packages/service-core/src/storage/ReplicationEventPayload.ts index d86ea50ef..ccffc3dbd 100644 --- a/packages/service-core/src/storage/ReplicationEventPayload.ts +++ b/packages/service-core/src/storage/ReplicationEventPayload.ts @@ -1,6 +1,6 @@ import * as sync_rules from '@powersync/service-sync-rules'; import { SourceTable } from './SourceTable.js'; -import { BucketStorageBatch, SaveOp } from './BucketStorageBatch.js'; +import { BucketStorageBatch, SaveOp } from './BucketDataWriter.js'; export type EventData = { op: SaveOp; diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index 9a36bc125..bad7fb5ca 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -1,4 +1,4 @@ -import { DEFAULT_TAG } from '@powersync/service-sync-rules'; +import { DEFAULT_TAG, TablePattern } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { ColumnDescriptor, SourceEntityDescriptor } from './SourceEntity.js'; import { bson } from '../index.js'; @@ -16,6 +16,11 @@ export interface SourceTableOptions { name: string; replicaIdColumns: ColumnDescriptor[]; snapshotComplete: boolean; + + bucketDataSourceIds?: number[]; + parameterLookupSourceIds?: number[]; + // FIXME: Make required once all code is updated + pattern?: TablePattern; } export interface TableSnapshotStatus { @@ -102,6 +107,18 @@ export class SourceTable implements SourceEntityDescriptor { return this.syncData || this.syncParameters || this.syncEvent; } + get bucketDataSourceIds() { + return this.options.bucketDataSourceIds ?? []; + } + + get parameterLookupSourceIds() { + return this.options.parameterLookupSourceIds ?? []; + } + + get pattern() { + return this.options.pattern; + } + /** * In-memory clone of the table status. 
*/ @@ -113,7 +130,10 @@ export class SourceTable implements SourceEntityDescriptor { schema: this.schema, name: this.name, replicaIdColumns: this.replicaIdColumns, - snapshotComplete: this.snapshotComplete + snapshotComplete: this.snapshotComplete, + pattern: this.pattern, + bucketDataSourceIds: this.bucketDataSourceIds, + parameterLookupSourceIds: this.parameterLookupSourceIds }); copy.syncData = this.syncData; copy.syncParameters = this.syncParameters; diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 5e67a27c8..d6ce08804 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -1,12 +1,19 @@ import { Logger, ObserverClient } from '@powersync/lib-services-framework'; -import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { + BucketDataSource, + HydratedSyncRules, + ScopedParameterLookup, + SqliteJsonRow, + TablePattern +} from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; -import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; -import { BucketStorageFactory } from './BucketStorageFactory.js'; -import { ParseSyncRulesOptions } from './PersistedSyncRulesContent.js'; +import { BucketDataWriter, BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketDataWriter.js'; +import { BucketStorageFactory, CreateWriterOptions } from './BucketStorageFactory.js'; +import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; import { SourceTable } from './SourceTable.js'; import { SyncStorageWriteCheckpointAPI } from './WriteCheckpointAPI.js'; +import { bson } from '../index.js'; /** * Storage for a specific copy of sync rules. @@ -20,19 +27,21 @@ export interface SyncRulesBucketStorage readonly factory: BucketStorageFactory; /** - * Resolve a table, keeping track of it internally. + * Create a new writer. + * + * The writer is stateful. It is not safe to use the same writer concurrently from multiple places, + * but different writers can be used concurrently. + * + * The writer must be flushed and disposed when done. */ - resolveTable(options: ResolveTableOptions): Promise; + createWriter(options: CreateWriterOptions): Promise; + + getHydratedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; /** - * Use this to get access to update storage data. + * For tests only. */ - startBatch( - options: StartBatchOptions, - callback: (batch: BucketStorageBatch) => Promise - ): Promise; - - getParsedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; + getParsedSyncRules(options: ParseSyncRulesOptions): PersistedSyncRules; /** * Terminate the sync rules. @@ -103,7 +112,7 @@ export interface SyncRulesBucketStorage */ getBucketDataBatch( checkpoint: util.InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: BucketDataBatchOptions ): AsyncIterable; @@ -115,7 +124,7 @@ export interface SyncRulesBucketStorage * This may be slow, depending on the size of the buckets. * The checksums are cached internally to compensate for this, but does not cover all cases. */ - getChecksums(checkpoint: util.InternalOpId, buckets: string[]): Promise; + getChecksums(checkpoint: util.InternalOpId, buckets: BucketChecksumRequest[]): Promise; /** * Clear checksum cache. Primarily intended for tests. 
@@ -127,54 +136,58 @@ export interface SyncRulesBucketStorageListener { batchStarted: (batch: BucketStorageBatch) => void; } +export interface BucketDataRequest { + bucket: string; + start: util.InternalOpId; + source: BucketDataSource; +} +export interface BucketChecksumRequest { + bucket: string; + source: BucketDataSource; +} + export interface SyncRuleStatus { checkpoint_lsn: string | null; active: boolean; snapshot_done: boolean; snapshot_lsn: string | null; } -export interface ResolveTableOptions { - group_id: number; +export interface ResolveTablesOptions { connection_id: number; connection_tag: string; entity_descriptor: SourceEntityDescriptor; - - sync_rules: HydratedSyncRules; + pattern: TablePattern; + /** + * For tests only - custom id generator for stable ids. + */ + idGenerator?: () => string | bson.ObjectId; } -export interface ResolveTableResult { - table: SourceTable; - dropTables: SourceTable[]; +export interface ResolveTableToDropsOptions { + connection_id: number; + connection_tag: string; + entity_descriptor: SourceEntityDescriptor; } -export interface StartBatchOptions extends ParseSyncRulesOptions { - zeroLSN: string; - /** - * Whether or not to store a copy of the current data. - * - * This is needed if we need to apply partial updates, for example - * when we get TOAST values from Postgres. - * - * This is not needed when we get the full document from the source - * database, for example from MongoDB. - */ - storeCurrentData: boolean; - +export interface ResolveTableOptions { + connection_id: number; + connection_tag: string; + entity_descriptor: SourceEntityDescriptor; + sync_rules: HydratedSyncRules; /** - * Set to true for initial replication. - * - * This will avoid creating new operations for rows previously replicated. + * For tests only - custom id generator for stable ids. */ - skipExistingRows?: boolean; + idGenerator?: () => string | bson.ObjectId; +} - /** - * Callback called if we streamed an update to a record that we don't have yet. - * - * This is expected to happen in some initial replication edge cases, only if storeCurrentData = true. 
- */ - markRecordUnavailable?: BucketStorageMarkRecordUnavailable; +export interface ResolveTablesResult { + tables: SourceTable[]; + dropTables: SourceTable[]; +} - logger?: Logger; +export interface ResolveTableResult { + table: SourceTable; + dropTables: SourceTable[]; } export interface CompactOptions { @@ -337,5 +350,3 @@ export const CHECKPOINT_INVALIDATE_ALL: CheckpointChanges = { updatedParameterLookups: new Set(), invalidateParameterBuckets: true }; - -export type BucketStorageMarkRecordUnavailable = (record: SaveUpdate) => void; diff --git a/packages/service-core/src/storage/bson.ts b/packages/service-core/src/storage/bson.ts index ad7ee3e16..69c5fffec 100644 --- a/packages/service-core/src/storage/bson.ts +++ b/packages/service-core/src/storage/bson.ts @@ -1,7 +1,7 @@ import * as bson from 'bson'; import { ScopedParameterLookup, SqliteJsonValue } from '@powersync/service-sync-rules'; -import { ReplicaId } from './BucketStorageBatch.js'; +import { ReplicaId } from './BucketDataWriter.js'; type NodeBuffer = Buffer; diff --git a/packages/service-core/src/storage/storage-index.ts b/packages/service-core/src/storage/storage-index.ts index b83a2fb2f..9348a7e8f 100644 --- a/packages/service-core/src/storage/storage-index.ts +++ b/packages/service-core/src/storage/storage-index.ts @@ -9,7 +9,7 @@ export * from './StorageProvider.js'; export * from './storage-metrics.js'; export * from './WriteCheckpointAPI.js'; export * from './BucketStorageFactory.js'; -export * from './BucketStorageBatch.js'; +export * from './BucketDataWriter.js'; export * from './SyncRulesBucketStorage.js'; export * from './PersistedSyncRulesContent.js'; export * from './ReplicationLock.js'; diff --git a/packages/service-core/src/sync/BucketChecksumState.ts b/packages/service-core/src/sync/BucketChecksumState.ts index 0130ab85c..9ef3bab01 100644 --- a/packages/service-core/src/sync/BucketChecksumState.ts +++ b/packages/service-core/src/sync/BucketChecksumState.ts @@ -5,7 +5,8 @@ import { HydratedSyncRules, RequestedStream, RequestParameters, - ResolvedBucket + ResolvedBucket, + SOURCE } from '@powersync/service-sync-rules'; import * as storage from '../storage/storage-index.js'; @@ -136,20 +137,23 @@ export class BucketChecksumState { } // Re-check updated buckets only - let checksumLookups: string[] = []; + let checksumLookups: storage.BucketChecksumRequest[] = []; let newChecksums = new Map(); - for (let bucket of bucketDescriptionMap.keys()) { - if (!updatedBuckets.has(bucket)) { - const existing = this.lastChecksums.get(bucket); + for (let desc of bucketDescriptionMap.values()) { + if (!updatedBuckets.has(desc.bucket)) { + const existing = this.lastChecksums.get(desc.bucket); if (existing == null) { // If this happens, it means updatedBuckets did not correctly include all new buckets - throw new ServiceAssertionError(`Existing checksum not found for bucket ${bucket}`); + throw new ServiceAssertionError(`Existing checksum not found for bucket ${desc}`); } // Bucket is not specifically updated, and we have a previous checksum - newChecksums.set(bucket, existing); + newChecksums.set(desc.bucket, existing); } else { - checksumLookups.push(bucket); + checksumLookups.push({ + bucket: desc.bucket, + source: desc[SOURCE] + }); } } @@ -162,7 +166,7 @@ export class BucketChecksumState { checksumMap = newChecksums; } else { // Re-check all buckets - const bucketList = [...bucketDescriptionMap.keys()]; + const bucketList = [...bucketDescriptionMap.values()].map((b) => ({ bucket: b.bucket, source: b[SOURCE] })); checksumMap 
= await storage.getChecksums(base.checkpoint, bucketList); } @@ -206,8 +210,10 @@ export class BucketChecksumState { ...this.parameterState.translateResolvedBucket(bucketDescriptionMap.get(e.bucket)!, streamNameToIndex) })); bucketsToFetch = [...generateBucketsToFetch].map((b) => { + const description = bucketDescriptionMap.get(b); return { - priority: bucketDescriptionMap.get(b)!.priority, + priority: description!.priority, + [SOURCE]: description![SOURCE], bucket: b }; }); @@ -241,7 +247,7 @@ export class BucketChecksumState { message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`; this.logger.info(message, { checkpoint: base.checkpoint, user_id: userIdForLogs, buckets: allBuckets.length }); }; - bucketsToFetch = allBuckets.map((b) => ({ bucket: b.bucket, priority: b.priority })); + bucketsToFetch = allBuckets.map((b) => ({ bucket: b.bucket, priority: b.priority, [SOURCE]: b[SOURCE] })); const subscriptions: util.StreamDescription[] = []; const streamNameToIndex = new Map(); @@ -318,17 +324,21 @@ export class BucketChecksumState { deferredLog(); }, - getFilteredBucketPositions: (buckets?: BucketDescription[]): Map => { + getFilteredBucketPositions: (buckets?: BucketDescription[]): storage.BucketDataRequest[] => { if (!hasAdvanced) { throw new ServiceAssertionError('Call line.advance() before getFilteredBucketPositions()'); } buckets ??= bucketsToFetch; - const filtered = new Map(); + const filtered: storage.BucketDataRequest[] = []; for (let bucket of buckets) { const state = this.bucketDataPositions.get(bucket.bucket); if (state) { - filtered.set(bucket.bucket, state.start_op_id); + filtered.push({ + bucket: bucket.bucket, + start: state.start_op_id, + source: bucket[SOURCE] + }); } } return filtered; @@ -621,7 +631,7 @@ export interface CheckpointLine { * * @param bucketsToFetch List of buckets to fetch - either this.bucketsToFetch, or a subset of it. Defaults to this.bucketsToFetch. */ - getFilteredBucketPositions(bucketsToFetch?: BucketDescription[]): Map; + getFilteredBucketPositions(bucketsToFetch?: BucketDescription[]): storage.BucketDataRequest[]; /** * Update the position of bucket data the client has, after it was sent to the client. 
@@ -672,7 +682,9 @@ function mergeBuckets(buckets: ResolvedBucket[]): ResolvedBucket[] { if (Object.hasOwn(byBucketId, bucket.bucket)) { byBucketId[bucket.bucket].inclusion_reasons.push(...bucket.inclusion_reasons); } else { - byBucketId[bucket.bucket] = structuredClone(bucket); + let clone = structuredClone(bucket); + clone[SOURCE] = bucket[SOURCE]; // structuredClone does not clone symbol-keyed properties + byBucketId[bucket.bucket] = clone; } } diff --git a/packages/service-core/src/util/protocol-types.ts b/packages/service-core/src/util/protocol-types.ts index 82baa89e5..0156c0f23 100644 --- a/packages/service-core/src/util/protocol-types.ts +++ b/packages/service-core/src/util/protocol-types.ts @@ -1,5 +1,5 @@ import { JsonContainer } from '@powersync/service-jsonbig'; -import { BucketPriority, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { BucketDataScope, BucketPriority, SqliteJsonRow } from '@powersync/service-sync-rules'; import * as t from 'ts-codec'; export const BucketRequest = t.object({ diff --git a/packages/service-core/test/src/checksum_cache.test.ts b/packages/service-core/test/src/checksum_cache.test.ts index f0b61342c..1ae7d9ba6 100644 --- a/packages/service-core/test/src/checksum_cache.test.ts +++ b/packages/service-core/test/src/checksum_cache.test.ts @@ -1,7 +1,9 @@ import { ChecksumCache, FetchChecksums, FetchPartialBucketChecksum } from '@/storage/ChecksumCache.js'; import { addChecksums, BucketChecksum, InternalOpId, PartialChecksum } from '@/util/util-index.js'; +import { BucketDataSource } from '@powersync/service-sync-rules'; import * as crypto from 'node:crypto'; import { describe, expect, it } from 'vitest'; +import { removeSource } from './utils.js'; /** * Create a deterministic BucketChecksum based on the bucket name and checkpoint for testing purposes. 
@@ -67,6 +69,12 @@ describe('checksum cache', function () { return new ChecksumCache({ fetchChecksums: fetch }); }; + const DUMMY_SOURCE: BucketDataSource = null as any; + + function removeLookupSources(lookups: FetchPartialBucketChecksum[]) { + return lookups.map((b) => removeSource(b)); + } + it('should handle a sequential lookups (a)', async function () { let lookups: FetchPartialBucketChecksum[][] = []; const cache = factory(async (batch) => { @@ -74,13 +82,13 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); - expect(await cache.getChecksums(123n, ['test2'])).toEqual([TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }])).toEqual([TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toMatchObject([ [{ bucket: 'test', end: 123n }], // This should use the previous lookup [{ bucket: 'test', start: 123n, end: 1234n }], @@ -96,13 +104,13 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test2'])).toEqual([TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }])).toEqual([TEST2_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // With this order, there is no option for a partial lookup [{ bucket: 'test2', end: 123n }], [{ bucket: 'test', end: 1234n }], @@ -117,16 +125,16 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(1234n, ['test']); - const p3 = cache.getChecksums(123n, ['test2']); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p2 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p3 = cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST_1234]); expect(await p3).toEqual([TEST2_123]); // Concurrent requests, so we can't do a partial lookup for 123 -> 1234 - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', end: 1234n }], [{ bucket: 'test2', end: 123n }] @@ -140,15 +148,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(123n, ['test']); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p2 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST_123]); // The lookup should be deduplicated, even though it's in progress - 
expect(lookups).toEqual([[{ bucket: 'test', end: 123n }]]); + expect(lookups.map(removeLookupSources)).toEqual([[{ bucket: 'test', end: 123n }]]); }); it('should handle serial + concurrent lookups', async function () { @@ -158,15 +166,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - const p2 = cache.getChecksums(1234n, ['test']); - const p3 = cache.getChecksums(1234n, ['test']); + const p2 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p3 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); expect(await p2).toEqual([TEST_1234]); expect(await p3).toEqual([TEST_1234]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], // This lookup is deduplicated [{ bucket: 'test', start: 123n, end: 1234n }] @@ -180,9 +188,14 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test', 'test2'])).toEqual([TEST_123, TEST2_123]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([TEST_123, TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [ // Both lookups in the same request { bucket: 'test', end: 123n }, @@ -198,10 +211,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(123n, ['test', 'test2'])).toEqual([TEST_123, TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([TEST_123, TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request 1 [{ bucket: 'test', end: 123n }], // Request 2 @@ -216,13 +234,19 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const a = cache.getChecksums(123n, ['test', 'test2']); - const b = cache.getChecksums(123n, ['test2', 'test3']); + const a = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); expect(await a).toEqual([TEST_123, TEST2_123]); expect(await b).toEqual([TEST2_123, TEST3_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request A [ { bucket: 'test', end: 123n }, @@ -240,9 +264,9 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(await cache.getChecksums(125n, ['test'])).toEqual([ + expect(await cache.getChecksums(125n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1865121912, @@ -250,14 +274,14 @@ describe('checksum cache', function () { } ]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + 
expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, count: 124 } ]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 125n }], [{ bucket: 'test', start: 123n, end: 124n }] @@ -275,19 +299,31 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const a = cache.getChecksums(123n, ['test', 'test2']); - const b = cache.getChecksums(123n, ['test2', 'test3']); + const a = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); await expect(a).rejects.toEqual(TEST_ERROR); await expect(b).rejects.toEqual(TEST_ERROR); - const a2 = cache.getChecksums(123n, ['test', 'test2']); - const b2 = cache.getChecksums(123n, ['test2', 'test3']); + const a2 = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b2 = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); expect(await a2).toEqual([TEST_123, TEST2_123]); expect(await b2).toEqual([TEST2_123, TEST3_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request A (fails) [ { bucket: 'test', end: 123n }, @@ -311,11 +347,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch.filter((b) => b.bucket != 'test')); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]); - expect(await cache.getChecksums(123n, ['test', 'test2'])).toEqual([ - { bucket: 'test', checksum: 0, count: 0 }, - TEST2_123 + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ + { bucket: 'test', checksum: 0, count: 0 } ]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([{ bucket: 'test', checksum: 0, count: 0 }, TEST2_123]); }); it('should handle missing checksums (b)', async function () { @@ -325,8 +365,10 @@ describe('checksum cache', function () { return fetchTestChecksums(batch.filter((b) => b.bucket != 'test' || b.end != 123n)); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([ + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ + { bucket: 'test', checksum: 0, count: 0 } + ]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1597020602, @@ -334,7 +376,10 @@ describe('checksum cache', function () { } ]); - expect(lookups).toEqual([[{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 1234n }]]); + expect(lookups.map(removeLookupSources)).toEqual([ + [{ bucket: 'test', end: 123n }], + [{ bucket: 'test', start: 123n, end: 1234n }] + ]); }); it('should use maxSize', async function () { @@ -347,8 +392,8 @@ describe('checksum cache', function () { maxSize: 2 }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + expect(await cache.getChecksums(123n, [{ 
bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, @@ -356,30 +401,30 @@ describe('checksum cache', function () { } ]); - expect(await cache.getChecksums(125n, ['test'])).toEqual([ + expect(await cache.getChecksums(125n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1865121912, count: 125 } ]); - expect(await cache.getChecksums(126n, ['test'])).toEqual([ + expect(await cache.getChecksums(126n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1720007310, count: 126 } ]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, count: 124 } ]); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 124n }], [{ bucket: 'test', start: 124n, end: 125n }], @@ -400,10 +445,10 @@ describe('checksum cache', function () { maxSize: 2 }); - const p3 = cache.getChecksums(123n, ['test3']); - const p4 = cache.getChecksums(123n, ['test4']); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(123n, ['test2']); + const p3 = cache.getChecksums(123n, [{ bucket: 'test3', source: DUMMY_SOURCE }]); + const p4 = cache.getChecksums(123n, [{ bucket: 'test4', source: DUMMY_SOURCE }]); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p2 = cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST2_123]); @@ -417,7 +462,7 @@ describe('checksum cache', function () { ]); // The lookup should be deduplicated, even though it's in progress - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test3', end: 123n }], [{ bucket: 'test4', end: 123n }], [{ bucket: 'test', end: 123n }], @@ -434,7 +479,7 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); }); }); diff --git a/packages/service-core/test/src/routes/stream.test.ts b/packages/service-core/test/src/routes/stream.test.ts index b59a2764a..9a543e7fc 100644 --- a/packages/service-core/test/src/routes/stream.test.ts +++ b/packages/service-core/test/src/routes/stream.test.ts @@ -1,6 +1,6 @@ import { BasicRouterRequest, Context, JwtPayload, SyncRulesBucketStorage } from '@/index.js'; import { RouterResponse, ServiceError, logger } from '@powersync/lib-services-framework'; -import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { DEFAULT_HYDRATION_STATE, SqlSyncRules } from '@powersync/service-sync-rules'; import { Readable, Writable } from 'stream'; import { pipeline } from 'stream/promises'; import { describe, expect, it } from 'vitest'; @@ -44,8 +44,8 @@ 
describe('Stream Route', () => { // when compressing the stream. const storage = { - getParsedSyncRules() { - return new SqlSyncRules('bucket_definitions: {}').hydrate(); + getHydratedSyncRules() { + return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { throw new Error('Simulated storage error'); @@ -82,8 +82,8 @@ describe('Stream Route', () => { it('logs the application metadata', async () => { const storage = { - getParsedSyncRules() { - return new SqlSyncRules('bucket_definitions: {}').hydrate(); + getHydratedSyncRules() { + return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { throw new Error('Simulated storage error'); diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index 95d36df88..b9286877c 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -1,5 +1,6 @@ import { BucketChecksum, + BucketChecksumRequest, BucketChecksumState, BucketChecksumStateOptions, BucketChecksumStateStorage, @@ -13,9 +14,17 @@ import { WatchFilterEvent } from '@/index.js'; import { JSONBig } from '@powersync/service-jsonbig'; -import { RequestJwtPayload, ScopedParameterLookup, SqliteJsonRow, SqlSyncRules } from '@powersync/service-sync-rules'; -import { versionedHydrationState } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { + DEFAULT_HYDRATION_STATE, + RequestJwtPayload, + ScopedParameterLookup, + SqliteJsonRow, + SqlSyncRules, + TablePattern, + versionedHydrationState +} from '@powersync/service-sync-rules'; import { beforeEach, describe, expect, test } from 'vitest'; +import { removeSource, removeSourceSymbol } from '../utils.js'; describe('BucketChecksumState', () => { // Single global[] bucket. 
@@ -27,7 +36,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).config.hydrate({ hydrationState: versionedHydrationState(1) }); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); // global[1] and global[2] const SYNC_RULES_GLOBAL_TWO = SqlSyncRules.fromYaml( @@ -40,7 +49,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).config.hydrate({ hydrationState: versionedHydrationState(2) }); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); // by_project[n] const SYNC_RULES_DYNAMIC = SqlSyncRules.fromYaml( @@ -51,7 +60,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).config.hydrate({ hydrationState: versionedHydrationState(3) }); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); const syncContext = new SyncContext({ maxBuckets: 100, @@ -89,14 +98,14 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 0n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 0n }]); // This similuates the bucket data being sent line.advance(); @@ -125,7 +134,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.getFilteredBucketPositions()).toEqual(new Map([['global[]', 1n]])); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 1n }]); }); test('global bucket with initial state', async () => { @@ -159,14 +168,14 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); // This is the main difference between this and the previous test - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 1n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 1n }]); }); test('multiple static buckets', async () => { @@ -199,7 +208,7 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -267,13 +276,13 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 0n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 0n }]); }); test('invalidating individual bucket', async () => { @@ -330,7 +339,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.bucketsToFetch).toEqual([{ bucket: 'global[1]', priority: 3 }]); + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([{ bucket: 'global[1]', priority: 3 }]); }); test('invalidating all buckets', async () => { @@ -380,7 +389,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.bucketsToFetch).toEqual([ + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 }, { bucket: 'global[2]', 
priority: 3 } ]); @@ -417,7 +426,7 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -429,12 +438,16 @@ bucket_definitions: ]); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual( - new Map([ - ['global[1]', 0n], - ['global[2]', 0n] - ]) - ); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'global[1]', + start: 0n + }, + { + bucket: 'global[2]', + start: 0n + } + ]); // No data changes here. // We simulate partial data sent, before a checkpoint is interrupted. @@ -470,7 +483,7 @@ bucket_definitions: } }); // This should contain both buckets, even though only one changed. - expect(line2.bucketsToFetch).toEqual([ + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -481,12 +494,16 @@ bucket_definitions: } ]); - expect(line2.getFilteredBucketPositions()).toEqual( - new Map([ - ['global[1]', 3n], - ['global[2]', 1n] - ]) - ); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'global[1]', + start: 3n + }, + { + bucket: 'global[2]', + start: 1n + } + ]); }); test('dynamic buckets with updates', async () => { @@ -504,9 +521,14 @@ bucket_definitions: bucketStorage: storage }); + const source = SYNC_RULES_DYNAMIC.getMatchingSources(new TablePattern('public', 'projects')) + .parameterIndexLookupCreators[0]; + const line = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(1n, (lookups) => { - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1' }, ['u1'])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source }, ['u1']) + ]); return [{ id: 1 }, { id: 2 }]; }), writeCheckpoint: null, @@ -541,7 +563,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'by_project[1]', priority: 3 @@ -553,12 +575,16 @@ bucket_definitions: ]); line.advance(); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual( - new Map([ - ['by_project[1]', 0n], - ['by_project[2]', 0n] - ]) - ); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'by_project[1]', + start: 0n + }, + { + bucket: 'by_project[2]', + start: 0n + } + ]); line.advance(); line.updateBucketPosition({ bucket: 'by_project[1]', nextAfter: 1n, hasMore: false }); @@ -567,7 +593,9 @@ bucket_definitions: // Now we get a new line const line2 = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(2n, (lookups) => { - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1' }, ['u1'])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source }, ['u1']) + ]); return [{ id: 1 }, { id: 2 }, { id: 3 }]; }), writeCheckpoint: null, @@ -595,7 +623,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.getFilteredBucketPositions()).toEqual(new Map([['by_project[3]', 0n]])); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'by_project[3]', start: 0n }]); }); describe('streams', () => { @@ -837,14 +865,14 @@ class MockBucketChecksumStateStorage implements 
BucketChecksumStateStorage { this.filter?.({ invalidate: true }); } - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, requests: BucketChecksumRequest[]): Promise { return new Map( - buckets.map((bucket) => { - const checksum = this.state.get(bucket); + requests.map((request) => { + const checksum = this.state.get(request.bucket); return [ - bucket, + request.bucket, { - bucket: bucket, + bucket: request.bucket, checksum: checksum?.checksum ?? 0, count: checksum?.count ?? 0 } diff --git a/packages/service-core/test/src/utils.ts b/packages/service-core/test/src/utils.ts new file mode 100644 index 000000000..d37876be2 --- /dev/null +++ b/packages/service-core/test/src/utils.ts @@ -0,0 +1,20 @@ +import { SOURCE } from '@powersync/service-sync-rules'; +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} diff --git a/packages/service-core/test/tsconfig.json b/packages/service-core/test/tsconfig.json index f868fd0a8..9055101d7 100644 --- a/packages/service-core/test/tsconfig.json +++ b/packages/service-core/test/tsconfig.json @@ -1,12 +1,12 @@ { "extends": "../../../tsconfig.tests.json", "compilerOptions": { + "rootDir": "src", "baseUrl": "./", "outDir": "dist", "paths": { "@/*": ["../src/*"] - }, - "rootDir": "src" + } }, "include": ["src"], "references": [ diff --git a/packages/sync-rules/src/BaseSqlDataQuery.ts b/packages/sync-rules/src/BaseSqlDataQuery.ts index 9d86a157d..4e1f59497 100644 --- a/packages/sync-rules/src/BaseSqlDataQuery.ts +++ b/packages/sync-rules/src/BaseSqlDataQuery.ts @@ -1,21 +1,21 @@ import { SelectedColumn } from 'pgsql-ast-parser'; +import { idFromData } from './cast.js'; import { SqlRuleError } from './errors.js'; import { ColumnDefinition } from './ExpressionType.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { AvailableTable, SqlTools } from './sql_filters.js'; -import { castAsText } from './sql_functions.js'; import { TablePattern } from './TablePattern.js'; import { QueryParameters, QuerySchema, - UnscopedEvaluatedRow, - UnscopedEvaluationResult, SourceSchema, SourceSchemaTable, SqliteJsonRow, - SqliteRow + SqliteRow, + UnscopedEvaluatedRow, + UnscopedEvaluationResult } from './types.js'; -import { filterJsonRow, idFromData } from './utils.js'; +import { filterJsonRow } from './utils.js'; export interface RowValueExtractor { extract(tables: QueryParameters, into: SqliteRow): void; diff --git a/packages/sync-rules/src/BucketDescription.ts b/packages/sync-rules/src/BucketDescription.ts index 8dd732f34..032bcd282 100644 --- a/packages/sync-rules/src/BucketDescription.ts +++ b/packages/sync-rules/src/BucketDescription.ts @@ -1,3 +1,6 @@ +import { BucketDataSource } from './index.js'; +import { SOURCE } from './utils.js'; + /** * The priority in which to synchronize buckets. * @@ -29,6 +32,8 @@ export interface BucketDescription { * The priority used to synchronize this bucket, derived from its definition. 
*/ priority: BucketPriority; + + [SOURCE]: BucketDataSource; } /** diff --git a/packages/sync-rules/src/BucketParameterQuerier.ts b/packages/sync-rules/src/BucketParameterQuerier.ts index 4d603b0a8..88960babc 100644 --- a/packages/sync-rules/src/BucketParameterQuerier.ts +++ b/packages/sync-rules/src/BucketParameterQuerier.ts @@ -4,6 +4,7 @@ import { ParameterLookupScope } from './HydrationState.js'; import { RequestedStream } from './SqlSyncRules.js'; import { RequestParameters, SqliteJsonRow, SqliteJsonValue } from './types.js'; import { normalizeParameterValue } from './utils.js'; +import { ParameterIndexLookupCreator } from './index.js'; /** * Represents a set of parameter queries for a specific request. @@ -106,6 +107,7 @@ export function mergeBucketParameterQueriers(queriers: BucketParameterQuerier[]) export class ScopedParameterLookup { // bucket definition name, parameter query index, ...lookup values readonly values: readonly SqliteJsonValue[]; + readonly source: ParameterIndexLookupCreator; #cachedSerializedForm?: string; @@ -119,22 +121,27 @@ export class ScopedParameterLookup { } static normalized(scope: ParameterLookupScope, lookup: UnscopedParameterLookup): ScopedParameterLookup { - return new ScopedParameterLookup([scope.lookupName, scope.queryId, ...lookup.lookupValues]); + return new ScopedParameterLookup(scope.source, [scope.lookupName, scope.queryId, ...lookup.lookupValues]); } /** * Primarily for test fixtures. */ static direct(scope: ParameterLookupScope, values: SqliteJsonValue[]): ScopedParameterLookup { - return new ScopedParameterLookup([scope.lookupName, scope.queryId, ...values.map(normalizeParameterValue)]); + return new ScopedParameterLookup(scope.source, [ + scope.lookupName, + scope.queryId, + ...values.map(normalizeParameterValue) + ]); } /** * * @param values must be pre-normalized (any integer converted into bigint) */ - private constructor(values: SqliteJsonValue[]) { + private constructor(source: ParameterIndexLookupCreator, values: SqliteJsonValue[]) { this.values = Object.freeze(values); + this.source = source; } } diff --git a/packages/sync-rules/src/BucketSource.ts b/packages/sync-rules/src/BucketSource.ts index 482fb5f5b..bdec256d8 100644 --- a/packages/sync-rules/src/BucketSource.ts +++ b/packages/sync-rules/src/BucketSource.ts @@ -1,8 +1,8 @@ import { BucketParameterQuerier, - UnscopedParameterLookup, PendingQueriers, - ScopedParameterLookup + ScopedParameterLookup, + UnscopedParameterLookup } from './BucketParameterQuerier.js'; import { ColumnDefinition } from './ExpressionType.js'; import { DEFAULT_HYDRATION_STATE, HydrationState, ParameterLookupScope } from './HydrationState.js'; @@ -10,18 +10,18 @@ import { SourceTableInterface } from './SourceTableInterface.js'; import { GetQuerierOptions } from './SqlSyncRules.js'; import { TablePattern } from './TablePattern.js'; import { + EvaluatedParameters, EvaluatedParametersResult, EvaluatedRow, EvaluateRowOptions, EvaluationResult, isEvaluationError, - UnscopedEvaluationResult, SourceSchema, SqliteRow, UnscopedEvaluatedParametersResult, - EvaluatedParameters + UnscopedEvaluationResult } from './types.js'; -import { buildBucketName } from './utils.js'; +import { buildBucketInfo, SOURCE } from './utils.js'; export interface CreateSourceParams { hydrationState: HydrationState; @@ -104,7 +104,7 @@ export interface BucketDataSource { */ readonly bucketParameters: string[]; - getSourceTables(): Set; + getSourceTables(): TablePattern[]; tableSyncsData(table: SourceTableInterface): boolean; /** @@ -137,7 
+137,7 @@ export interface ParameterIndexLookupCreator { */ readonly defaultLookupScope: ParameterLookupScope; - getSourceTables(): Set; + getSourceTables(): TablePattern[]; /** * Given a row in a source table that affects sync parameters, returns a structure to index which buckets rows should @@ -171,11 +171,13 @@ export function hydrateEvaluateRow(hydrationState: HydrationState, source: Bucke if (isEvaluationError(result)) { return result; } + const info = buildBucketInfo(scope, result.serializedBucketParameters); return { - bucket: buildBucketName(scope, result.serializedBucketParameters), + bucket: info.bucket, id: result.id, table: result.table, - data: result.data + data: result.data, + source: info[SOURCE] } satisfies EvaluatedRow; }); }; diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 7cabaeb37..a7856ea22 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -1,6 +1,5 @@ import { BucketDataSource, CreateSourceParams, HydratedBucketSource } from './BucketSource.js'; import { - ParameterIndexLookupCreator, BucketParameterQuerier, CompatibilityContext, EvaluatedParameters, @@ -14,22 +13,51 @@ import { mergeBucketParameterQueriers, mergeDataSources, mergeParameterIndexLookupCreators, + ParameterIndexLookupCreator, QuerierError, ScopedEvaluateParameterRow, ScopedEvaluateRow, SqlEventDescriptor, SqliteInputValue, SqliteValue, - SyncConfig + SyncConfig, + TablePattern } from './index.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { EvaluatedParametersResult, EvaluateRowOptions, EvaluationResult, SqliteRow } from './types.js'; +export interface RowProcessor { + readonly eventDescriptors: SqlEventDescriptor[]; + readonly compatibility: CompatibilityContext; + + getSourceTables(): TablePattern[]; + + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[]; + + getMatchingSources(pattern: TablePattern): TableDataSources; + + applyRowContext( + source: SqliteRow + ): SqliteRow; + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] }; + + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] }; +} + +export interface TableDataSources { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: ParameterIndexLookupCreator[]; +} + /** * Hydrated sync rules is sync rule definitions along with persisted state. Currently, the persisted state * specifically affects bucket names. 
*/ -export class HydratedSyncRules { +export class HydratedSyncRules implements RowProcessor { bucketSources: HydratedBucketSource[] = []; eventDescriptors: SqlEventDescriptor[] = []; compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; @@ -38,6 +66,8 @@ export class HydratedSyncRules { private readonly innerEvaluateRow: ScopedEvaluateRow; private readonly innerEvaluateParameterRow: ScopedEvaluateParameterRow; + private readonly bucketDataSources: BucketDataSource[]; + private readonly bucketParameterIndexLookupCreators: ParameterIndexLookupCreator[]; constructor(params: { definition: SyncConfig; @@ -50,6 +80,8 @@ export class HydratedSyncRules { const hydrationState = params.createParams.hydrationState; this.definition = params.definition; + this.bucketDataSources = params.bucketDataSources; + this.bucketParameterIndexLookupCreators = params.bucketParameterIndexLookupCreators; this.innerEvaluateRow = mergeDataSources(hydrationState, params.bucketDataSources).evaluateRow; this.innerEvaluateParameterRow = mergeParameterIndexLookupCreators( hydrationState, @@ -66,6 +98,30 @@ export class HydratedSyncRules { this.bucketSources = this.definition.bucketSources.map((source) => source.hydrate(params.createParams)); } + getMatchingSources(pattern: TablePattern): { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: ParameterIndexLookupCreator[]; + } { + // FIXME: Fix performance - don't scan all sources + // Or just merge implementations with MergedSyncRules + const bucketDataSources = this.bucketDataSources.filter((ds) => + ds.getSourceTables().some((table) => table.equals(pattern)) + ); + const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = this.bucketParameterIndexLookupCreators.filter( + (ds) => ds.getSourceTables().some((table) => table.equals(pattern)) + ); + return { + bucketDataSources, + parameterIndexLookupCreators + }; + } + + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + return this.definition.getSourceTables().filter((pattern) => { + return pattern.matches(table); + }); + } + // These methods do not depend on hydration, so we can just forward them to the definition. getSourceTables() { @@ -148,3 +204,75 @@ export class HydratedSyncRules { return { querier, errors }; } } + +/** + * Combines multiple hydrated sync rules into a single row processor. + * + * Does not merge any definitions; simply forwards calls to all contained sync rules. 
+ */ +export class MultiSyncRules implements RowProcessor { + private readonly syncRules: HydratedSyncRules[]; + + constructor(syncRules: HydratedSyncRules[]) { + this.syncRules = syncRules; + } + + get eventDescriptors(): SqlEventDescriptor[] { + return this.syncRules.flatMap((sr) => sr.eventDescriptors); + } + + get compatibility(): CompatibilityContext { + // FIXME + return this.syncRules[0].compatibility; + } + + getSourceTables(): TablePattern[] { + return this.syncRules.flatMap((sr) => sr.getSourceTables()); + } + + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + return this.syncRules.flatMap((sr) => sr.getMatchingTablePatterns(table)); + } + + getMatchingSources(pattern: TablePattern): TableDataSources { + let result: TableDataSources = { bucketDataSources: [], parameterIndexLookupCreators: [] }; + for (let sr of this.syncRules) { + const sources = sr.getMatchingSources(pattern); + result.bucketDataSources.push(...sources.bucketDataSources); + result.parameterIndexLookupCreators.push(...sources.parameterIndexLookupCreators); + } + return result; + } + + applyRowContext( + source: SqliteRow + ): SqliteRow { + // FIXME + return this.syncRules[0].applyRowContext(source); + } + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { + let results: EvaluatedRow[] = []; + let errors: EvaluationError[] = []; + for (let sr of this.syncRules) { + const { results: srResults, errors: srErrors } = sr.evaluateRowWithErrors(options); + results.push(...srResults); + errors.push(...srErrors); + } + return { results, errors }; + } + + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { + let results: EvaluatedParameters[] = []; + let errors: EvaluationError[] = []; + for (let sr of this.syncRules) { + const { results: srResults, errors: srErrors } = sr.evaluateParameterRowWithErrors(table, row); + results.push(...srResults); + errors.push(...srErrors); + } + return { results, errors }; + } +} diff --git a/packages/sync-rules/src/HydrationState.ts b/packages/sync-rules/src/HydrationState.ts index f836a62b4..a42831760 100644 --- a/packages/sync-rules/src/HydrationState.ts +++ b/packages/sync-rules/src/HydrationState.ts @@ -3,12 +3,14 @@ import { BucketDataSource, ParameterIndexLookupCreator } from './BucketSource.js export interface BucketDataScope { /** The prefix is the bucket name before the parameters. */ bucketPrefix: string; + source: BucketDataSource; } export interface ParameterLookupScope { /** The lookup name + queryid is used to reference the parameter lookup record. 
*/ lookupName: string; queryId: string; + source: ParameterIndexLookupCreator; } /** @@ -37,7 +39,8 @@ export interface HydrationState { export const DEFAULT_HYDRATION_STATE: HydrationState = { getBucketSourceScope(source: BucketDataSource) { return { - bucketPrefix: source.uniqueName + bucketPrefix: source.uniqueName, + source }; }, getParameterIndexLookupScope(source) { @@ -61,7 +64,8 @@ export function versionedHydrationState(version: number): HydrationState { return { getBucketSourceScope(source: BucketDataSource): BucketDataScope { return { - bucketPrefix: `${version}#${source.uniqueName}` + bucketPrefix: `${version}#${source.uniqueName}`, + source }; }, diff --git a/packages/sync-rules/src/SqlBucketDescriptor.ts b/packages/sync-rules/src/SqlBucketDescriptor.ts index 04eb7f590..e1522fd8f 100644 --- a/packages/sync-rules/src/SqlBucketDescriptor.ts +++ b/packages/sync-rules/src/SqlBucketDescriptor.ts @@ -190,10 +190,10 @@ export class BucketDefinitionDataSource implements BucketDataSource { return results; } - getSourceTables(): Set { - let result = new Set(); + getSourceTables(): TablePattern[] { + let result: TablePattern[] = []; for (let query of this.descriptor.dataQueries) { - result.add(query.sourceTable); + result.push(query.sourceTable); } return result; } diff --git a/packages/sync-rules/src/SqlParameterQuery.ts b/packages/sync-rules/src/SqlParameterQuery.ts index 2f4cb8e8b..94afdd453 100644 --- a/packages/sync-rules/src/SqlParameterQuery.ts +++ b/packages/sync-rules/src/SqlParameterQuery.ts @@ -42,7 +42,7 @@ import { SqliteRow } from './types.js'; import { - buildBucketName, + buildBucketInfo, filterJsonRow, isJsonValue, isSelectStatement, @@ -337,7 +337,8 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { public get defaultLookupScope(): ParameterLookupScope { return { lookupName: this.descriptorName, - queryId: this.queryId + queryId: this.queryId, + source: this }; } @@ -345,8 +346,8 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { return this.sourceTable.matches(table); } - getSourceTables(): Set { - return new Set([this.sourceTable]); + getSourceTables() { + return [this.sourceTable]; } createParameterQuerierSource(params: CreateSourceParams): BucketParameterQuerierSource { @@ -442,7 +443,7 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { const serializedParameters = serializeBucketParameters(this.bucketParameters, result); return { - bucket: buildBucketName(bucketScope, serializedParameters), + ...buildBucketInfo(bucketScope, serializedParameters), priority: this.priority }; }) diff --git a/packages/sync-rules/src/StaticSqlParameterQuery.ts b/packages/sync-rules/src/StaticSqlParameterQuery.ts index f08422ee0..cb233c55f 100644 --- a/packages/sync-rules/src/StaticSqlParameterQuery.ts +++ b/packages/sync-rules/src/StaticSqlParameterQuery.ts @@ -10,7 +10,7 @@ import { AvailableTable, SqlTools } from './sql_filters.js'; import { checkUnsupportedFeatures, isClauseError, sqliteBool } from './sql_support.js'; import { TablePattern } from './TablePattern.js'; import { ParameterValueClause, QueryParseOptions, RequestParameters, SqliteJsonValue } from './types.js'; -import { buildBucketName, isJsonValue, serializeBucketParameters } from './utils.js'; +import { buildBucketInfo, isJsonValue, serializeBucketParameters } from './utils.js'; import { DetectRequestParameters } from './validators.js'; export interface StaticSqlParameterQueryOptions { @@ -227,7 +227,7 @@ export class StaticSqlParameterQuery { return 
[ { - bucket: buildBucketName(bucketSourceScope, serializedParamters), + ...buildBucketInfo(bucketSourceScope, serializedParamters), priority: this.priority } ]; diff --git a/packages/sync-rules/src/SyncConfig.ts b/packages/sync-rules/src/SyncConfig.ts index a8103131d..93f0a9663 100644 --- a/packages/sync-rules/src/SyncConfig.ts +++ b/packages/sync-rules/src/SyncConfig.ts @@ -33,17 +33,12 @@ export abstract class SyncConfig { /** * Hydrate the sync rule definitions with persisted state into runnable sync rules. * - * @param params.hydrationState Transforms bucket ids based on persisted state. May omit for tests. + * @param createParams.hydrationState Transforms bucket ids based on persisted state. */ - hydrate(params?: CreateSourceParams): HydratedSyncRules { - let hydrationState = params?.hydrationState; - if (hydrationState == null || !this.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { - hydrationState = DEFAULT_HYDRATION_STATE; - } - const resolvedParams = { hydrationState }; + hydrate(createParams: CreateSourceParams): HydratedSyncRules { return new HydratedSyncRules({ definition: this, - createParams: resolvedParams, + createParams: createParams, bucketDataSources: this.bucketDataSources, bucketParameterIndexLookupCreators: this.bucketParameterLookupSources, eventDescriptors: this.eventDescriptors, diff --git a/packages/sync-rules/src/TablePattern.ts b/packages/sync-rules/src/TablePattern.ts index ee5d66400..59d8c4d00 100644 --- a/packages/sync-rules/src/TablePattern.ts +++ b/packages/sync-rules/src/TablePattern.ts @@ -38,6 +38,13 @@ export class ImplicitSchemaTablePattern implements Equatable { } } + /** + * Unique lookup key for this pattern. For in-memory use only - no guarantee of stability across restarts. + */ + get key(): string { + return JSON.stringify([this.connectionTag, this.schema, this.tablePattern]); + } + get isWildcard() { return this.tablePattern.endsWith('%'); } diff --git a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts index 8e176bfed..2b964a4cb 100644 --- a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts +++ b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts @@ -23,7 +23,7 @@ import { SqliteJsonValue, SqliteRow } from './types.js'; -import { buildBucketName, isJsonValue, serializeBucketParameters } from './utils.js'; +import { buildBucketInfo, isJsonValue, serializeBucketParameters } from './utils.js'; import { DetectRequestParameters } from './validators.js'; export interface TableValuedFunctionSqlParameterQueryOptions { @@ -224,7 +224,7 @@ export class TableValuedFunctionSqlParameterQuery { } getSourceTables() { - return new Set(); + return []; } tableSyncsParameters(_table: SourceTableInterface): boolean { @@ -308,7 +308,7 @@ export class TableValuedFunctionSqlParameterQuery { const serializedBucketParameters = serializeBucketParameters(this.bucketParameters, result); return { - bucket: buildBucketName(bucketScope, serializedBucketParameters), + ...buildBucketInfo(bucketScope, serializedBucketParameters), priority: this.priority }; } diff --git a/packages/sync-rules/src/cast.ts b/packages/sync-rules/src/cast.ts new file mode 100644 index 000000000..58ebfa715 --- /dev/null +++ b/packages/sync-rules/src/cast.ts @@ -0,0 +1,123 @@ +import type { SqliteJsonRow, SqliteValue } from './types.js'; + +/** + * Extracts and normalizes the ID column from a row. 
+ */ +export function idFromData(data: SqliteJsonRow): string { + let id = data.id; + if (typeof id != 'string') { + // While an explicit cast would be better, this covers against very common + // issues when initially testing out sync, for example when the id column is an + // auto-incrementing integer. + // If there is no id column, we use a blank id. This will result in the user syncing + // a single arbitrary row for this table - better than just not being able to sync + // anything. + id = castAsText(id) ?? ''; + } + return id; +} + +export const CAST_TYPES = new Set(['text', 'numeric', 'integer', 'real', 'blob']); + +const textEncoder = new TextEncoder(); +const textDecoder = new TextDecoder(); + +export function castAsText(value: SqliteValue): string | null { + if (value == null) { + return null; + } else if (value instanceof Uint8Array) { + return textDecoder.decode(value); + } else { + return value.toString(); + } +} + +export function castAsBlob(value: SqliteValue): Uint8Array | null { + if (value == null) { + return null; + } else if (value instanceof Uint8Array) { + return value!; + } + + if (typeof value != 'string') { + value = value.toString(); + } + return textEncoder.encode(value); +} + +export function cast(value: SqliteValue, to: string) { + if (value == null) { + return null; + } + if (to == 'text') { + return castAsText(value); + } else if (to == 'numeric') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + return parseNumeric(value); + } else if (typeof value == 'number' || typeof value == 'bigint') { + return value; + } else { + return 0n; + } + } else if (to == 'real') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + const nr = parseFloat(value); + if (isNaN(nr)) { + return 0.0; + } else { + return nr; + } + } else if (typeof value == 'number') { + return value; + } else if (typeof value == 'bigint') { + return Number(value); + } else { + return 0.0; + } + } else if (to == 'integer') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + return parseBigInt(value); + } else if (typeof value == 'number') { + return Number.isInteger(value) ? BigInt(value) : BigInt(Math.floor(value)); + } else if (typeof value == 'bigint') { + return value; + } else { + return 0n; + } + } else if (to == 'blob') { + return castAsBlob(value); + } else { + throw new Error(`Type not supported for cast: '${to}'`); + } +} + +function parseNumeric(text: string): bigint | number { + const match = /^\s*(\d+)(\.\d*)?(e[+\-]?\d+)?/i.exec(text); + if (!match) { + return 0n; + } + + if (match[2] != null || match[3] != null) { + const v = parseFloat(match[0]); + return isNaN(v) ? 
0n : v; + } else { + return BigInt(match[1]); + } +} + +function parseBigInt(text: string): bigint { + const match = /^\s*(\d+)/.exec(text); + if (!match) { + return 0n; + } + return BigInt(match[1]); +} diff --git a/packages/sync-rules/src/compiler/sqlite.ts b/packages/sync-rules/src/compiler/sqlite.ts index b4ce74812..962d3a455 100644 --- a/packages/sync-rules/src/compiler/sqlite.ts +++ b/packages/sync-rules/src/compiler/sqlite.ts @@ -8,17 +8,17 @@ import { PGNode, SelectFromStatement } from 'pgsql-ast-parser'; -import { CAST_TYPES } from '../sql_functions.js'; -import { ColumnInRow, ConnectionParameter, ExpressionInput, NodeLocations, SyncExpression } from './expression.js'; +import { CAST_TYPES } from '../cast.js'; import { BetweenExpression, LiteralExpression, SqlExpression, - supportedFunctions, - BinaryOperator as SupportedBinaryOperator + BinaryOperator as SupportedBinaryOperator, + supportedFunctions } from '../sync_plan/expression.js'; import { ConnectionParameterSource } from '../sync_plan/plan.js'; import { ParsingErrorListener } from './compiler.js'; +import { ColumnInRow, ConnectionParameter, ExpressionInput, NodeLocations, SyncExpression } from './expression.js'; import { BaseSourceResultSet, PhysicalSourceResultSet, SourceResultSet, SyntacticResultSetSource } from './table.js'; import { SqlScope } from './scope.js'; import { SourceSchema } from '../types.js'; diff --git a/packages/sync-rules/src/events/SqlEventDescriptor.ts b/packages/sync-rules/src/events/SqlEventDescriptor.ts index 33c34b98e..77c908d83 100644 --- a/packages/sync-rules/src/events/SqlEventDescriptor.ts +++ b/packages/sync-rules/src/events/SqlEventDescriptor.ts @@ -53,10 +53,10 @@ export class SqlEventDescriptor { return matchingQuery.evaluateRowWithErrors(options.sourceTable, options.record); } - getSourceTables(): Set { - let result = new Set(); + getSourceTables(): TablePattern[] { + let result: TablePattern[] = []; for (let query of this.sourceQueries) { - result.add(query.sourceTable!); + result.push(query.sourceTable!); } return result; } diff --git a/packages/sync-rules/src/index.ts b/packages/sync-rules/src/index.ts index 7e37329e6..fff256b3b 100644 --- a/packages/sync-rules/src/index.ts +++ b/packages/sync-rules/src/index.ts @@ -28,8 +28,10 @@ export * from './types.js'; export * from './types/custom_sqlite_value.js'; export * from './types/time.js'; export * from './utils.js'; +export * from './cast.js'; export { versionedHydrationState } from './HydrationState.js'; export * from './HydratedSyncRules.js'; +export * from './HydrationState.js'; export * from './compiler/compiler.js'; export * from './sync_plan/plan.js'; diff --git a/packages/sync-rules/src/sql_functions.ts b/packages/sync-rules/src/sql_functions.ts index bbf89b662..f4f356691 100644 --- a/packages/sync-rules/src/sql_functions.ts +++ b/packages/sync-rules/src/sql_functions.ts @@ -10,6 +10,7 @@ import { ExpressionType, SqliteType, SqliteValueType, TYPE_INTEGER } from './Exp import * as uuid from 'uuid'; import { CustomSqliteValue } from './types/custom_sqlite_value.js'; import { CompatibilityContext, CompatibilityOption } from './compatibility.js'; +import { cast, CAST_TYPES, castAsBlob, castAsText } from './cast.js'; export const BASIC_OPERATORS = new Set([ '=', @@ -526,89 +527,6 @@ export function generateSqlFunctions(compatibility: CompatibilityContext) { }; } -export const CAST_TYPES = new Set(['text', 'numeric', 'integer', 'real', 'blob']); - -const textEncoder = new TextEncoder(); -const textDecoder = new TextDecoder(); - -export 
function castAsText(value: SqliteValue): string | null { - if (value == null) { - return null; - } else if (value instanceof Uint8Array) { - return textDecoder.decode(value); - } else { - return value.toString(); - } -} - -export function castAsBlob(value: SqliteValue): Uint8Array | null { - if (value == null) { - return null; - } else if (value instanceof Uint8Array) { - return value!; - } - - if (typeof value != 'string') { - value = value.toString(); - } - return textEncoder.encode(value); -} - -export function cast(value: SqliteValue, to: string) { - if (value == null) { - return null; - } - if (to == 'text') { - return castAsText(value); - } else if (to == 'numeric') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - return parseNumeric(value); - } else if (typeof value == 'number' || typeof value == 'bigint') { - return value; - } else { - return 0n; - } - } else if (to == 'real') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - const nr = parseFloat(value); - if (isNaN(nr)) { - return 0.0; - } else { - return nr; - } - } else if (typeof value == 'number') { - return value; - } else if (typeof value == 'bigint') { - return Number(value); - } else { - return 0.0; - } - } else if (to == 'integer') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - return parseBigInt(value); - } else if (typeof value == 'number') { - return Number.isInteger(value) ? BigInt(value) : BigInt(Math.floor(value)); - } else if (typeof value == 'bigint') { - return value; - } else { - return 0n; - } - } else if (to == 'blob') { - return castAsBlob(value); - } else { - throw new Error(`Type not supported for cast: '${to}'`); - } -} - export function sqliteTypeOf(arg: SqliteInputValue): SqliteValueType { if (arg == null) { return 'null'; @@ -644,28 +562,6 @@ export function parseGeometry(value?: SqliteValue) { return geo; } -function parseNumeric(text: string): bigint | number { - const match = /^\s*(\d+)(\.\d*)?(e[+\-]?\d+)?/i.exec(text); - if (!match) { - return 0n; - } - - if (match[2] != null || match[3] != null) { - const v = parseFloat(match[0]); - return isNaN(v) ? 
0n : v; - } else { - return BigInt(match[1]); - } -} - -function parseBigInt(text: string): bigint { - const match = /^\s*(\d+)/.exec(text); - if (!match) { - return 0n; - } - return BigInt(match[1]); -} - function isNumeric(a: SqliteValue): a is number | bigint { return typeof a == 'number' || typeof a == 'bigint'; } diff --git a/packages/sync-rules/src/streams/filter.ts b/packages/sync-rules/src/streams/filter.ts index 4ef22eab6..9f6f5259c 100644 --- a/packages/sync-rules/src/streams/filter.ts +++ b/packages/sync-rules/src/streams/filter.ts @@ -540,17 +540,16 @@ export class SubqueryParameterLookupSource implements ParameterIndexLookupCreato private streamName: string ) {} - public get defaultLookupScope() { + public get defaultLookupScope(): ParameterLookupScope { return { lookupName: this.streamName, - queryId: this.defaultQueryId + queryId: this.defaultQueryId, + source: this }; } - getSourceTables(): Set { - let result = new Set(); - result.add(this.parameterTable); - return result; + getSourceTables() { + return [this.parameterTable]; } /** diff --git a/packages/sync-rules/src/streams/stream.ts b/packages/sync-rules/src/streams/stream.ts index 3394cfa51..6c107dfe7 100644 --- a/packages/sync-rules/src/streams/stream.ts +++ b/packages/sync-rules/src/streams/stream.ts @@ -96,8 +96,8 @@ export class SyncStreamDataSource implements BucketDataSource { return this.variant.defaultBucketPrefix(this.stream.name); } - getSourceTables(): Set { - return new Set([this.data.sourceTable]); + getSourceTables() { + return [this.data.sourceTable]; } tableSyncsData(table: SourceTableInterface): boolean { diff --git a/packages/sync-rules/src/streams/variant.ts b/packages/sync-rules/src/streams/variant.ts index e6cf1c2ca..481f9a102 100644 --- a/packages/sync-rules/src/streams/variant.ts +++ b/packages/sync-rules/src/streams/variant.ts @@ -4,9 +4,9 @@ import { BucketDataSource, BucketParameterQuerierSource, ParameterIndexLookupCre import { BucketDataScope } from '../HydrationState.js'; import { CreateSourceParams, GetQuerierOptions, RequestedStream, ScopedParameterLookup } from '../index.js'; import { RequestParameters, SqliteJsonValue, TableRow } from '../types.js'; -import { buildBucketName, isJsonValue, JSONBucketNameSerialize } from '../utils.js'; +import { buildBucketInfo, isJsonValue, JSONBucketNameSerialize } from '../utils.js'; import { BucketParameter, SubqueryEvaluator } from './parameter.js'; -import { SyncStream, SyncStreamDataSource } from './stream.js'; +import { SyncStream } from './stream.js'; import { cartesianProduct } from './utils.js'; /** @@ -294,7 +294,7 @@ export class StreamVariant { return { definition: stream.name, inclusion_reasons: [reason], - bucket: buildBucketName(bucketScope, this.serializeBucketParameters(instantiation)), + ...buildBucketInfo(bucketScope, this.serializeBucketParameters(instantiation)), priority: stream.priority }; } diff --git a/packages/sync-rules/src/sync_plan/engine/javascript.ts b/packages/sync-rules/src/sync_plan/engine/javascript.ts index 7b5d3adde..65c8d0e8f 100644 --- a/packages/sync-rules/src/sync_plan/engine/javascript.ts +++ b/packages/sync-rules/src/sync_plan/engine/javascript.ts @@ -1,4 +1,5 @@ import { + cast, compare, CompatibilityContext, generateSqlFunctions, @@ -7,7 +8,7 @@ import { sqliteBool, sqliteNot } from '../../index.js'; -import { cast, evaluateOperator, SqlFunction } from '../../sql_functions.js'; +import { evaluateOperator, SqlFunction } from '../../sql_functions.js'; import { cartesianProduct } from '../../streams/utils.js'; 
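// Illustrative usage (a sketch added for clarity, not part of the patch): the cast helpers that the
// hunks above remove from sql_functions.ts now live in the new cast.ts module and are re-exported
// through index.ts, so callers can import them from '../cast.js' inside the package or from the
// package root. The exact import specifier below is an assumption; the behaviour is unchanged by the move.
import { cast, castAsBlob, castAsText, CAST_TYPES } from '@powersync/service-sync-rules';

CAST_TYPES.has('integer'); // true - the set of supported cast targets is the same as before
cast('42.7', 'integer');   // 42n - integer casts keep only the leading digits
cast(7, 'text');           // '7'
castAsText(null);          // null
castAsBlob('abc');         // Uint8Array with the UTF-8 bytes of 'abc'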
import { generateTableValuedFunctions } from '../../TableValuedFunctions.js'; import { SqliteRow, SqliteValue } from '../../types.js'; diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts index 924aaeba5..b16d17e00 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts @@ -9,11 +9,12 @@ import { UnscopedEvaluatedRow, UnscopedEvaluationResult } from '../../types.js'; -import { filterJsonRow, idFromData, isJsonValue, isValidParameterValue, JSONBucketNameSerialize } from '../../utils.js'; +import { filterJsonRow, isJsonValue, isValidParameterValue, JSONBucketNameSerialize } from '../../utils.js'; import { SqlExpression } from '../expression.js'; import { ExpressionToSqlite } from '../expression_to_sql.js'; import * as plan from '../plan.js'; import { StreamEvaluationContext } from './index.js'; +import { idFromData } from '../../cast.js'; import { ScalarExpressionEvaluator, scalarStatementToSql, @@ -23,7 +24,7 @@ import { TableProcessorToSqlHelper } from './table_processor_to_sql.js'; import { SyncPlanSchemaAnalyzer } from '../schema_inference.js'; export class PreparedStreamBucketDataSource implements BucketDataSource { - private readonly sourceTables = new Set(); + private readonly sourceTables: TablePattern[] = []; private readonly sources: PreparedStreamDataSource[] = []; private readonly defaultSchema: string; @@ -37,7 +38,7 @@ export class PreparedStreamBucketDataSource implements BucketDataSource { const prepared = new PreparedStreamDataSource(data, context); this.sources.push(prepared); - this.sourceTables.add(prepared.tablePattern); + this.sourceTables.push(prepared.tablePattern); } } @@ -53,7 +54,7 @@ export class PreparedStreamBucketDataSource implements BucketDataSource { return evaluator.parameters.map((p) => ExpressionToSqlite.toSqlite(p.expr)); } - getSourceTables(): Set { + getSourceTables(): TablePattern[] { return this.sourceTables; } diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts index 68dbcef8d..a6f84f6e0 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts @@ -15,7 +15,7 @@ import { parametersForRequest, RequestParameterEvaluators } from './parameter_ev import { PendingQueriers } from '../../BucketParameterQuerier.js'; import { RequestedStream } from '../../SqlSyncRules.js'; import { BucketInclusionReason, ResolvedBucket } from '../../BucketDescription.js'; -import { buildBucketName, JSONBucketNameSerialize } from '../../utils.js'; +import { buildBucketInfo, JSONBucketNameSerialize, SOURCE } from '../../utils.js'; export interface StreamInput extends StreamEvaluationContext { preparedBuckets: Map; @@ -131,9 +131,9 @@ class PreparedQuerier { const parametersToBucket = (instantiation: SqliteParameterValue[]): ResolvedBucket => { return { + ...buildBucketInfo(bucketScope, JSONBucketNameSerialize.stringify(instantiation)), definition: this.stream.name, inclusion_reasons: [reason], - bucket: buildBucketName(bucketScope, JSONBucketNameSerialize.stringify(instantiation)), priority: this.stream.priority }; }; diff --git a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts index 66785f640..296ec8757 100644 --- 
a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts @@ -23,7 +23,10 @@ export class PreparedParameterIndexLookupCreator implements ParameterIndexLookup private readonly source: plan.StreamParameterIndexLookupCreator, { engine, defaultSchema }: StreamEvaluationContext ) { - this.defaultLookupScope = source.defaultLookupScope; + this.defaultLookupScope = { + ...source.defaultLookupScope, + source: this + }; const translationHelper = new TableProcessorToSqlHelper(source); const expressions = source.outputs.map((o) => translationHelper.mapper.transform(o)); @@ -42,10 +45,8 @@ export class PreparedParameterIndexLookupCreator implements ParameterIndexLookup this.evaluatorInputs = translationHelper.mapper.instantiation; } - getSourceTables(): Set { - const set = new Set(); - set.add(this.sourceTable); - return set; + getSourceTables(): TablePattern[] { + return [this.sourceTable]; } evaluateParameterRow(sourceTable: SourceTableInterface, row: SqliteRow): UnscopedEvaluatedParametersResult[] { diff --git a/packages/sync-rules/src/sync_plan/plan.ts b/packages/sync-rules/src/sync_plan/plan.ts index f30cc0f1e..7d43da9e2 100644 --- a/packages/sync-rules/src/sync_plan/plan.ts +++ b/packages/sync-rules/src/sync_plan/plan.ts @@ -153,7 +153,7 @@ export interface StreamBucketDataSource { */ export interface StreamParameterIndexLookupCreator extends TableProcessor { hashCode: number; - defaultLookupScope: ParameterLookupScope; + defaultLookupScope: Omit; /** * Outputs to persist in the lookup. diff --git a/packages/sync-rules/src/sync_plan/serialize.ts b/packages/sync-rules/src/sync_plan/serialize.ts index 994b15ab6..ee19fe994 100644 --- a/packages/sync-rules/src/sync_plan/serialize.ts +++ b/packages/sync-rules/src/sync_plan/serialize.ts @@ -350,7 +350,7 @@ interface SerializedDataSource { interface SerializedParameterIndexLookupCreator { table: SerializedTablePattern; hash: number; - lookupScope: ParameterLookupScope; + lookupScope: Omit; output: SqlExpression[]; filters: SqlExpression[]; tableValuedFunctions: TableProcessorTableValuedFunction[]; diff --git a/packages/sync-rules/src/types.ts b/packages/sync-rules/src/types.ts index 6f55647b3..91b08a48a 100644 --- a/packages/sync-rules/src/types.ts +++ b/packages/sync-rules/src/types.ts @@ -7,6 +7,7 @@ import { RequestFunctionCall } from './request_functions.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { SyncRulesOptions } from './SqlSyncRules.js'; import { TablePattern } from './TablePattern.js'; +import { BucketDataSource } from './BucketSource.js'; import { CustomSqliteValue } from './types/custom_sqlite_value.js'; import { jsonValueToSqlite, toSyncRulesParameters } from './utils.js'; @@ -58,6 +59,11 @@ export interface EvaluatedRow { /** Must be JSON-serializable. */ data: SqliteJsonRow; + + /** + * Source for the evaluated row. 
+ */ + source: BucketDataSource; } /** diff --git a/packages/sync-rules/src/utils.ts b/packages/sync-rules/src/utils.ts index 216f56847..f757f13d7 100644 --- a/packages/sync-rules/src/utils.ts +++ b/packages/sync-rules/src/utils.ts @@ -1,5 +1,6 @@ import { JSONBig, JsonContainer, Replacer, stringifyRaw } from '@powersync/service-jsonbig'; import { SelectFromStatement, Statement } from 'pgsql-ast-parser'; +import { BucketDataSource } from './BucketSource.js'; import { CompatibilityContext } from './compatibility.js'; import { SyncRuleProcessingError as SyncRulesProcessingError } from './errors.js'; import { BucketDataScope } from './HydrationState.js'; @@ -16,13 +17,27 @@ import { SqliteValue } from './types.js'; import { CustomArray, CustomObject, CustomSqliteValue } from './types/custom_sqlite_value.js'; -import { castAsText } from './sql_functions.js'; export function isSelectStatement(q: Statement): q is SelectFromStatement { return q.type == 'select'; } -export function buildBucketName(scope: BucketDataScope, serializedParameters: string): string { +export const SOURCE = Symbol.for('BucketSourceStorage'); + +export function buildBucketInfo( + scope: BucketDataScope, + serializedParameters: string +): { bucket: string; [SOURCE]: BucketDataSource } { + if (scope.source == null) { + throw new Error('source is required'); + } + return { + bucket: scope.bucketPrefix + serializedParameters, + [SOURCE]: scope.source + }; +} + +function buildBucketName(scope: BucketDataScope, serializedParameters: string): string { return scope.bucketPrefix + serializedParameters; } @@ -239,20 +254,3 @@ export function normalizeParameterValue(value: SqliteJsonValue): SqliteJsonValue } return value; } - -/** - * Extracts and normalizes the ID column from a row. - */ -export function idFromData(data: SqliteJsonRow): string { - let id = data.id; - if (typeof id != 'string') { - // While an explicit cast would be better, this covers against very common - // issues when initially testing out sync, for example when the id column is an - // auto-incrementing integer. - // If there is no id column, we use a blank id. This will result in the user syncing - // a single arbitrary row for this table - better than just not being able to sync - // anything. - id = castAsText(id) ?? 
''; - } - return id; -} diff --git a/packages/sync-rules/test/src/compatibility.test.ts b/packages/sync-rules/test/src/compatibility.test.ts index 16973cf33..f0b59d2fc 100644 --- a/packages/sync-rules/test/src/compatibility.test.ts +++ b/packages/sync-rules/test/src/compatibility.test.ts @@ -9,8 +9,8 @@ import { toSyncRulesValue } from '../../src/index.js'; -import { versionedHydrationState } from '../../src/HydrationState.js'; -import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS } from './util.js'; +import { DEFAULT_HYDRATION_STATE, versionedHydrationState } from '../../src/HydrationState.js'; +import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS, removeSource, removeSourceSymbol } from './util.js'; describe('compatibility options', () => { describe('timestamps', () => { @@ -28,16 +28,18 @@ bucket_definitions: - SELECT id, description FROM assets `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -55,16 +57,18 @@ config: timestamps_iso8601: true `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -85,18 +89,24 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: '1#stream|0[]', definition: 'stream', @@ -123,17 +133,23 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: 'stream|0[]', definition: 'stream', 
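// Context for the test churn in this file and the ones below (an illustrative sketch, not part of
// the patch): buildBucketInfo in utils.ts now attaches the producing BucketDataSource to each
// resolved bucket under the SOURCE symbol key, and EvaluatedRow / ParameterLookupScope carry a
// `source` reference as well. The tests therefore strip these with removeSource /
// removeSourceSymbol (helpers from ./util.js, whose diff is not shown in this part of the patch)
// before comparing against plain object literals. The import specifier and the placeholder source
// below are assumptions for illustration.
import { buildBucketInfo, SOURCE, type BucketDataSource } from '@powersync/service-sync-rules';

const exampleSource = {} as BucketDataSource; // placeholder; buildBucketInfo only stores the reference

const info = buildBucketInfo({ bucketPrefix: 'mybucket', source: exampleSource }, '["user1"]');
info.bucket;                     // 'mybucket["user1"]' - the bucket string itself is unchanged
const producedBy = info[SOURCE]; // === exampleSource; storage can read the producing source off the symbol key
JSON.stringify(info);            // only {"bucket": ...} - symbol-keyed entries are never serialized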
@@ -160,13 +176,15 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: 'desc' - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: 'desc' + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: '1#mybucket[]', data: { description: 'desc', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -184,16 +202,18 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { - subSecondPrecision: TimeValuePrecision.seconds, - defaultSubSecondPrecision: TimeValuePrecision.seconds + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { + subSecondPrecision: TimeValuePrecision.seconds, + defaultSubSecondPrecision: TimeValuePrecision.seconds + }) }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -211,16 +231,18 @@ bucket_definitions: - SELECT id, description ->> 'foo.bar' AS "desc" FROM assets `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: 'baz', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -235,16 +257,18 @@ config: fixed_json_extract: true `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: null, id: 'id' }, id: 'id', table: 'assets' }]); }); }); @@ -294,13 +318,15 @@ config: hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: data + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: data + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: withFixedQuirk ? '1#mybucket[]' : 'mybucket[]', @@ -315,7 +341,11 @@ config: } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: withFixedQuirk ? 
'1#mybucket[]' : 'mybucket[]', definition: 'mybucket', diff --git a/packages/sync-rules/test/src/parameter_queries.test.ts b/packages/sync-rules/test/src/parameter_queries.test.ts index e1e703bce..1714bb55b 100644 --- a/packages/sync-rules/test/src/parameter_queries.test.ts +++ b/packages/sync-rules/test/src/parameter_queries.test.ts @@ -12,7 +12,15 @@ import { UnscopedParameterLookup } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { BASIC_SCHEMA, EMPTY_DATA_SOURCE, findQuerierLookups, PARSE_OPTIONS, requestParameters } from './util.js'; +import { + BASIC_SCHEMA, + EMPTY_DATA_SOURCE, + findQuerierLookups, + PARSE_OPTIONS, + removeSource, + removeSourceSymbol, + requestParameters +} from './util.js'; describe('parameter queries', () => { const table = (name: string): SourceTableInterface => ({ @@ -123,15 +131,21 @@ describe('parameter queries', () => { // We _do_ need to care about the bucket string representation. expect( - query.resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket' - }) + query + .resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], requestParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); expect( - query.resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket' - }) + query + .resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], requestParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); }); @@ -491,11 +505,13 @@ describe('parameter queries', () => { ]); expect( - query.resolveBucketDescriptions( - [{ user_id: 'user1' }], - requestParameters({ sub: 'user1', parameters: { is_admin: true } }), - { bucketPrefix: 'mybucket' } - ) + query + .resolveBucketDescriptions( + [{ user_id: 'user1' }], + requestParameters({ sub: 'user1', parameters: { is_admin: true } }), + { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE } + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket["user1",1]', priority: 3 }]); }); @@ -873,12 +889,13 @@ describe('parameter queries', () => { describe('custom hydrationState', function () { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -904,15 +921,23 @@ describe('parameter queries', () => { id: 'group1', user_ids: JSON.stringify(['test-user', 'other-user']) }); - expect(result).toEqual([ + expect( + result.map((entry) => ('lookup' in entry ? 
{ ...entry, lookup: removeSource(entry.lookup) } : entry)) + ).toEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'test-user' + ]) + ), bucketParameters: [{ group_id: 'group1' }] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, [ - 'other-user' - ]), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'other-user' + ]) + ), bucketParameters: [{ group_id: 'group1' }] } ]); @@ -943,8 +968,12 @@ describe('parameter queries', () => { const querier = queriers[0]; expect(querier.hasDynamicBuckets).toBeTruthy(); - expect(await findQuerierLookups(querier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']) + expect((await findQuerierLookups(querier)).map(removeSource)).toEqual([ + removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'test-user' + ]) + ) ]); }); }); diff --git a/packages/sync-rules/test/src/static_parameter_queries.test.ts b/packages/sync-rules/test/src/static_parameter_queries.test.ts index 6b6c66dcf..7a648b2ff 100644 --- a/packages/sync-rules/test/src/static_parameter_queries.test.ts +++ b/packages/sync-rules/test/src/static_parameter_queries.test.ts @@ -2,13 +2,22 @@ import { describe, expect, test } from 'vitest'; import { BucketDataScope, HydrationState } from '../../src/HydrationState.js'; import { BucketParameterQuerier, GetQuerierOptions, QuerierError, SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, requestParameters } from './util.js'; +import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol, requestParameters } from './util.js'; describe('static parameter queries', () => { const MYBUCKET_SCOPE: BucketDataScope = { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }; + function getStaticBucketDescriptions( + query: StaticSqlParameterQuery, + parameters: ReturnType, + scope: BucketDataScope + ) { + return query.getStaticBucketDescriptions(parameters, scope).map(removeSourceSymbol); + } + test('basic query', function () { const sql = 'SELECT token_parameters.user_id'; const query = SqlParameterQuery.fromSql( @@ -20,7 +29,7 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); }); @@ -37,8 +46,9 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); expect( - query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), { - bucketPrefix: '1#mybucket' + getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), { + bucketPrefix: '1#mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([{ bucket: '1#mybucket["user1"]', priority: 3 }]); }); @@ -54,7 +64,7 @@ describe('static 
parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -70,13 +80,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'user1', parameters: { is_admin: true } }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'user1', parameters: { is_admin: false } }), MYBUCKET_SCOPE ) @@ -93,7 +105,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["USER1"]', priority: 3 } ]); expect(query.bucketParameters!).toEqual(['upper_id']); @@ -110,13 +122,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'admin', parameters: { role: 'admin' } }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'user', parameters: { role: 'user' } }), MYBUCKET_SCOPE ) @@ -134,10 +148,10 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions(requestParameters({ parameters: { id1: 't1', id2: 't1' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ parameters: { id1: 't1', id2: 't1' } }), MYBUCKET_SCOPE) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions(requestParameters({ parameters: { id1: 't1', id2: 't2' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ parameters: { id1: 't1', id2: 't2' } }), MYBUCKET_SCOPE) ).toEqual([]); }); @@ -155,7 +169,7 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["test"]', priority: 3 } ]); }); @@ -172,16 +186,16 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + 
expect(getStaticBucketDescriptions(query, requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -199,16 +213,16 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ other: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -226,7 +240,7 @@ describe('static parameter queries', () => { expect(query.bucketParameters).toEqual(['email']); expect( - query.getStaticBucketDescriptions(requestParameters({ sub: { email: 'a@example.org' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ sub: { email: 'a@example.org' } }), MYBUCKET_SCOPE) ).toEqual([{ bucket: 'mybucket["a@example.org"]', priority: 3 }]); }); @@ -242,18 +256,18 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); // This is not expected to be used - we just document the current behavior - expect(query.getStaticBucketDescriptions(requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 
'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -270,13 +284,13 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: '123' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: '123' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["text"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["real"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["integer"]', priority: 3 } ]); }); @@ -291,7 +305,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -306,7 +320,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -321,7 +335,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([]); + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([]); }); test('static IN expression', function () { @@ -334,7 +348,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -351,13 +365,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'read:users'] }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'write:users'] }), MYBUCKET_SCOPE ) @@ -376,13 +392,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'read:users'] }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'write:users'] 
}), MYBUCKET_SCOPE ) @@ -399,10 +417,10 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ role: 'superuser' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ role: 'superuser' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ role: 'superadmin' }), MYBUCKET_SCOPE)).toEqual([]); + expect(getStaticBucketDescriptions(query, requestParameters({ role: 'superadmin' }), MYBUCKET_SCOPE)).toEqual([]); }); test('case-sensitive queries (1)', () => { @@ -479,12 +497,13 @@ describe('static parameter queries', () => { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index 6f5709b12..c9d49386f 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -15,6 +15,8 @@ import { UnscopedParameterLookup, QuerierError, RequestParameters, + EvaluatedParametersResult, + isEvaluatedParameters, SourceTableInterface, SqliteJsonRow, SqliteRow, @@ -24,18 +26,34 @@ import { syncStreamFromSql, ScopedParameterLookup } from '../../src/index.js'; -import { normalizeQuerierOptions, PARSE_OPTIONS, requestParameters, TestSourceTable } from './util.js'; +import { + normalizeQuerierOptions, + PARSE_OPTIONS, + removeSource, + removeSourceSymbol, + requestParameters, + TestSourceTable +} from './util.js'; describe('streams', () => { const STREAM_0: ParameterLookupScope = { lookupName: 'stream', - queryId: '0' + queryId: '0', + source: {} as any }; const STREAM_1: ParameterLookupScope = { lookupName: 'stream', - queryId: '1' + queryId: '1', + source: {} as any }; + function removeLookupSource(entry: EvaluatedParametersResult) { + if (!isEvaluatedParameters(entry)) { + return entry; + } + return { ...entry, lookup: removeSource(entry.lookup) }; + } + test('refuses edition: 1', () => { expect(() => syncStreamFromSql('stream', 'SELECT * FROM comments', { @@ -90,7 +108,7 @@ describe('streams', () => { normalizeQuerierOptions({ parameters: { test: 'foo' } }, {}, { stream: [{ opaque_id: 0, parameters: null }] }) ); - expect(mergeBucketParameterQueriers(queriers).staticBuckets).toEqual([ + expect(mergeBucketParameterQueriers(queriers).staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: '1#stream|0["foo"]', definition: 'stream', @@ -230,13 +248,15 @@ describe('streams', () => { ]); expect( - debugHydratedMergedSource(desc, hydrationParams).evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1' - }) + debugHydratedMergedSource(desc, hydrationParams) + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct(STREAM_0, ['u1']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_0, ['u1'])), bucketParameters: [ { result: 'i1' @@ -246,7 +266,7 @@ describe('streams', () => { ]); function getParameterSets(lookups: 
ScopedParameterLookup[]) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u1'])]); + expect(lookups.map(removeSource)).toStrictEqual([removeSource(ScopedParameterLookup.direct(STREAM_0, ['u1']))]); return [{ result: 'i1' }]; } @@ -285,7 +305,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -317,7 +339,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'u' }, getParameterSets: (lookups: ScopedParameterLookup[]) => { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['u'])) + ]); return [{ result: 'u' }]; } }) @@ -328,7 +352,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'u2' }, getParameterSets: (lookups: ScopedParameterLookup[]) => { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u2'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['u2'])) + ]); return []; } }) @@ -348,9 +374,9 @@ describe('streams', () => { const source = debugHydratedMergedSource(desc, hydrationParams); - expect(source.evaluateParameterRow(FRIENDS, { user_a: 'a', user_b: 'b' })).toStrictEqual([ + expect(source.evaluateParameterRow(FRIENDS, { user_a: 'a', user_b: 'b' }).map(removeLookupSource)).toStrictEqual([ { - lookup: ScopedParameterLookup.direct(STREAM_0, ['b']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_0, ['b'])), bucketParameters: [ { result: 'a' @@ -358,7 +384,7 @@ describe('streams', () => { ] }, { - lookup: ScopedParameterLookup.direct(STREAM_1, ['a']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_1, ['a'])), bucketParameters: [ { result: 'b' @@ -371,10 +397,10 @@ describe('streams', () => { expect(lookups).toHaveLength(1); const [lookup] = lookups; if (lookup.values[1] == '0') { - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_0, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_0, ['a']))); return []; } else { - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_1, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_1, ['a']))); return [{ result: 'b' }]; } } @@ -414,7 +440,7 @@ describe('streams', () => { getParameterSets(lookups) { expect(lookups).toHaveLength(1); const [lookup] = lookups; - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_0, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_0, ['a']))); return [{ result: 'i1' }, { result: 'i2' }]; } }) @@ -474,7 +500,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -634,7 +662,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, 
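// A minimal sketch of a custom HydrationState under the new scope shape (added for clarity, not
// part of the patch): as the updated hydrationState fixtures in these test files show, both
// callbacks now have to hand the incoming source back as part of the scope they return. The prefix
// scheme below is made up purely for illustration, and the root import specifier is an assumption.
import type { HydrationState } from '@powersync/service-sync-rules';

const exampleHydrationState: HydrationState = {
  getBucketSourceScope(source) {
    // Choose any bucket prefix scheme, but keep the source reference attached to the scope.
    return { bucketPrefix: `${source.uniqueName}-v1`, source };
  },
  getParameterIndexLookupScope(source) {
    return {
      lookupName: source.defaultLookupScope.lookupName,
      queryId: source.defaultLookupScope.queryId,
      source
    };
  }
};
// It would then be passed as `.hydrate({ hydrationState: exampleHydrationState })`, mirroring the tests.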
getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -689,7 +719,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -699,7 +731,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1', is_admin: true }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -759,8 +793,10 @@ describe('streams', () => { tokenPayload: { sub: 'id' }, parameters: {}, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0' }, ['id']) + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0', source: {} as any }, ['id']) + ) ]); return [{ result: 'account_id' }]; } @@ -855,7 +891,9 @@ WHERE tokenPayload: { sub: 'user1', haystack_id: 1 }, parameters: { project: 'foo' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, [1n, 'foo'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, [1n, 'foo'])) + ]); return [{ result: 'foo' }]; } }) @@ -952,7 +990,9 @@ WHERE parameters: { project: 'foo' }, globalParameters: { team_id: 'team' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['team'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['team'])) + ]); return [{ result: 'user' }]; } }) @@ -973,12 +1013,13 @@ WHERE const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}.test` }; + return { bucketPrefix: `${source.uniqueName}.test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -990,14 +1031,18 @@ WHERE ).toStrictEqual(['stream|0.test["i1"]', 'stream|1.test["i1"]', 'stream|2.test["l1"]', 'stream|3.test[]']); expect( - hydrated.evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1', - name: 'myname' - }) + hydrated + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1', + name: 'myname' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, ['u1']) + ), bucketParameters: [ { result: 'i1' @@ -1006,7 +1051,9 @@ WHERE }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test' }, ['myname']), + lookup: removeSource( + 
ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test', source: {} as any }, ['myname']) + ), bucketParameters: [ { result: 'i1' @@ -1016,13 +1063,17 @@ WHERE ]); expect( - hydrated.evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1' - }) + hydrated + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, ['u1']) + ), bucketParameters: [ { result: 'i1' diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index ccd0d6854..8e5f091da 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -8,7 +8,11 @@ import { SqliteRow, SqliteValue } from '../../../../src/index.js'; -import { requestParameters, TestSourceTable } from '../../util.js'; +import { removeSource, requestParameters, TestSourceTable } from '../../util.js'; + +function removeLookupSource(row: T): Omit & { lookup: any } { + return { ...row, lookup: removeSource(row.lookup) }; +} describe('evaluating rows', () => { syncTest('emits rows', ({ sync }) => { @@ -23,17 +27,19 @@ streams: `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo', - _double: 1, - _int: 1n, - _null: null, - _text: 'text', - _blob: new Uint8Array(10) // non-JSON columns should be removed - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo', + _double: 1, + _int: 1n, + _null: null, + _text: 'text', + _blob: new Uint8Array(10) // non-JSON columns should be removed + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -108,12 +114,14 @@ streams: query: SELECT * FROM users u `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -135,12 +143,14 @@ streams: query: SELECT * FROM "%" output `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -162,12 +172,14 @@ streams: query: SELECT * FROM "%" `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -211,9 +223,13 @@ streams: expect(desc.tableSyncsData(ISSUES)).toBeFalsy(); expect(desc.tableSyncsParameters(ISSUES)).toBeTruthy(); - expect(desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ + expect( + desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' }).map(removeLookupSource) + ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['user1']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['user1']) + ), bucketParameters: [ { '0': 'issue_id' @@ -344,26 +360,32 @@ streams: if (call == 0) { // First call. 
Lookup from users.id => users.name call++; - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '0' - }, - ['user'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '0', + source: {} as any + }, + ['user'] + ) ) ]); return [{ '0': 'name' }]; } else if (call == 1) { // Second call. Lookup from issues.owned_by => issues.id call++; - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '1' - }, - ['name'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '1', + source: {} as any + }, + ['name'] + ) ) ]); return [{ '0': 'issue' }]; diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts index e8bab6c03..c9a060afc 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts @@ -1,8 +1,12 @@ import { describe, expect } from 'vitest'; import { syncTest } from './utils.js'; -import { requestParameters, TestSourceTable } from '../../util.js'; +import { removeSource, requestParameters, TestSourceTable } from '../../util.js'; import { ScopedParameterLookup, SqliteJsonRow } from '../../../../src/index.js'; +function removeLookupSource(row: T): Omit & { lookup: any } { + return { ...row, lookup: removeSource(row.lookup) }; +} + describe('table-valued functions', () => { syncTest('as partition key', ({ sync }) => { const desc = sync.prepareSyncStreams(` @@ -16,7 +20,7 @@ streams: `); const sourceTable = new TestSourceTable('stores'); - expect(desc.evaluateRow({ sourceTable, record: { id: 'id', tags: '[1,2,3]' } })).toStrictEqual( + expect(desc.evaluateRow({ sourceTable, record: { id: 'id', tags: '[1,2,3]' } }).map(removeSource)).toStrictEqual( [1, 2, 3].map((e) => ({ bucket: `stream|0[${e}]`, data: { id: 'id' }, table: 's', id: 'id' })) ); }); @@ -43,10 +47,14 @@ streams: 'stream|0["user"]' ]); expect( - desc.evaluateParameterRow(conversations, { id: 'chat', members: JSON.stringify(['user', 'another']) }) + desc + .evaluateParameterRow(conversations, { id: 'chat', members: JSON.stringify(['user', 'another']) }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['chat']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']) + ), bucketParameters: [ { '0': 'user' @@ -54,7 +62,9 @@ streams: ] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['chat']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']) + ), bucketParameters: [ { '0': 'another' @@ -78,13 +88,16 @@ streams: const buckets = await querier.queryDynamicBucketDescriptions({ getParameterSets: async function (lookups: ScopedParameterLookup[]): Promise { - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '0' - }, - ['chat'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '0', + source: {} as any + }, + ['chat'] + ) ) ]); diff --git a/packages/sync-rules/test/src/sync_rules.test.ts 
b/packages/sync-rules/test/src/sync_rules.test.ts index 2b11df144..0b682c481 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -12,9 +12,21 @@ import { USERS, findQuerierLookups, normalizeQuerierOptions, + removeSource, + removeSourceSymbol, requestParameters } from './util.js'; +function evaluateRows(hydrated: any, options: { sourceTable: any; record: any }) { + return hydrated.evaluateRow(options).map(removeSource); +} + +function removeLookupSource( + entry: T +): Omit & { lookup: any } { + return { ...entry, lookup: removeSource(entry.lookup) }; +} + describe('sync rules', () => { const hydrationParams: CreateSourceParams = { hydrationState: DEFAULT_HYDRATION_STATE }; @@ -42,7 +54,7 @@ bucket_definitions: expect(dataQuery.bucketParameters).toEqual([]); expect(dataQuery.columnOutputNames()).toEqual(['id', 'description']); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test' } }) @@ -111,10 +123,12 @@ bucket_definitions: PARSE_OPTIONS ); const hydrated = rules.hydrate(hydrationParams); - expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ + expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 }).map(removeLookupSource)).toEqual([ { bucketParameters: [{}], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1' }, ['user1']) + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: {} as any }, ['user1']) + ) } ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 0 })).toEqual([]); @@ -135,14 +149,15 @@ bucket_definitions: const bucketData = rules.bucketDataSources[0]; expect(bucketData.bucketParameters).toEqual(['user_id', 'device_id']); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' })).querier - .staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket["user1","device1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } }) @@ -158,7 +173,7 @@ bucket_definitions: } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1', archived: 1, device_id: 'device1' } }) @@ -184,12 +199,13 @@ bucket_definitions: ); const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -198,7 +214,7 @@ bucket_definitions: normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' }) ); expect(errors).toEqual([]); - expect(querier.staticBuckets).toEqual([ + expect(querier.staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: 'mybucket-test["user1"]', definition: 'mybucket', @@ -206,19 +222,23 @@ bucket_definitions: priority: 3 } ]); - expect(await 
-    expect(await findQuerierLookups(querier)).toEqual([
-      ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1'])
+    expect((await findQuerierLookups(querier)).map(removeSource)).toEqual([
+      removeSource(
+        ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, ['user1'])
+      )
     ]);
-    expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([
+    expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 }).map(removeLookupSource)).toEqual([
       {
         bucketParameters: [{ user_id: 'user1' }],
-        lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1'])
+        lookup: removeSource(
+          ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, ['user1'])
+        )
       }
     ]);
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' }
      })
@@ -249,12 +269,14 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
     const bucketData = rules.bucketDataSources[0];
     expect(bucketData.bucketParameters).toEqual(['user_id']);
-    expect(hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' })).querier.staticBuckets).toEqual(
-      [{ bucket: 'mybucket["user1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]
-    );
+    expect(
+      hydrated
+        .getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' }))
+        .querier.staticBuckets.map(removeSourceSymbol)
+    ).toEqual([{ bucket: 'mybucket["user1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', user_id: 'user1' }
      })
@@ -270,7 +292,7 @@ bucket_definitions:
       }
     ]);
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', owner_id: 'user1' }
      })
@@ -396,7 +418,7 @@ bucket_definitions:
     });
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', user_id: 'user1' }
      })
@@ -434,7 +456,7 @@ bucket_definitions:
     });
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', user_id: 'user1' }
      })
@@ -463,7 +485,7 @@ bucket_definitions:
     );
     const hydrated = rules.hydrate(hydrationParams);
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', data: JSON.stringify({ count: 5, bool: true }) }
      })
@@ -498,7 +520,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: {
          id: 'asset1',
@@ -543,7 +565,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', role: 'admin' }
      })
@@ -562,7 +584,7 @@ bucket_definitions:
     ]);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset2', description: 'test', role: 'normal' }
      })
@@ -603,8 +625,9 @@ bucket_definitions:
     ]);
 
     expect(
-      hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ parameters: { is_admin: true } })).querier
-        .staticBuckets
+      hydrated
+        .getBucketParameterQuerier(normalizeQuerierOptions({ parameters: { is_admin: true } }))
+        .querier.staticBuckets.map(removeSourceSymbol)
     ).toEqual([{ bucket: 'mybucket[1]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]);
   });
@@ -620,7 +643,7 @@ bucket_definitions:
     );
     const hydrated = rules.hydrate(hydrationParams);
 
-    expect(hydrated.evaluateRow({ sourceTable: ASSETS, record: { id: 'asset1' } })).toEqual([
+    expect(evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1' } })).toEqual([
       {
         bucket: 'mybucket[]',
         id: 'asset1',
@@ -654,7 +677,7 @@ bucket_definitions:
     ).toMatchObject({ staticBuckets: [{ bucket: 'mybucket[314,3.14,314]', priority: 3 }] });
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', int1: 314n, float1: 3.14, float2: 314 }
      })
@@ -702,7 +725,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: new TestSourceTable('assets_123'),
        record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' }
      })
@@ -743,7 +766,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: new TestSourceTable('assets_123'),
        record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' }
      })
@@ -777,7 +800,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1', description: 'test', archived: 0n }
      })
@@ -813,7 +836,7 @@ bucket_definitions:
     const hydrated = rules.hydrate(hydrationParams);
 
     expect(
-      hydrated.evaluateRow({
+      evaluateRows(hydrated, {
        sourceTable: ASSETS,
        record: { id: 'asset1' }
      })
@@ -1043,8 +1066,8 @@ bucket_definitions:
       ]
     });
 
-    expect(await findQuerierLookups(hydratedQuerier)).toEqual([
-      ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1' }, [1])
+    expect((await findQuerierLookups(hydratedQuerier)).map(removeSource)).toEqual([
+      removeSource(ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1', source: {} as any }, [1]))
     ]);
   });
diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts
index 411e1d701..581b35aa7 100644
--- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts
+++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts
@@ -8,11 +8,20 @@ import {
   SqlParameterQuery
 } from '../../src/index.js';
 import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js';
-import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, requestParameters } from './util.js';
+import { BucketDataScope } from '../../src/HydrationState.js';
+import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol, requestParameters } from './util.js';
 
 describe('table-valued function queries', () => {
   const emptyPayload: RequestJwtPayload = { userIdJson: '', parsedPayload: {} };
 
+  function getStaticBucketDescriptions(
+    query: StaticSqlParameterQuery,
+    parameters: RequestParameters,
+    scope: BucketDataScope
+  ) {
+    return query.getStaticBucketDescriptions(parameters, scope).map(removeSourceSymbol);
+  }
+
   test('json_each(array param)', function () {
     const sql = "SELECT json_each.value as v FROM json_each(request.parameters() -> 'array')";
     const query = SqlParameterQuery.fromSql(
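The hunk above routes every static-bucket assertion through a local wrapper that appends `.map(removeSourceSymbol)`. The sketch below illustrates the underlying trick, stripping a symbol-keyed back-reference via computed rest destructuring; it uses a locally declared symbol as a stand-in for the `SOURCE` export, while the real helper is the `removeSourceSymbol` added to `test/src/util.ts` later in this patch.

```ts
// Stand-in for the SOURCE symbol exported by the sync-rules package.
const SOURCE = Symbol('source');

type BucketDescriptionLike = { bucket: string; priority: number; [SOURCE]?: unknown };

// Rest destructuring copies the remaining own enumerable properties while the
// computed [SOURCE] key is pulled out and discarded.
function removeSourceSymbol(desc: BucketDescriptionLike): Omit<BucketDescriptionLike, typeof SOURCE> {
  const { [SOURCE]: _source, ...rest } = desc;
  return rest;
}

console.log(removeSourceSymbol({ bucket: 'mybucket[1]', priority: 3, [SOURCE]: {} }));
// -> { bucket: 'mybucket[1]', priority: 3 }
```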
@@ -29,8 +38,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3, null] }), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[1]', priority: 3 },
@@ -60,8 +70,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3, null] }), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[1]', priority: 3 },
@@ -84,8 +95,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, {}), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, {}), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[1]', priority: 3 },
@@ -107,8 +119,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, {}), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, {}), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([]);
   });
@@ -129,8 +142,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, {}), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, {}), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([]);
   });
@@ -151,8 +165,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, {}), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, {}), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([]);
   });
@@ -170,8 +185,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['value']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, {}), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, {}), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
      })
    ).toEqual([
      { bucket: 'mybucket["a"]', priority: 3 },
@@ -196,8 +212,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['value']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[1]', priority: 3 },
@@ -222,8 +239,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['value']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[1]', priority: 3 },
@@ -248,8 +266,9 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['v']);
 
     expect(
-      query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), {
-        bucketPrefix: 'mybucket'
+      getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), {
+        bucketPrefix: 'mybucket',
+        source: EMPTY_DATA_SOURCE
       })
     ).toEqual([
       { bucket: 'mybucket[2]', priority: 3 },
@@ -274,7 +293,8 @@ describe('table-valued function queries', () => {
     expect(query.bucketParameters).toEqual(['project_id']);
 
     expect(
-      query.getStaticBucketDescriptions(
+      getStaticBucketDescriptions(
+        query,
         requestParameters(
           {
             projects: [
@@ -285,7 +305,8 @@ describe('table-valued function queries', () => {
           {}
         ),
         {
-          bucketPrefix: 'mybucket'
+          bucketPrefix: 'mybucket',
+          source: EMPTY_DATA_SOURCE
         }
       )
     ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]);
diff --git a/packages/sync-rules/test/src/util.ts b/packages/sync-rules/test/src/util.ts
index f7bb60312..4e49798ce 100644
--- a/packages/sync-rules/test/src/util.ts
+++ b/packages/sync-rules/test/src/util.ts
@@ -5,13 +5,13 @@ import {
   BucketParameterQuerier,
   ColumnDefinition,
   CompatibilityContext,
-  CreateSourceParams,
   DEFAULT_TAG,
   GetQuerierOptions,
   RequestedStream,
   RequestJwtPayload,
   RequestParameters,
   ScopedParameterLookup,
+  SOURCE,
   SourceSchema,
   SourceTableInterface,
   StaticSchema,
@@ -93,8 +93,8 @@ export const EMPTY_DATA_SOURCE: BucketDataSource = {
   uniqueName: 'mybucket',
   bucketParameters: [],
   // These are not used in the tests.
-  getSourceTables: function (): Set<TablePattern> {
-    return new Set();
+  getSourceTables: function (): TablePattern[] {
+    return [];
   },
   evaluateRow(options) {
     throw new Error('Function not implemented.');
@@ -110,6 +110,26 @@ export const EMPTY_DATA_SOURCE: BucketDataSource = {
   }
 };
 
+/**
+ * Removes the source property from an object.
+ *
+ * This is for tests where we don't care about this value, and it adds a lot of noise in the output.
+ */
+export function removeSource<T extends { source: unknown }>(obj: T): Omit<T, 'source'> {
+  const { source, ...rest } = obj;
+  return rest;
+}
+
+/**
+ * Removes the [SOURCE] symbol property from an object.
+ *
+ * This is for tests where we don't care about this value, and it adds a lot of noise in the output.
+ */
+export function removeSourceSymbol<T extends object>(obj: T): Omit<T, typeof SOURCE> {
+  const { [SOURCE]: source, ...rest } = obj;
+  return rest;
+}
+
 export async function findQuerierLookups(querier: BucketParameterQuerier): Promise<ScopedParameterLookup[]> {
   expect(querier.hasDynamicBuckets).toBe(true);
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index c1acfeb0c..0b2d7ac1d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -231,6 +231,9 @@ importers:
       bson:
         specifier: ^6.10.4
         version: 6.10.4
+      p-defer:
+        specifier: ^4.0.1
+        version: 4.0.1
       ts-codec:
         specifier: ^1.3.0
         version: 1.3.0
@@ -424,6 +427,9 @@ importers:
       '@powersync/service-types':
         specifier: workspace:*
         version: link:../../packages/types
+      p-defer:
+        specifier: ^4.0.1
+        version: 4.0.1
       semver:
         specifier: ^7.5.4
         version: 7.6.2
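The `pnpm-lock.yaml` entries above only record `p-defer` as a new dependency of these workspace packages; the patch itself does not show a call site. As a hedged reference, the sketch below shows the deferred-promise shape the library documents (API taken from the p-defer readme, not from this patch), which is typically useful when completion is signalled from a separate code path.

```ts
import pDefer from 'p-defer';

// A deferred exposes its promise together with the resolve/reject functions,
// so another code path (for example, an event handler) can settle it later.
const deferred = pDefer<string>();

setTimeout(() => deferred.resolve('done'), 10);

console.log(await deferred.promise); // 'done'
```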