Skip to content

Commit 7075454

Browse files
committed
WIP for adding timeBucket() function for auto-bin
1 parent 1e3e7cc commit 7075454

File tree

8 files changed

+702
-10
lines changed

8 files changed

+702
-10
lines changed

internal-packages/tsql/src/index.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
type BetweenCondition,
3030
type QuerySettings,
3131
type SimpleComparisonCondition,
32+
type TimeRange,
3233
type WhereClauseCondition,
3334
} from "./query/printer_context.js";
3435
import { createSchemaRegistry, type FieldMappings, type TableSchema } from "./query/schema.js";
@@ -126,9 +127,16 @@ export {
126127
type QueryNotice,
127128
type QuerySettings,
128129
type SimpleComparisonCondition,
130+
type TimeRange,
129131
type WhereClauseCondition,
130132
} from "./query/printer_context.js";
131133

134+
// Re-export time bucket utilities
135+
export {
136+
calculateTimeBucketInterval,
137+
type TimeBucketInterval,
138+
} from "./query/time_buckets.js";
139+
132140
// Re-export printer
133141
export { ClickHousePrinter, printToClickHouse, type PrintResult } from "./query/printer.js";
134142

@@ -517,6 +525,20 @@ export interface CompileTSQLOptions {
517525
* ```
518526
*/
519527
whereClauseFallback?: Record<string, WhereClauseCondition>;
528+
/**
529+
* Time range for `timeBucket()` interval calculation.
530+
* When provided, `timeBucket()` uses this to determine the appropriate bucket size
531+
* based on the span of the time range.
532+
*
533+
* @example
534+
* ```typescript
535+
* {
536+
* from: new Date('2024-01-01'),
537+
* to: new Date('2024-01-08'),
538+
* }
539+
* ```
540+
*/
541+
timeRange?: TimeRange;
520542
}
521543

522544
/**
@@ -574,6 +596,7 @@ export function compileTSQL(query: string, options: CompileTSQLOptions): PrintRe
574596
settings: options.settings,
575597
fieldMappings: options.fieldMappings,
576598
enforcedWhereClause,
599+
timeRange: options.timeRange,
577600
});
578601

579602
// 6. Print the AST to ClickHouse SQL (enforced conditions applied at printer level)

internal-packages/tsql/src/query/printer.test.ts

Lines changed: 284 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, it, expect, beforeEach } from "vitest";
2-
import { parseTSQLSelect, parseTSQLExpr } from "../index.js";
2+
import { parseTSQLSelect, parseTSQLExpr, compileTSQL } from "../index.js";
33
import { ClickHousePrinter, printToClickHouse, type PrintResult } from "./printer.js";
44
import { createPrinterContext, PrinterContext } from "./printer_context.js";
55
import { createSchemaRegistry, column, type TableSchema, type SchemaRegistry } from "./schema.js";
@@ -3288,3 +3288,286 @@ describe("Required Filters", () => {
32883288
expect(sql).toContain("cost_in_cents"); // total_cost is a virtual column
32893289
});
32903290
});
3291+
3292+
// ============================================================
3293+
// timeBucket() Tests
3294+
// ============================================================
3295+
3296+
describe("timeBucket()", () => {
3297+
/**
3298+
* Schema with timeConstraint for timeBucket() tests.
3299+
* Uses column mapping: TSQL "triggered_at" → ClickHouse "created_at"
* Queried as `runs` in the tests below and also passed to compileTSQL as `tableSchema`
* in the integration test.
3300+
*/
3301+
const timeBucketSchema: TableSchema = {
3302+
name: "runs",
3303+
clickhouseName: "trigger_dev.task_runs_v2",
3304+
// TSQL-side column name; the tests expect timeBucket() to print the mapped
// ClickHouse name (`created_at`) instead of this one.
timeConstraint: "triggered_at",
3305+
columns: {
3306+
id: { name: "id", ...column("String") },
3307+
status: { name: "status", ...column("String") },
3308+
triggered_at: {
3309+
name: "triggered_at",
3310+
clickhouseName: "created_at",
3311+
...column("DateTime64"),
3312+
},
3313+
organization_id: { name: "organization_id", ...column("String") },
3314+
project_id: { name: "project_id", ...column("String") },
3315+
environment_id: { name: "environment_id", ...column("String") },
3316+
},
3317+
// These three names match the keys used in the enforcedWhereClause objects in the tests.
tenantColumns: {
3318+
organizationId: "organization_id",
3319+
projectId: "project_id",
3320+
environmentId: "environment_id",
3321+
},
3322+
};
3323+
3324+
/**
3325+
* Schema without timeConstraint (for error tests)
* Every column is declared as String and no `timeConstraint` key is set; the
* "should throw when table has no timeConstraint" test queries it as `events`.
3326+
*/
3327+
const noTimeConstraintSchema: TableSchema = {
3328+
name: "events",
3329+
clickhouseName: "trigger_dev.events_v1",
3330+
columns: {
3331+
id: { name: "id", ...column("String") },
3332+
event_type: { name: "event_type", ...column("String") },
3333+
organization_id: { name: "organization_id", ...column("String") },
3334+
project_id: { name: "project_id", ...column("String") },
3335+
environment_id: { name: "environment_id", ...column("String") },
3336+
},
3337+
tenantColumns: {
3338+
organizationId: "organization_id",
3339+
projectId: "project_id",
3340+
environmentId: "environment_id",
3341+
},
3342+
};
3343+
3344+
/** 7-day time range: should produce 6 HOUR buckets */
// Also the default `timeRange` in createTimeBucketContext(), which is why most
// tests in this suite expect `INTERVAL 6 HOUR`.
3345+
const sevenDayRange = {
3346+
from: new Date("2024-01-01T00:00:00Z"),
3347+
to: new Date("2024-01-08T00:00:00Z"),
3348+
};
3349+
3350+
/** 1-hour time range: should produce 1 MINUTE buckets */
// Passed as a `timeRange` override to createTimeBucketContext() by the 1-hour test.
3351+
const oneHourRange = {
3352+
from: new Date("2024-01-01T00:00:00Z"),
3353+
to: new Date("2024-01-01T01:00:00Z"),
3354+
};
3355+
3356+
/**
 * Builds a PrinterContext for the timeBucket() tests.
 *
 * Defaults: a schema registry containing only `timeBucketSchema`, fixed `eq`
 * conditions for organization/project/environment in `enforcedWhereClause`, and
 * `sevenDayRange` as `timeRange`.
 *
 * `overrides` is spread last, so any key it carries replaces the default. That
 * includes an explicit `timeRange: undefined`, which is how the "no timeRange"
 * error test removes the range.
 *
 * @param overrides - Partial `createPrinterContext` options applied over the defaults.
 * @returns The context produced by `createPrinterContext`.
 */
function createTimeBucketContext(
3357+
overrides: Partial<Parameters<typeof createPrinterContext>[0]> = {}
3358+
): PrinterContext {
3359+
const schema = createSchemaRegistry([timeBucketSchema]);
3360+
return createPrinterContext({
3361+
schema,
3362+
enforcedWhereClause: {
3363+
organization_id: { op: "eq", value: "org_test123" },
3364+
project_id: { op: "eq", value: "proj_test456" },
3365+
environment_id: { op: "eq", value: "env_test789" },
3366+
},
3367+
timeRange: sevenDayRange,
3368+
// Must stay last so caller-supplied keys win over the defaults above.
...overrides,
3369+
});
3370+
}
3371+
3372+
/**
 * Parses `query` with `parseTSQLSelect` and prints the AST with `printToClickHouse`.
 *
 * @param query - TSQL SELECT statement to compile.
 * @param context - Printer context to print with. When null/undefined, a fresh
 * default context from `createTimeBucketContext()` is used.
 * @returns The value returned by `printToClickHouse`; the tests destructure `sql` from it.
 */
function printTimeBucketQuery(query: string, context?: PrinterContext) {
3373+
const ast = parseTSQLSelect(query);
3374+
const ctx = context ?? createTimeBucketContext();
3375+
return printToClickHouse(ast, ctx);
3376+
}
3377+
3378+
// Checks how timeBucket() is expanded in the SELECT list. Unless a context is
// passed, printTimeBucketQuery uses the default context, whose time range is
// sevenDayRange.
describe("SELECT with timeBucket()", () => {
3379+
it("should compile timeBucket() to toStartOfInterval with correct column and interval", () => {
3380+
const { sql } = printTimeBucketQuery(
3381+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket"
3382+
);
3383+
3384+
// Should use ClickHouse column name (created_at), not TSQL name (triggered_at)
3385+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3386+
expect(sql).toContain("count()");
3387+
});
3388+
3389+
it("should use 1 MINUTE interval for 1-hour time range", () => {
3390+
// Only the time range differs from the default context.
const ctx = createTimeBucketContext({ timeRange: oneHourRange });
3391+
const { sql } = printTimeBucketQuery(
3392+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket",
3393+
ctx
3394+
);
3395+
3396+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 1 MINUTE)");
3397+
});
3398+
3399+
it("should work with other selected columns", () => {
3400+
const { sql } = printTimeBucketQuery(
3401+
"SELECT timeBucket(), status, count() FROM runs GROUP BY timeBucket, status"
3402+
);
3403+
3404+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3405+
// NOTE(review): `status` is also in the GROUP BY list of the query, so this
// substring check may pass without the SELECT column being printed.
expect(sql).toContain("status");
3406+
expect(sql).toContain("count()");
3407+
});
3408+
});
3409+
3410+
// Checks that a bare `timeBucket` identifier (in any letter case) is accepted in
// GROUP BY, i.e. printing does not throw.
// NOTE(review): none of these tests asserts what the GROUP BY clause itself contains.
describe("GROUP BY with timeBucket alias", () => {
3411+
it("should allow GROUP BY timeBucket (bare identifier, matching implicit alias)", () => {
3412+
const { sql } = printTimeBucketQuery(
3413+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket"
3414+
);
3415+
3416+
// The GROUP BY should reference the alias, not re-expand
// NOTE(review): the assertion below only proves a GROUP BY clause exists; it
// does not distinguish an alias reference from a re-expanded call.
3417+
expect(sql).toContain("GROUP BY");
3418+
// The SELECT should have the toStartOfInterval call
3419+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3420+
});
3421+
3422+
it("should allow GROUP BY timebucket (all lowercase)", () => {
3423+
const { sql } = printTimeBucketQuery(
3424+
"SELECT timeBucket(), count() FROM runs GROUP BY timebucket"
3425+
);
3426+
3427+
// Satisfied by the SELECT expansion alone; how the lowercase identifier is
// printed in GROUP BY is not checked.
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3428+
});
3429+
3430+
it("should allow GROUP BY TIMEBUCKET (all uppercase)", () => {
3431+
const { sql } = printTimeBucketQuery(
3432+
"SELECT timeBucket(), count() FROM runs GROUP BY TIMEBUCKET"
3433+
);
3434+
3435+
// Satisfied by the SELECT expansion alone; how the uppercase identifier is
// printed in GROUP BY is not checked.
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3436+
});
3437+
});
3438+
3439+
// Unlike the GROUP BY tests, these assert the printed clause text: ORDER BY is
// expected to keep the bare `timeBucket` identifier rather than the expanded
// toStartOfInterval(...) call.
describe("ORDER BY with timeBucket alias", () => {
3440+
it("should allow ORDER BY timeBucket", () => {
3441+
const { sql } = printTimeBucketQuery(
3442+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket ORDER BY timeBucket"
3443+
);
3444+
3445+
expect(sql).toContain("ORDER BY timeBucket");
3446+
});
3447+
3448+
it("should allow ORDER BY timeBucket DESC", () => {
3449+
const { sql } = printTimeBucketQuery(
3450+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket ORDER BY timeBucket DESC"
3451+
);
3452+
3453+
// Same expectation, with the sort direction kept directly after the identifier.
expect(sql).toContain("ORDER BY timeBucket DESC");
3454+
});
3455+
});
3456+
3457+
// Failure cases. Each call is wrapped in an arrow function so expect() can
// observe the throw; a string passed to toThrow is matched as a substring of
// the error message, so the exact wording beyond that fragment is not pinned.
describe("error handling", () => {
3458+
it("should throw when timeBucket() is called with arguments", () => {
3459+
expect(() =>
3460+
printTimeBucketQuery("SELECT timeBucket(triggered_at) FROM runs")
3461+
).toThrow("timeBucket() does not accept arguments");
3462+
});
3463+
3464+
it("should throw when table has no timeConstraint", () => {
3465+
// Built by hand because the default registry in createTimeBucketContext()
// contains only timeBucketSchema. A time range is still supplied —
// presumably so the missing timeConstraint is the only problem.
const schema = createSchemaRegistry([noTimeConstraintSchema]);
3466+
const ctx = createPrinterContext({
3467+
schema,
3468+
enforcedWhereClause: {
3469+
organization_id: { op: "eq", value: "org_test123" },
3470+
project_id: { op: "eq", value: "proj_test456" },
3471+
environment_id: { op: "eq", value: "env_test789" },
3472+
},
3473+
timeRange: sevenDayRange,
3474+
});
3475+
3476+
expect(() =>
3477+
printTimeBucketQuery("SELECT timeBucket(), count() FROM events GROUP BY timeBucket", ctx)
3478+
).toThrow("timeConstraint");
3479+
});
3480+
3481+
it("should throw when no timeRange is provided", () => {
3482+
// The explicit `undefined` replaces the helper's default sevenDayRange,
// because the helper spreads its overrides last.
const ctx = createTimeBucketContext({ timeRange: undefined });
3483+
3484+
expect(() =>
3485+
printTimeBucketQuery("SELECT timeBucket(), count() FROM runs GROUP BY timeBucket", ctx)
3486+
).toThrow("time range");
3487+
});
3488+
});
3489+
3490+
// Checks which column name timeBucket() prints: the mapped `clickhouseName`
// when the time column declares one, otherwise the column's own name.
describe("column name mapping", () => {
3491+
it("should resolve timeConstraint through column mapping (TSQL → ClickHouse)", () => {
3492+
const { sql } = printTimeBucketQuery(
3493+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket"
3494+
);
3495+
3496+
// timeConstraint is "triggered_at" which maps to CH "created_at"
3497+
expect(sql).toContain("created_at");
3498+
// Whole-string check: the TSQL-side name must not appear anywhere in the
// printed SQL, not just inside the timeBucket() expansion.
expect(sql).not.toContain("triggered_at");
3499+
});
3500+
3501+
it("should work with timeConstraint column that has no clickhouseName mapping", () => {
3502+
// Local schema whose time column declares only `name`, with no `clickhouseName`.
const schemaNoMapping: TableSchema = {
3503+
name: "logs",
3504+
clickhouseName: "trigger_dev.logs_v1",
3505+
timeConstraint: "timestamp",
3506+
columns: {
3507+
id: { name: "id", ...column("String") },
3508+
timestamp: { name: "timestamp", ...column("DateTime64") },
3509+
organization_id: { name: "organization_id", ...column("String") },
3510+
project_id: { name: "project_id", ...column("String") },
3511+
environment_id: { name: "environment_id", ...column("String") },
3512+
},
3513+
tenantColumns: {
3514+
organizationId: "organization_id",
3515+
projectId: "project_id",
3516+
environmentId: "environment_id",
3517+
},
3518+
};
3519+
3520+
// NOTE(review): apart from the schema, these options equal the defaults of
// createTimeBucketContext(); `createTimeBucketContext({ schema })` looks
// equivalent — confirm before simplifying.
const schema = createSchemaRegistry([schemaNoMapping]);
3521+
const ctx = createPrinterContext({
3522+
schema,
3523+
enforcedWhereClause: {
3524+
organization_id: { op: "eq", value: "org_test123" },
3525+
project_id: { op: "eq", value: "proj_test456" },
3526+
environment_id: { op: "eq", value: "env_test789" },
3527+
},
3528+
timeRange: sevenDayRange,
3529+
});
3530+
3531+
const { sql } = printTimeBucketQuery(
3532+
"SELECT timeBucket(), count() FROM logs GROUP BY timeBucket",
3533+
ctx
3534+
);
3535+
3536+
// No clickhouseName, so uses the TSQL name "timestamp" directly
3537+
expect(sql).toContain("toStartOfInterval(timestamp, INTERVAL 6 HOUR)");
3538+
});
3539+
});
3540+
3541+
// Checks that the function name is recognised when written in upper case.
// NOTE(review): only the all-uppercase spelling is exercised; the parser and
// visitCall behaviour mentioned in the comment below is not visible in this file.
describe("case insensitivity", () => {
3542+
it("should handle timeBucket() case-insensitively in SELECT", () => {
3543+
// The parser preserves case, but visitCall checks case-insensitively
3544+
const { sql } = printTimeBucketQuery(
3545+
"SELECT TIMEBUCKET(), count() FROM runs GROUP BY timeBucket"
3546+
);
3547+
3548+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3549+
});
3550+
});
3551+
3552+
// Goes through the public compileTSQL entry point instead of calling
// createPrinterContext/printToClickHouse directly, so `timeRange` is supplied
// through the compile options.
describe("integration with compileTSQL", () => {
3553+
it("should work through the full compileTSQL pipeline", () => {
3554+
const { sql, params } = compileTSQL(
3555+
"SELECT timeBucket(), count() FROM runs GROUP BY timeBucket",
3556+
{
3557+
tableSchema: [timeBucketSchema],
3558+
enforcedWhereClause: {
3559+
organization_id: { op: "eq", value: "org_test123" },
3560+
project_id: { op: "eq", value: "proj_test456" },
3561+
environment_id: { op: "eq", value: "env_test789" },
3562+
},
3563+
timeRange: sevenDayRange,
3564+
}
3565+
);
3566+
3567+
expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
3568+
expect(sql).toContain("count()");
3569+
// Tenant isolation should still be applied
// Checked through the returned `params` values. Only the organization id is
// asserted; the project and environment ids are not.
3570+
expect(Object.values(params)).toContain("org_test123");
3571+
});
3572+
});
3573+
});

0 commit comments

Comments
 (0)