diff --git a/env.example b/.env.example similarity index 95% rename from env.example rename to .env.example index 2cb4632..8fa8006 100644 --- a/env.example +++ b/.env.example @@ -29,4 +29,4 @@ BL_WORKSPACE=your_bl_workspace ######### RENDER ######## RENDER_API_KEY=your_render_api_key -RENDER_OWNER_ID=your_render_owner_id \ No newline at end of file +RENDER_OWNER_ID=your_render_owner_id diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b212937 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,24 @@ +name: CI + +on: + pull_request: + branches: [master] + push: + branches: [master] + +permissions: + contents: read + +jobs: + ci: + name: Typecheck + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + cache: npm + - run: npm ci + - run: npm run ci diff --git a/.github/workflows/devops-hygiene.yml b/.github/workflows/devops-hygiene.yml new file mode 100644 index 0000000..eecdc36 --- /dev/null +++ b/.github/workflows/devops-hygiene.yml @@ -0,0 +1,33 @@ +name: DevOps Hygiene + +on: + pull_request: + branches: [main, master] + push: + branches: [main, master] + +permissions: + contents: read + +jobs: + hygiene: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Validate env file conventions + run: | + set -euo pipefail + tracked_env="$(git ls-files | awk '($0==".env" || $0 ~ /\/\.env$/){print}')" + legacy_example="$(git ls-files | awk '($0==".example.env" || $0 ~ /\/\.example\.env$/){print}')" + + if [[ -n "$tracked_env" ]]; then + echo "Tracked .env files are not allowed:" + echo "$tracked_env" + exit 1 + fi + + if [[ -n "$legacy_example" ]]; then + echo "Legacy .example.env file names found. Use .env.example instead:" + echo "$legacy_example" + exit 1 + fi diff --git a/.github/workflows/validation-matrix.yml b/.github/workflows/validation-matrix.yml new file mode 100644 index 0000000..fd91948 --- /dev/null +++ b/.github/workflows/validation-matrix.yml @@ -0,0 +1,58 @@ +name: Validation Matrix + +on: + workflow_dispatch: + inputs: + iterations_list: + description: "Comma-separated iterations list (e.g. 5,10,25)" + required: false + default: "5,10,25" + provider: + description: "Optional single provider filter" + required: false + default: "" + +permissions: + contents: read + +jobs: + matrix: + name: Run validation matrix + runs-on: namespace-profile-default + timeout-minutes: 45 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + cache: npm + - run: npm ci + - name: Run matrix benchmarks + env: + COMPUTESDK_API_KEY: ${{ secrets.COMPUTESDK_API_KEY }} + E2B_API_KEY: ${{ secrets.E2B_API_KEY }} + VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} + VERCEL_TEAM_ID: ${{ secrets.VERCEL_TEAM_ID }} + VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }} + BL_API_KEY: ${{ secrets.BL_API_KEY }} + BL_WORKSPACE: ${{ secrets.BL_WORKSPACE }} + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} + DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }} + RAILWAY_API_KEY: ${{ secrets.RAILWAY_API_KEY }} + RAILWAY_PROJECT_ID: ${{ secrets.RAILWAY_PROJECT_ID }} + RAILWAY_ENVIRONMENT_ID: ${{ secrets.RAILWAY_ENVIRONMENT_ID }} + NSC_TOKEN: ${{ secrets.NSC_TOKEN }} + RENDER_API_KEY: ${{ secrets.RENDER_API_KEY }} + RENDER_OWNER_ID: ${{ secrets.RENDER_OWNER_ID }} + run: | + provider_arg="" + if [[ -n "${{ github.event.inputs.provider }}" ]]; then + provider_arg="--provider ${{ github.event.inputs.provider }}" + fi + npm run bench:matrix -- --iterations-list "${{ github.event.inputs.iterations_list }}" $provider_arg + - name: Upload matrix artifacts + uses: actions/upload-artifact@v4 + with: + name: validation-matrix-results + path: results/*.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 561e0dd..0b00196 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,7 +35,7 @@ export const yourProvider: DirectBenchmarkConfig = { 2. Add to the providers array in `src/direct-run.ts` -3. Update `env.example` with required environment variables +3. Update `.env.example` with required environment variables 4. Submit a PR with: - The code changes @@ -76,7 +76,7 @@ Documentation improvements are always welcome. No issue required for typos, clar git clone https://github.com/computesdk/benchmarks.git cd benchmarks npm install -cp env.example .env +cp .env.example .env ``` ### Running Tests Locally @@ -90,6 +90,9 @@ npm run bench:direct:e2b # Run with custom iterations npm run bench:direct -- --iterations 5 + +# Run validation matrix across multiple iteration sets +npm run bench:matrix -- --iterations-list 5,10,25 ``` ### Code Style diff --git a/METHODOLOGY.md b/METHODOLOGY.md index ca07298..453929b 100644 --- a/METHODOLOGY.md +++ b/METHODOLOGY.md @@ -215,10 +215,13 @@ Reproduce our results: git clone https://github.com/computesdk/benchmarks.git cd benchmarks npm install -cp env.example .env # Add your API keys +cp .env.example .env # Add your API keys # Run with same settings as CI npm run bench:direct -- --iterations 10 + +# Run a validation matrix (multiple iteration sets) +npm run bench:matrix -- --iterations-list 5,10,25 ``` **Note**: Your results will differ based on your network location and conditions. diff --git a/package.json b/package.json index 9c91df2..0f9707a 100644 --- a/package.json +++ b/package.json @@ -13,8 +13,11 @@ "bench:railway": "tsx src/run.ts --provider railway", "bench:namespace": "tsx src/run.ts --provider namespace", "bench:render": "tsx src/run.ts --provider render", + "bench:matrix": "tsx src/run-matrix.ts", "update-readme": "tsx src/update-readme.ts", - "generate-svg": "tsx src/generate-svg.ts" + "generate-svg": "tsx src/generate-svg.ts", + "typecheck": "tsc --noEmit", + "ci": "npm run typecheck" }, "dependencies": { "@computesdk/blaxel": "^1.5.7", diff --git a/src/benchmark.ts b/src/benchmark.ts index 15af4e4..ef27241 100644 --- a/src/benchmark.ts +++ b/src/benchmark.ts @@ -1,5 +1,10 @@ import type { ProviderConfig, BenchmarkResult, TimingResult, Stats } from './types.js'; +function buildIterationRequestId(runId: string | undefined, provider: string, iteration: number): string { + const seed = runId ?? 'run'; + return `${seed}-${provider}-${iteration + 1}`; +} + function computeStats(values: number[]): Stats { if (values.length === 0) return { min: 0, max: 0, median: 0, avg: 0 }; @@ -18,13 +23,14 @@ function computeStats(values: number[]): Stats { } export async function runBenchmark(config: ProviderConfig): Promise { - const { name, iterations = 10, timeout = 120_000, requiredEnvVars } = config; + const { name, runId, iterations = 10, timeout = 120_000, requiredEnvVars } = config; // Check if all required credentials are available const missingVars = requiredEnvVars.filter(v => !process.env[v]); if (missingVars.length > 0) { return { provider: name, + runId, iterations: [], summary: { ttiMs: { min: 0, max: 0, median: 0, avg: 0 } }, skipped: true, @@ -39,15 +45,17 @@ export async function runBenchmark(config: ProviderConfig): Promise r.ttiMs)), @@ -73,8 +83,9 @@ export async function runBenchmark(config: ProviderConfig): Promise { +async function runIteration(compute: any, timeout: number, requestId: string): Promise { let sandbox: any = null; + const startedAt = new Date().toISOString(); try { const start = performance.now(); @@ -89,7 +100,7 @@ async function runIteration(compute: any, timeout: number): Promise parseInt(v.trim(), 10)) + .filter(v => Number.isFinite(v) && v > 0); + + const dedup = Array.from(new Set(parsed)); + if (dedup.length === 0) { + throw new Error(`Invalid --iterations-list value: ${value}`); + } + return dedup; +} + +function buildMedianMatrix(runs: Array<{ iterations: number; results: BenchmarkResult[] }>) { + const matrix: Record> = {}; + for (const run of runs) { + for (const result of run.results) { + if (!matrix[result.provider]) { + matrix[result.provider] = {}; + } + matrix[result.provider][String(run.iterations)] = result.skipped ? null : result.summary.ttiMs.median; + } + } + return matrix; +} + +async function main() { + const iterationsList = parseIterationsList(iterationsListArg); + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + + const toRun = providerFilter + ? providers.filter(p => p.name === providerFilter) + : providers; + + if (toRun.length === 0) { + console.error(`Unknown provider: ${providerFilter}`); + console.error(`Available: ${providers.map(p => p.name).join(', ')}`); + process.exit(1); + } + + console.log('ComputeSDK Benchmark Validation Matrix'); + console.log(`Matrix ID: ${matrixId}`); + console.log(`Iterations sets: ${iterationsList.join(', ')}`); + console.log(`Date: ${new Date().toISOString()}\n`); + + const matrixRuns: Array<{ iterations: number; runId: string; results: BenchmarkResult[]; file: string }> = []; + + for (const iterations of iterationsList) { + const runId = `${matrixId}-i${iterations}`; + console.log(`\n=== Matrix run: ${runId} ===`); + const results: BenchmarkResult[] = []; + + for (const providerConfig of toRun) { + const result = await runBenchmark({ ...providerConfig, iterations, timeout: timeoutMs, runId }); + results.push(result); + } + + printResultsTable(results); + + const runFile = `${timestamp}-${runId}.json`; + const runPath = path.resolve(__dirname, `../results/${runFile}`); + const metadata: RunMetadata = { + runId, + mode: 'matrix', + providerFilter, + iterations, + timeoutMs, + }; + await writeResultsJson(results, runPath, metadata); + matrixRuns.push({ iterations, runId, results, file: runFile }); + } + + const summary = { + version: '1.0', + matrixId, + timestamp: new Date().toISOString(), + providerFilter: providerFilter || null, + timeoutMs, + iterationsList, + runFiles: matrixRuns.map(r => ({ runId: r.runId, iterations: r.iterations, file: r.file })), + medianMatrix: buildMedianMatrix(matrixRuns), + }; + + const matrixOut = path.resolve(__dirname, `../results/matrix-${timestamp}.json`); + fs.writeFileSync(matrixOut, JSON.stringify(summary, null, 2)); + console.log(`\nMatrix summary written to ${matrixOut}`); +} + +main().catch(err => { + console.error('Matrix benchmark failed:', err); + process.exit(1); +}); diff --git a/src/run.ts b/src/run.ts index dee7f0b..7e4a543 100644 --- a/src/run.ts +++ b/src/run.ts @@ -4,7 +4,7 @@ import { fileURLToPath } from 'url'; import { runBenchmark } from './benchmark.js'; import { printResultsTable, writeResultsJson } from './table.js'; import { providers } from './providers.js'; -import type { BenchmarkResult } from './types.js'; +import type { BenchmarkResult, RunMetadata } from './types.js'; // Load .env from the benchmarking root const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -14,6 +14,7 @@ config({ path: path.resolve(__dirname, '../.env') }); const args = process.argv.slice(2); const providerFilter = getArgValue(args, '--provider'); const iterations = parseInt(getArgValue(args, '--iterations') || '10', 10); +const runId = getArgValue(args, '--run-id') || `bench-${Date.now()}`; function getArgValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); @@ -22,6 +23,7 @@ function getArgValue(args: string[], flag: string): string | undefined { async function main() { console.log('ComputeSDK Sandbox Provider Benchmarks'); + console.log(`Run ID: ${runId}`); console.log(`Iterations per provider: ${iterations}`); console.log(`Date: ${new Date().toISOString()}\n`); @@ -40,7 +42,7 @@ async function main() { // Run benchmarks sequentially to avoid resource contention for (const providerConfig of toRun) { - const result = await runBenchmark({ ...providerConfig, iterations }); + const result = await runBenchmark({ ...providerConfig, iterations, runId }); results.push(result); } @@ -50,7 +52,14 @@ async function main() { // Write JSON results const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); const outPath = path.resolve(__dirname, `../results/${timestamp}.json`); - await writeResultsJson(results, outPath); + const metadata: RunMetadata = { + runId, + mode: 'single', + providerFilter, + iterations, + timeoutMs: 120_000, + }; + await writeResultsJson(results, outPath, metadata); } main().catch(err => { diff --git a/src/table.ts b/src/table.ts index 8a71478..6c199c2 100644 --- a/src/table.ts +++ b/src/table.ts @@ -1,4 +1,4 @@ -import type { BenchmarkResult } from './types.js'; +import type { BenchmarkResult, RunMetadata } from './types.js'; /** * Print a comparison table of benchmark results to stdout @@ -82,15 +82,22 @@ function round(n: number): number { /** * Write results to a JSON file with clean formatting */ -export async function writeResultsJson(results: BenchmarkResult[], outPath: string): Promise { +export async function writeResultsJson( + results: BenchmarkResult[], + outPath: string, + metadata?: RunMetadata +): Promise { const fs = await import('fs'); const os = await import('os'); // Clean up floating point noise in results const cleanResults = results.map(r => ({ provider: r.provider, + ...(r.runId ? { runId: r.runId } : {}), iterations: r.iterations.map(i => ({ ttiMs: round(i.ttiMs), + ...(i.requestId ? { requestId: i.requestId } : {}), + ...(i.startedAt ? { startedAt: i.startedAt } : {}), ...(i.error ? { error: i.error } : {}), })), summary: { @@ -113,8 +120,11 @@ export async function writeResultsJson(results: BenchmarkResult[], outPath: stri arch: os.arch(), }, config: { - iterations: results[0]?.iterations.length || 0, - timeoutMs: 120000, + iterations: metadata?.iterations ?? (results[0]?.iterations.length || 0), + timeoutMs: metadata?.timeoutMs ?? 120000, + ...(metadata?.providerFilter ? { providerFilter: metadata.providerFilter } : {}), + ...(metadata?.runId ? { runId: metadata.runId } : {}), + ...(metadata?.mode ? { mode: metadata.mode } : {}), }, results: cleanResults, }; diff --git a/src/types.ts b/src/types.ts index 60596f5..e6ed49e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,6 +1,8 @@ export interface ProviderConfig { /** Provider name */ name: string; + /** Optional run correlation ID */ + runId?: string; /** Number of iterations (default: 10) */ iterations?: number; /** Timeout per iteration in ms (default: 120000) */ @@ -14,6 +16,10 @@ export interface ProviderConfig { export interface TimingResult { /** Total time from start to first successful code execution */ ttiMs: number; + /** Correlation ID for this iteration */ + requestId?: string; + /** ISO-8601 timestamp when iteration started */ + startedAt?: string; /** Error message if this iteration failed */ error?: string; } @@ -27,6 +33,7 @@ export interface Stats { export interface BenchmarkResult { provider: string; + runId?: string; iterations: TimingResult[]; summary: { ttiMs: Stats; @@ -34,3 +41,11 @@ export interface BenchmarkResult { skipped?: boolean; skipReason?: string; } + +export interface RunMetadata { + runId: string; + mode: 'single' | 'matrix'; + providerFilter?: string; + iterations: number; + timeoutMs: number; +}