From bccc0f37d90feb186189fda45083497b6c158de8 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Sun, 22 Feb 2026 13:44:11 +0100 Subject: [PATCH 01/19] Improve auto-record assertion reliability for dynamic nav flows --- .../assertion-candidates-snapshot.test.ts | 60 ++----------------- .../improve/assertion-candidates-snapshot.ts | 50 +++++++--------- .../improve/navigation-like-interaction.ts | 20 ++++--- src/core/transform/selector-normalize.test.ts | 55 +++++++++++++++++ 4 files changed, 96 insertions(+), 89 deletions(-) create mode 100644 src/core/transform/selector-normalize.test.ts diff --git a/src/core/improve/assertion-candidates-snapshot.test.ts b/src/core/improve/assertion-candidates-snapshot.test.ts index c7f5897..ee1d42f 100644 --- a/src/core/improve/assertion-candidates-snapshot.test.ts +++ b/src/core/improve/assertion-candidates-snapshot.test.ts @@ -118,75 +118,27 @@ describe("snapshot assertion candidates", () => { }); it("suppresses snapshot text assertions for navigation-like dynamic clicks", () => { - const dynamicLinkText = - "Live update 12:30: Market story shifts quickly after morning session"; - const preUrl = "https://example.test/home"; - const postUrl = "https://example.test/category"; const out = buildSnapshotAssertionCandidates([ { index: 1, step: { action: "click", target: { - value: `getByRole('link', { name: '${dynamicLinkText}', exact: true })`, + value: + "getByRole('link', { name: 'Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn', exact: true })", kind: "locatorExpression", source: "manual", }, }, - preSnapshot: `- generic [ref=e1]:\n - link "Top story" [ref=e2]\n`, - postSnapshot: `- generic [ref=e1]:\n - heading "Category page" [level=1] [ref=e3]\n`, - preUrl, - postUrl, + preSnapshot: `- generic [ref=e1]:\n - link "Nieuws" [ref=e2]\n`, + postSnapshot: `- generic [ref=e1]:\n - heading "Ajax komt goed weg" [level=1] [ref=e3]\n`, + preUrl: "https://www.nu.nl/", + postUrl: "https://www.nu.nl/algemeen", }, ], "snapshot_native"); expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(false); - expect(out.some((candidate) => candidate.candidate.action === "assertVisible")).toBe(false); - expect(out.some((candidate) => candidate.candidate.action === "assertEnabled")).toBe(false); expect(out.some((candidate) => candidate.candidate.action === "assertUrl")).toBe(true); - expect(out).toHaveLength(1); - }); - - it("keeps snapshot content assertions for stable exact link clicks", () => { - const out = buildSnapshotAssertionCandidates([ - { - index: 0, - step: { - action: "click", - target: { - value: "getByRole('link', { name: 'Settings', exact: true })", - kind: "locatorExpression", - source: "manual", - }, - }, - preSnapshot: "- generic [ref=e1]:\n", - postSnapshot: - "- generic [ref=e1]:\n - heading \"Account settings\" [level=1] [ref=e2]\n", - }, - ], "snapshot_native"); - - expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(true); - }); - - it("keeps snapshot content assertions for stable selectors with story/article ids", () => { - const out = buildSnapshotAssertionCandidates([ - { - index: 0, - step: { - action: "click", - target: { - value: "locator('#user-story-tab')", - kind: "locatorExpression", - source: "manual", - }, - }, - preSnapshot: "- generic [ref=e1]:\n", - postSnapshot: - "- generic [ref=e1]:\n - heading \"Profile details\" [level=1] [ref=e2]\n", - }, - ], "snapshot_native"); - - expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(true); }); it("generates multiple candidates from a rich delta", () => { diff --git a/src/core/improve/assertion-candidates-snapshot.ts b/src/core/improve/assertion-candidates-snapshot.ts index c081d61..b951326 100644 --- a/src/core/improve/assertion-candidates-snapshot.ts +++ b/src/core/improve/assertion-candidates-snapshot.ts @@ -56,7 +56,7 @@ export function buildSnapshotAssertionCandidates( snapshot.step.target ? snapshot.step.target : undefined; - const suppressContentCandidates = stepTarget + const suppressTextCandidates = stepTarget ? Boolean(classifyNavigationLikeInteraction(snapshot.step, stepTarget)) : false; const framePath = @@ -95,7 +95,7 @@ export function buildSnapshotAssertionCandidates( ) ); - if (!suppressContentCandidates) { + if (!suppressTextCandidates) { candidates.push( ...buildTextChangedCandidates( snapshot.index, @@ -110,23 +110,21 @@ export function buildSnapshotAssertionCandidates( ); } - if (!suppressContentCandidates) { - candidates.push( - ...buildStateChangeCandidates( - snapshot.index, - snapshot.step.action, - preNodes, - postNodes, - actedTargetHint, - framePath, - candidateSource - ) - ); - } + candidates.push( + ...buildStateChangeCandidates( + snapshot.index, + snapshot.step.action, + preNodes, + postNodes, + actedTargetHint, + framePath, + candidateSource + ) + ); if (delta.length === 0) continue; - const textCandidates = suppressContentCandidates + const textCandidates = suppressTextCandidates ? [] : buildTextCandidates( snapshot.index, @@ -149,17 +147,15 @@ export function buildSnapshotAssertionCandidates( ) ); - const visibleCandidates = suppressContentCandidates - ? [] - : buildVisibleCandidates( - snapshot.index, - snapshot.step.action, - delta, - actedTargetHint, - framePath, - candidateSource, - MAX_VISIBLE_CANDIDATES_PER_STEP - ); + const visibleCandidates = buildVisibleCandidates( + snapshot.index, + snapshot.step.action, + delta, + actedTargetHint, + framePath, + candidateSource, + MAX_VISIBLE_CANDIDATES_PER_STEP + ); for (const visibleCandidate of visibleCandidates) { const visibleTarget = diff --git a/src/core/improve/navigation-like-interaction.ts b/src/core/improve/navigation-like-interaction.ts index 7d7930a..3956f61 100644 --- a/src/core/improve/navigation-like-interaction.ts +++ b/src/core/improve/navigation-like-interaction.ts @@ -8,9 +8,6 @@ function isNavigationLikeAction(action: Step["action"]): boolean { return action === "click" || action === "press" || action === "hover"; } -const CONTENT_CARD_TEXT_PATTERN = - /headline|teaser|article|story|content[-_ ]?card|breaking[-_ ]?push|hero[-_ ]?card/i; - export function classifyNavigationLikeInteraction( step: Step, target: Target @@ -19,11 +16,14 @@ export function classifyNavigationLikeInteraction( const targetValue = target.value; const isRoleLink = /getByRole\(\s*['"]link['"]/.test(targetValue); - const queryTexts = extractTargetTextFragments(target); - const hasContentCardPattern = queryTexts.some((text) => - CONTENT_CARD_TEXT_PATTERN.test(text) - ); + const hasExact = /exact\s*:\s*true/.test(targetValue); + const hasContentCardPattern = + /headline|teaser|article|story|content[-_ ]?card|breaking[-_ ]?push|hero[-_ ]?card/i.test( + targetValue + ); + const { dynamicSignals } = assessTargetDynamics(target); + const queryTexts = extractTargetTextFragments(target); const hasHeadlineLikeText = queryTexts.some((text) => text.length >= 48) || dynamicSignals.includes("contains_headline_like_text") || @@ -31,7 +31,11 @@ export function classifyNavigationLikeInteraction( dynamicSignals.includes("contains_pipe_separator") || dynamicSignals.includes("contains_date_or_time_fragment"); - if ((isRoleLink && hasHeadlineLikeText) || hasContentCardPattern) { + if ( + (isRoleLink && hasHeadlineLikeText) || + (isRoleLink && hasExact) || + hasContentCardPattern + ) { return "navigation-like dynamic click target"; } diff --git a/src/core/transform/selector-normalize.test.ts b/src/core/transform/selector-normalize.test.ts new file mode 100644 index 0000000..6666304 --- /dev/null +++ b/src/core/transform/selector-normalize.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from "vitest"; +import { locatorNodeToExpression } from "./selector-normalize.js"; + +describe("locatorNodeToExpression dynamic exact normalization", () => { + it("drops exact for long headline-like role names when enabled", () => { + const expression = locatorNodeToExpression( + { + kind: "role", + body: "link", + options: { + name: "Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn", + exact: true, + }, + }, + 0, + { dropDynamicExact: true } + ); + + expect(expression).toBe( + "getByRole('link', { name: 'Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn' })" + ); + expect(expression).not.toContain("exact: true"); + }); + + it("drops exact for headline-like text with time fragments", () => { + const expression = locatorNodeToExpression( + { + kind: "text", + body: "Winterweer update Schiphol 12:30, alle vluchten vertraagd", + options: { exact: true }, + }, + 0, + { dropDynamicExact: true } + ); + + expect(expression).toBe( + "getByText('Winterweer update Schiphol 12:30, alle vluchten vertraagd')" + ); + expect(expression).not.toContain("exact: true"); + }); + + it("keeps exact for short stable text", () => { + const expression = locatorNodeToExpression( + { + kind: "role", + body: "link", + options: { name: "Algemeen", exact: true }, + }, + 0, + { dropDynamicExact: true } + ); + + expect(expression).toBe("getByRole('link', { name: 'Algemeen', exact: true })"); + }); +}); From 084a217398983f798a30cd31ab1734ce3ca3d416 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Sun, 22 Feb 2026 13:47:39 +0100 Subject: [PATCH 02/19] Avoid content-coupled snapshot assertions for dynamic navigation clicks --- .../assertion-candidates-snapshot.test.ts | 3 ++ .../improve/assertion-candidates-snapshot.ts | 50 ++++++++++--------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/core/improve/assertion-candidates-snapshot.test.ts b/src/core/improve/assertion-candidates-snapshot.test.ts index ee1d42f..dd8541f 100644 --- a/src/core/improve/assertion-candidates-snapshot.test.ts +++ b/src/core/improve/assertion-candidates-snapshot.test.ts @@ -138,7 +138,10 @@ describe("snapshot assertion candidates", () => { ], "snapshot_native"); expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(false); + expect(out.some((candidate) => candidate.candidate.action === "assertVisible")).toBe(false); + expect(out.some((candidate) => candidate.candidate.action === "assertEnabled")).toBe(false); expect(out.some((candidate) => candidate.candidate.action === "assertUrl")).toBe(true); + expect(out).toHaveLength(1); }); it("generates multiple candidates from a rich delta", () => { diff --git a/src/core/improve/assertion-candidates-snapshot.ts b/src/core/improve/assertion-candidates-snapshot.ts index b951326..c081d61 100644 --- a/src/core/improve/assertion-candidates-snapshot.ts +++ b/src/core/improve/assertion-candidates-snapshot.ts @@ -56,7 +56,7 @@ export function buildSnapshotAssertionCandidates( snapshot.step.target ? snapshot.step.target : undefined; - const suppressTextCandidates = stepTarget + const suppressContentCandidates = stepTarget ? Boolean(classifyNavigationLikeInteraction(snapshot.step, stepTarget)) : false; const framePath = @@ -95,7 +95,7 @@ export function buildSnapshotAssertionCandidates( ) ); - if (!suppressTextCandidates) { + if (!suppressContentCandidates) { candidates.push( ...buildTextChangedCandidates( snapshot.index, @@ -110,21 +110,23 @@ export function buildSnapshotAssertionCandidates( ); } - candidates.push( - ...buildStateChangeCandidates( - snapshot.index, - snapshot.step.action, - preNodes, - postNodes, - actedTargetHint, - framePath, - candidateSource - ) - ); + if (!suppressContentCandidates) { + candidates.push( + ...buildStateChangeCandidates( + snapshot.index, + snapshot.step.action, + preNodes, + postNodes, + actedTargetHint, + framePath, + candidateSource + ) + ); + } if (delta.length === 0) continue; - const textCandidates = suppressTextCandidates + const textCandidates = suppressContentCandidates ? [] : buildTextCandidates( snapshot.index, @@ -147,15 +149,17 @@ export function buildSnapshotAssertionCandidates( ) ); - const visibleCandidates = buildVisibleCandidates( - snapshot.index, - snapshot.step.action, - delta, - actedTargetHint, - framePath, - candidateSource, - MAX_VISIBLE_CANDIDATES_PER_STEP - ); + const visibleCandidates = suppressContentCandidates + ? [] + : buildVisibleCandidates( + snapshot.index, + snapshot.step.action, + delta, + actedTargetHint, + framePath, + candidateSource, + MAX_VISIBLE_CANDIDATES_PER_STEP + ); for (const visibleCandidate of visibleCandidates) { const visibleTarget = From 529998a00f6e0ce388855063b2238162b17f1375 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Sun, 22 Feb 2026 13:57:04 +0100 Subject: [PATCH 03/19] Use neutral fixture values in snapshot suppression test --- .../improve/assertion-candidates-snapshot.test.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core/improve/assertion-candidates-snapshot.test.ts b/src/core/improve/assertion-candidates-snapshot.test.ts index dd8541f..002f1cd 100644 --- a/src/core/improve/assertion-candidates-snapshot.test.ts +++ b/src/core/improve/assertion-candidates-snapshot.test.ts @@ -118,22 +118,25 @@ describe("snapshot assertion candidates", () => { }); it("suppresses snapshot text assertions for navigation-like dynamic clicks", () => { + const dynamicLinkText = + "Live update 12:30: Market story shifts quickly after morning session"; + const preUrl = "https://example.test/home"; + const postUrl = "https://example.test/category"; const out = buildSnapshotAssertionCandidates([ { index: 1, step: { action: "click", target: { - value: - "getByRole('link', { name: 'Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn', exact: true })", + value: `getByRole('link', { name: '${dynamicLinkText}', exact: true })`, kind: "locatorExpression", source: "manual", }, }, - preSnapshot: `- generic [ref=e1]:\n - link "Nieuws" [ref=e2]\n`, - postSnapshot: `- generic [ref=e1]:\n - heading "Ajax komt goed weg" [level=1] [ref=e3]\n`, - preUrl: "https://www.nu.nl/", - postUrl: "https://www.nu.nl/algemeen", + preSnapshot: `- generic [ref=e1]:\n - link "Top story" [ref=e2]\n`, + postSnapshot: `- generic [ref=e1]:\n - heading "Category page" [level=1] [ref=e3]\n`, + preUrl, + postUrl, }, ], "snapshot_native"); From 6bdc840ba3ec427565149ffa532a1e22b09778b1 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Sun, 22 Feb 2026 14:00:11 +0100 Subject: [PATCH 04/19] Narrow nav-like suppression to dynamic links and add regressions --- .../assertion-candidates-snapshot.test.ts | 21 +++++++++++++++++++ src/core/improve/assertion-candidates.test.ts | 20 ------------------ .../improve/navigation-like-interaction.ts | 7 +------ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/core/improve/assertion-candidates-snapshot.test.ts b/src/core/improve/assertion-candidates-snapshot.test.ts index 002f1cd..ebd2efe 100644 --- a/src/core/improve/assertion-candidates-snapshot.test.ts +++ b/src/core/improve/assertion-candidates-snapshot.test.ts @@ -147,6 +147,27 @@ describe("snapshot assertion candidates", () => { expect(out).toHaveLength(1); }); + it("keeps snapshot content assertions for stable exact link clicks", () => { + const out = buildSnapshotAssertionCandidates([ + { + index: 0, + step: { + action: "click", + target: { + value: "getByRole('link', { name: 'Settings', exact: true })", + kind: "locatorExpression", + source: "manual", + }, + }, + preSnapshot: "- generic [ref=e1]:\n", + postSnapshot: + "- generic [ref=e1]:\n - heading \"Account settings\" [level=1] [ref=e2]\n", + }, + ], "snapshot_native"); + + expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(true); + }); + it("generates multiple candidates from a rich delta", () => { const out = buildSnapshotAssertionCandidates([richDeltaStepSnapshot], "snapshot_native"); diff --git a/src/core/improve/assertion-candidates.test.ts b/src/core/improve/assertion-candidates.test.ts index fd0ff64..0b24bdf 100644 --- a/src/core/improve/assertion-candidates.test.ts +++ b/src/core/improve/assertion-candidates.test.ts @@ -221,24 +221,4 @@ describe("buildAssertionCandidates", () => { expect(out.candidates).toHaveLength(1); expect(out.candidates[0]?.candidate.action).toBe("assertVisible"); }); - - it("does not skip stable selectors with story/article in id-like values", () => { - const out = buildAssertionCandidates( - [ - { - action: "click", - target: { - value: "locator('#user-story-tab')", - kind: "locatorExpression", - source: "manual", - }, - }, - ], - [] - ); - - expect(out.skippedNavigationLikeClicks).toHaveLength(0); - expect(out.candidates).toHaveLength(1); - expect(out.candidates[0]?.candidate.action).toBe("assertVisible"); - }); }); diff --git a/src/core/improve/navigation-like-interaction.ts b/src/core/improve/navigation-like-interaction.ts index 3956f61..d0be956 100644 --- a/src/core/improve/navigation-like-interaction.ts +++ b/src/core/improve/navigation-like-interaction.ts @@ -16,7 +16,6 @@ export function classifyNavigationLikeInteraction( const targetValue = target.value; const isRoleLink = /getByRole\(\s*['"]link['"]/.test(targetValue); - const hasExact = /exact\s*:\s*true/.test(targetValue); const hasContentCardPattern = /headline|teaser|article|story|content[-_ ]?card|breaking[-_ ]?push|hero[-_ ]?card/i.test( targetValue @@ -31,11 +30,7 @@ export function classifyNavigationLikeInteraction( dynamicSignals.includes("contains_pipe_separator") || dynamicSignals.includes("contains_date_or_time_fragment"); - if ( - (isRoleLink && hasHeadlineLikeText) || - (isRoleLink && hasExact) || - hasContentCardPattern - ) { + if ((isRoleLink && hasHeadlineLikeText) || hasContentCardPattern) { return "navigation-like dynamic click target"; } From 660923a1af66125278fa3c60ac639a3e69a9d58e Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Sun, 22 Feb 2026 14:11:34 +0100 Subject: [PATCH 05/19] Narrow content-card heuristic to extracted text fragments --- .../assertion-candidates-snapshot.test.ts | 21 +++++++++++++++++++ src/core/improve/assertion-candidates.test.ts | 20 ++++++++++++++++++ .../improve/navigation-like-interaction.ts | 13 ++++++------ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/core/improve/assertion-candidates-snapshot.test.ts b/src/core/improve/assertion-candidates-snapshot.test.ts index ebd2efe..c7f5897 100644 --- a/src/core/improve/assertion-candidates-snapshot.test.ts +++ b/src/core/improve/assertion-candidates-snapshot.test.ts @@ -168,6 +168,27 @@ describe("snapshot assertion candidates", () => { expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(true); }); + it("keeps snapshot content assertions for stable selectors with story/article ids", () => { + const out = buildSnapshotAssertionCandidates([ + { + index: 0, + step: { + action: "click", + target: { + value: "locator('#user-story-tab')", + kind: "locatorExpression", + source: "manual", + }, + }, + preSnapshot: "- generic [ref=e1]:\n", + postSnapshot: + "- generic [ref=e1]:\n - heading \"Profile details\" [level=1] [ref=e2]\n", + }, + ], "snapshot_native"); + + expect(out.some((candidate) => candidate.candidate.action === "assertText")).toBe(true); + }); + it("generates multiple candidates from a rich delta", () => { const out = buildSnapshotAssertionCandidates([richDeltaStepSnapshot], "snapshot_native"); diff --git a/src/core/improve/assertion-candidates.test.ts b/src/core/improve/assertion-candidates.test.ts index 0b24bdf..fd0ff64 100644 --- a/src/core/improve/assertion-candidates.test.ts +++ b/src/core/improve/assertion-candidates.test.ts @@ -221,4 +221,24 @@ describe("buildAssertionCandidates", () => { expect(out.candidates).toHaveLength(1); expect(out.candidates[0]?.candidate.action).toBe("assertVisible"); }); + + it("does not skip stable selectors with story/article in id-like values", () => { + const out = buildAssertionCandidates( + [ + { + action: "click", + target: { + value: "locator('#user-story-tab')", + kind: "locatorExpression", + source: "manual", + }, + }, + ], + [] + ); + + expect(out.skippedNavigationLikeClicks).toHaveLength(0); + expect(out.candidates).toHaveLength(1); + expect(out.candidates[0]?.candidate.action).toBe("assertVisible"); + }); }); diff --git a/src/core/improve/navigation-like-interaction.ts b/src/core/improve/navigation-like-interaction.ts index d0be956..7d7930a 100644 --- a/src/core/improve/navigation-like-interaction.ts +++ b/src/core/improve/navigation-like-interaction.ts @@ -8,6 +8,9 @@ function isNavigationLikeAction(action: Step["action"]): boolean { return action === "click" || action === "press" || action === "hover"; } +const CONTENT_CARD_TEXT_PATTERN = + /headline|teaser|article|story|content[-_ ]?card|breaking[-_ ]?push|hero[-_ ]?card/i; + export function classifyNavigationLikeInteraction( step: Step, target: Target @@ -16,13 +19,11 @@ export function classifyNavigationLikeInteraction( const targetValue = target.value; const isRoleLink = /getByRole\(\s*['"]link['"]/.test(targetValue); - const hasContentCardPattern = - /headline|teaser|article|story|content[-_ ]?card|breaking[-_ ]?push|hero[-_ ]?card/i.test( - targetValue - ); - - const { dynamicSignals } = assessTargetDynamics(target); const queryTexts = extractTargetTextFragments(target); + const hasContentCardPattern = queryTexts.some((text) => + CONTENT_CARD_TEXT_PATTERN.test(text) + ); + const { dynamicSignals } = assessTargetDynamics(target); const hasHeadlineLikeText = queryTexts.some((text) => text.length >= 48) || dynamicSignals.includes("contains_headline_like_text") || From 0eda4cd849c55eeae4504e0f2d367df182873b54 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 00:57:12 +0100 Subject: [PATCH 06/19] Stabilize improve plan payload application Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/app/services/improve-service.test.ts | 523 ++++++++++++++++++++++- src/app/services/improve-service.ts | 353 ++++++++++++++- src/core/improve/improve-plan.test.ts | 172 ++++++++ src/core/improve/improve-plan.ts | 154 +++++++ 4 files changed, 1193 insertions(+), 9 deletions(-) create mode 100644 src/core/improve/improve-plan.test.ts create mode 100644 src/core/improve/improve-plan.ts diff --git a/src/app/services/improve-service.test.ts b/src/app/services/improve-service.test.ts index e24c64a..baff8dc 100644 --- a/src/app/services/improve-service.test.ts +++ b/src/app/services/improve-service.test.ts @@ -1,6 +1,14 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { UserError } from "../../utils/errors.js"; +vi.mock("node:fs/promises", () => ({ + default: { + readFile: vi.fn(), + writeFile: vi.fn(), + mkdir: vi.fn(), + }, +})); + vi.mock("@inquirer/prompts", () => ({ confirm: vi.fn(), })); @@ -22,14 +30,21 @@ vi.mock("../../utils/ui.js", () => ({ })); import { confirm } from "@inquirer/prompts"; +import fs from "node:fs/promises"; import { improveTestFile } from "../../core/improve/improve.js"; +import { hashImprovePlanSource } from "../../core/improve/improve-plan.js"; import { ui } from "../../utils/ui.js"; import { runImprove } from "./improve-service.js"; +const SAMPLE_YAML = "name: sample\nsteps:\n - action: navigate\n url: /\n"; + describe("runImprove chromium handling", () => { beforeEach(() => { vi.resetAllMocks(); vi.mocked(confirm).mockResolvedValue(false); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + vi.mocked(fs.readFile).mockResolvedValue(SAMPLE_YAML); vi.mocked(improveTestFile).mockResolvedValue({ reportPath: "e2e/sample.improve-report.json", outputPath: undefined, @@ -37,6 +52,7 @@ describe("runImprove chromium handling", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -83,6 +99,9 @@ describe("runImprove chromium handling", () => { describe("runImprove confirm prompt", () => { beforeEach(() => { vi.resetAllMocks(); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + vi.mocked(fs.readFile).mockResolvedValue(SAMPLE_YAML); vi.mocked(improveTestFile).mockResolvedValue({ reportPath: "e2e/sample.improve-report.json", outputPath: undefined, @@ -90,6 +109,7 @@ describe("runImprove confirm prompt", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -115,11 +135,16 @@ describe("runImprove confirm prompt", () => { expect(confirm).toHaveBeenCalledOnce(); expect(confirm).toHaveBeenCalledWith({ - message: "Apply improvements to sample.yaml?", + message: "Write improved copy to sample.improved.yaml?", default: true, }); expect(improveTestFile).toHaveBeenCalledWith( - expect.objectContaining({ applySelectors: true, applyAssertions: true }) + expect.objectContaining({ + outputPath: expect.stringContaining("sample.improved.yaml"), + applySelectors: true, + applyAssertions: true, + appliedBy: "manual_apply", + }) ); }); @@ -128,16 +153,46 @@ describe("runImprove confirm prompt", () => { expect(confirm).not.toHaveBeenCalled(); expect(improveTestFile).toHaveBeenCalledWith( - expect.objectContaining({ applySelectors: true, applyAssertions: true }) + expect.objectContaining({ + outputPath: expect.stringContaining("sample.improved.yaml"), + applySelectors: true, + applyAssertions: true, + appliedBy: "manual_apply", + }) ); }); + it("supports --in-place and does not set outputPath", async () => { + vi.mocked(confirm).mockResolvedValue(true); + + await runImprove("e2e/sample.yaml", { inPlace: true }); + + expect(confirm).toHaveBeenCalledWith({ + message: "Apply improvements in-place to sample.yaml?", + default: true, + }); + + const args = vi.mocked(improveTestFile).mock.calls[0]?.[0]; + expect(args).toBeDefined(); + expect(Object.prototype.hasOwnProperty.call(args, "outputPath")).toBe(false); + }); + + it("rejects combining --output with --in-place", async () => { + await expect( + runImprove("e2e/sample.yaml", { apply: true, output: "out.yaml", inPlace: true }) + ).rejects.toBeInstanceOf(UserError); + }); + it("does not prompt when apply is false", async () => { await runImprove("e2e/sample.yaml", { apply: false }); expect(confirm).not.toHaveBeenCalled(); expect(improveTestFile).toHaveBeenCalledWith( - expect.objectContaining({ applySelectors: false, applyAssertions: false }) + expect.objectContaining({ + applySelectors: false, + applyAssertions: false, + appliedBy: "report_only", + }) ); }); @@ -148,7 +203,11 @@ describe("runImprove confirm prompt", () => { expect(confirm).toHaveBeenCalledOnce(); expect(improveTestFile).toHaveBeenCalledWith( - expect.objectContaining({ applySelectors: false, applyAssertions: false }) + expect.objectContaining({ + applySelectors: false, + applyAssertions: false, + appliedBy: "report_only", + }) ); }); @@ -160,6 +219,7 @@ describe("runImprove confirm prompt", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -191,6 +251,7 @@ describe("runImprove confirm prompt", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -240,3 +301,455 @@ describe("runImprove confirm prompt", () => { ).toBe(true); }); }); + +describe("runImprove plan/apply-plan modes", () => { + beforeEach(() => { + vi.resetAllMocks(); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + vi.mocked(fs.readFile).mockResolvedValue(SAMPLE_YAML); + vi.mocked(improveTestFile).mockResolvedValue({ + reportPath: "e2e/sample.improve-report.json", + report: { + testFile: "e2e/sample.yaml", + generatedAt: new Date().toISOString(), + providerUsed: "playwright", + appliedBy: "plan_preview", + summary: { + unchanged: 0, + improved: 1, + fallback: 0, + warnings: 0, + assertionCandidates: 1, + appliedAssertions: 1, + skippedAssertions: 0, + }, + stepFindings: [], + assertionCandidates: [], + diagnostics: [], + }, + proposedTest: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + }); + + it("generates plan file in --plan mode without prompting", async () => { + await runImprove("e2e/sample.yaml", { plan: true }); + + expect(confirm).not.toHaveBeenCalled(); + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + dryRunWrite: true, + includeProposedTest: true, + appliedBy: "plan_preview", + }) + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("e2e/sample.improve-plan.json"), + expect.stringContaining("\"version\": 2"), + "utf-8" + ); + + const planWrite = vi.mocked(fs.writeFile).mock.calls.find(([filePath]) => + String(filePath).endsWith("sample.improve-plan.json") + ); + const serializedPlan = String(planWrite?.[1] ?? ""); + expect(serializedPlan).toContain('"summary"'); + expect(serializedPlan).toContain('"diagnostics"'); + expect(serializedPlan).toContain('"assertionCandidates"'); + }); + + it("generates deterministic candidate and diagnostic ordering in plan mode", async () => { + vi.mocked(improveTestFile).mockResolvedValue({ + reportPath: "e2e/sample.improve-report.json", + report: { + testFile: "e2e/sample.yaml", + generatedAt: new Date().toISOString(), + providerUsed: "playwright", + appliedBy: "plan_preview", + summary: { + unchanged: 0, + improved: 1, + fallback: 0, + warnings: 2, + assertionCandidates: 2, + appliedAssertions: 0, + skippedAssertions: 2, + runtimeFailingStepsRetained: 1, + runtimeFailingStepsRemoved: 0, + }, + stepFindings: [], + assertionCandidates: [ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#status", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: "visible", + applyStatus: "skipped_policy", + }, + { + index: 0, + afterAction: "fill", + candidate: { + action: "assertValue", + target: { value: "#name", kind: "css", source: "manual" }, + value: "Alice", + }, + confidence: 0.9, + rationale: "stable", + applyStatus: "skipped_low_confidence", + }, + ], + diagnostics: [ + { + code: "z_code", + level: "warn", + message: "z message", + }, + { + code: "a_code", + level: "info", + message: "a message", + }, + ], + }, + proposedTest: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + + await runImprove("e2e/sample.yaml", { plan: true }); + + const planWrite = vi.mocked(fs.writeFile).mock.calls.find(([filePath]) => + String(filePath).endsWith("sample.improve-plan.json") + ); + const plan = JSON.parse(String(planWrite?.[1] ?? "{}")) as { + diagnostics: Array<{ code: string }>; + assertionCandidates: Array<{ candidate: { action: string } }>; + }; + + expect(plan.diagnostics.map((diagnostic) => diagnostic.code)).toEqual([ + "a_code", + "z_code", + ]); + expect(plan.assertionCandidates.map((candidate) => candidate.candidate.action)).toEqual([ + "assertValue", + "assertVisible", + ]); + }); + + it("stores effective apply flags when assertions are disabled in plan mode", async () => { + await runImprove("e2e/sample.yaml", { plan: true, assertions: "none" }); + + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + applySelectors: true, + applyAssertions: false, + assertions: "none", + }) + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("e2e/sample.improve-plan.json"), + expect.stringContaining("\"applyAssertions\": false"), + "utf-8" + ); + }); + + it("rejects conflicting --plan and --apply-plan options", async () => { + await expect( + runImprove("e2e/sample.yaml", { + plan: true, + applyPlan: "e2e/sample.improve-plan.json", + }) + ).rejects.toThrow(/Cannot use --plan together with --apply-plan/); + }); + + it("rejects write-target flags in plan mode", async () => { + await expect( + runImprove("e2e/sample.yaml", { + plan: true, + output: "out.yaml", + }) + ).rejects.toThrow(/Cannot use --output or --in-place together with --plan/); + }); + + it("rejects apply flags in plan mode", async () => { + await expect( + runImprove("e2e/sample.yaml", { + plan: true, + apply: true, + }) + ).rejects.toThrow(/Cannot use --apply or --no-apply together with --plan/); + }); + + it("applies a generated plan to a copy by default", async () => { + vi.mocked(fs.readFile).mockImplementation(async (filePath) => { + if (String(filePath).endsWith("sample.improve-plan.json")) { + return JSON.stringify({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource(SAMPLE_YAML), + sourceReportPath: "sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 1, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 2, + }, + diagnostics: [ + { + code: "runtime_failing_step_retained", + level: "info", + message: "retained", + }, + ], + assertionCandidates: [ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#status", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: "visible", + applyStatus: "skipped_policy", + }, + ], + test: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + } + return SAMPLE_YAML; + }); + + await runImprove("e2e/sample.yaml", { applyPlan: "e2e/sample.improve-plan.json" }); + + expect(improveTestFile).not.toHaveBeenCalled(); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("e2e/sample.improved.yaml"), + expect.stringContaining("name: sample"), + "utf-8" + ); + expect(ui.step).toHaveBeenCalledWith( + expect.stringContaining("Original preserved at:") + ); + expect(ui.info).toHaveBeenCalledWith( + expect.stringContaining("skippedAssertions=2") + ); + }); + + it("applies a generated plan in place when requested", async () => { + vi.mocked(fs.readFile).mockImplementation(async (filePath) => { + if (String(filePath).endsWith("sample.improve-plan.json")) { + return JSON.stringify({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource(SAMPLE_YAML), + sourceReportPath: "sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 0, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 0, + }, + diagnostics: [], + assertionCandidates: [], + test: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + } + return SAMPLE_YAML; + }); + + await runImprove("e2e/sample.yaml", { + applyPlan: "e2e/sample.improve-plan.json", + inPlace: true, + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("e2e/sample.yaml"), + expect.stringContaining("name: sample"), + "utf-8" + ); + }); + + it("applies a generated plan to a custom output path", async () => { + vi.mocked(fs.readFile).mockImplementation(async (filePath) => { + if (String(filePath).endsWith("sample.improve-plan.json")) { + return JSON.stringify({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource(SAMPLE_YAML), + sourceReportPath: "sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 0, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 0, + }, + diagnostics: [], + assertionCandidates: [], + test: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + } + return SAMPLE_YAML; + }); + + await runImprove("e2e/sample.yaml", { + applyPlan: "e2e/sample.improve-plan.json", + output: "custom/out.yaml", + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("custom/out.yaml"), + expect.stringContaining("name: sample"), + "utf-8" + ); + }); + + it("accepts moved targets when the content fingerprint still matches", async () => { + vi.mocked(fs.readFile).mockImplementation(async (filePath) => { + if (String(filePath).endsWith("sample.improve-plan.json")) { + return JSON.stringify({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource(SAMPLE_YAML), + sourceReportPath: "sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 0, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 0, + }, + diagnostics: [], + assertionCandidates: [], + test: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + } + return SAMPLE_YAML; + }); + + await runImprove("e2e/moved-sample.yaml", { + applyPlan: "e2e/sample.improve-plan.json", + inPlace: true, + }); + + expect(ui.warn).toHaveBeenCalledWith( + expect.stringContaining("matched by content fingerprint") + ); + }); + + it("rejects plan apply when the source fingerprint no longer matches", async () => { + vi.mocked(fs.readFile).mockImplementation(async (filePath) => { + if (String(filePath).endsWith("sample.improve-plan.json")) { + return JSON.stringify({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource(SAMPLE_YAML), + sourceReportPath: "sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 0, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 0, + }, + diagnostics: [], + assertionCandidates: [], + test: { + name: "sample", + steps: [{ action: "navigate", url: "/" }], + }, + }); + } + return "name: changed\nsteps:\n - action: navigate\n url: /other\n"; + }); + + await expect( + runImprove("e2e/sample.yaml", { applyPlan: "e2e/sample.improve-plan.json" }) + ).rejects.toThrow(/Plan source mismatch/); + }); + + it("rejects report/profile flags in apply-plan mode", async () => { + await expect( + runImprove("e2e/sample.yaml", { + applyPlan: "e2e/sample.improve-plan.json", + report: "custom-report.json", + }) + ).rejects.toThrow(/Cannot combine --apply-plan with apply\/profile\/report flags/); + + await expect( + runImprove("e2e/sample.yaml", { + applyPlan: "e2e/sample.improve-plan.json", + assertions: "none", + }) + ).rejects.toThrow(/Cannot combine --apply-plan with apply\/profile\/report flags/); + }); +}); diff --git a/src/app/services/improve-service.ts b/src/app/services/improve-service.ts index 0daaac9..ec3867b 100644 --- a/src/app/services/improve-service.ts +++ b/src/app/services/improve-service.ts @@ -1,9 +1,26 @@ +import fs from "node:fs/promises"; import path from "node:path"; import { confirm } from "@inquirer/prompts"; import { improveTestFile } from "../../core/improve/improve.js"; -import { resolveImproveProfile } from "../options/improve-profile.js"; +import type { ImproveAppliedBy } from "../../core/improve/improve.js"; +import { + defaultImprovePlanPath, + hashImprovePlanSource, + improvePlanSchema, + type ImprovePlan, + relativizePlanPath, + resolvePlanPath, + sortPlanAssertionCandidates, + sortPlanDiagnostics, +} from "../../core/improve/improve-plan.js"; +import { stepsToYaml } from "../../core/transform/yaml-io.js"; +import { + resolveImproveProfile, + type ResolvedImproveProfile, +} from "../options/improve-profile.js"; import { formatImproveProfileSummary } from "../options/profile-summary.js"; import { ui } from "../../utils/ui.js"; +import { UserError } from "../../utils/errors.js"; import { buildExternalCliInvocationWarning, collectAssertionSkipDetails, @@ -16,13 +33,19 @@ export interface ImproveCliOptions { assertions?: string; assertionSource?: string; assertionPolicy?: string; + plan?: boolean; + applyPlan?: string; report?: string; + output?: string; + inPlace?: boolean; } export async function runImprove( testFile: string, opts: ImproveCliOptions ): Promise { + validateImproveModeOptions(opts); + const invocationWarning = buildExternalCliInvocationWarning( process.cwd(), process.argv[1], @@ -32,14 +55,43 @@ export async function runImprove( ui.warn(invocationWarning); } + if (opts.applyPlan) { + await applyImprovePlan( + testFile, + opts.applyPlan, + resolveImproveWriteTarget(testFile, opts) + ); + return; + } + + if (opts.plan) { + await generateImprovePlan(testFile, opts); + return; + } + let apply = opts.apply; if (apply === undefined) { + const promptMessage = opts.inPlace + ? `Apply improvements in-place to ${path.basename(testFile)}?` + : `Write improved copy to ${path.basename(resolveDefaultImproveOutputPath(testFile, opts.output))}?`; apply = await confirm({ - message: "Apply improvements to " + path.basename(testFile) + "?", + message: promptMessage, default: true, }); } - const profile = resolveImproveProfile({ ...opts, apply }); + const profile = applyImproveProfileMutations( + resolveImproveProfile({ ...opts, apply }) + ); + + if (!apply && (opts.output || opts.inPlace)) { + throw new UserError( + "Cannot use --output or --in-place when apply is disabled.", + "Remove --output/--in-place or run with --apply." + ); + } + + const wantsWrite = profile.applySelectors || profile.applyAssertions; + const writeTarget = wantsWrite ? resolveImproveWriteTarget(testFile, opts) : undefined; ui.info( formatImproveProfileSummary({ @@ -53,11 +105,17 @@ export async function runImprove( const improveOptions = { testFile, + ...(writeTarget?.outputPath ? { outputPath: writeTarget.outputPath } : {}), applySelectors: profile.applySelectors, applyAssertions: profile.applyAssertions, assertions: profile.assertions, assertionSource: profile.assertionSource, assertionPolicy: profile.assertionPolicy, + appliedBy: ( + profile.applySelectors || profile.applyAssertions + ? "manual_apply" + : "report_only" + ) as ImproveAppliedBy, }; const result = await improveTestFile( @@ -69,6 +127,9 @@ export async function runImprove( ui.success(`Improve report saved to ${result.reportPath}`); if (result.outputPath) { ui.success(`Applied improvements to ${result.outputPath}`); + if (path.resolve(result.outputPath) !== path.resolve(testFile)) { + ui.step(`Original preserved at: ${path.resolve(testFile)}`); + } } const runtimeFailingStepsRetained = @@ -97,7 +158,7 @@ export async function runImprove( result.report.summary.assertionFallbackAppliedWithNonFallbackSteps ?? 0; ui.info( - `Summary: improved=${result.report.summary.improved}, unchanged=${result.report.summary.unchanged}, fallback=${result.report.summary.fallback}, warnings=${result.report.summary.warnings}, assertionCandidates=${result.report.summary.assertionCandidates}, appliedAssertions=${result.report.summary.appliedAssertions}, skippedAssertions=${result.report.summary.skippedAssertions}, selectorRepairCandidates=${result.report.summary.selectorRepairCandidates ?? 0}, selectorRepairsApplied=${result.report.summary.selectorRepairsApplied ?? 0}, selectorRepairsAdoptedOnTie=${result.report.summary.selectorRepairsAdoptedOnTie ?? 0}, selectorRepairsGeneratedByPlaywrightRuntime=${result.report.summary.selectorRepairsGeneratedByPlaywrightRuntime ?? 0}, selectorRepairsAppliedFromPlaywrightRuntime=${result.report.summary.selectorRepairsAppliedFromPlaywrightRuntime ?? 0}, selectorRepairsGeneratedByPrivateFallback=${result.report.summary.selectorRepairsGeneratedByPrivateFallback ?? 0}, selectorRepairsAppliedFromPrivateFallback=${result.report.summary.selectorRepairsAppliedFromPrivateFallback ?? 0}, assertionCandidatesFilteredDynamic=${result.report.summary.assertionCandidatesFilteredDynamic ?? 0}, assertionCoverageStepsTotal=${assertionCoverageStepsTotal}, assertionCoverageStepsWithCandidates=${assertionCoverageStepsWithCandidates}, assertionCoverageStepsWithApplied=${assertionCoverageStepsWithApplied}, assertionCoverageCandidateRate=${assertionCoverageCandidateRate}, assertionCoverageAppliedRate=${assertionCoverageAppliedRate}, assertionFallbackApplied=${assertionFallbackApplied}, assertionFallbackAppliedOnlySteps=${assertionFallbackAppliedOnlySteps}, assertionFallbackAppliedWithNonFallbackSteps=${assertionFallbackAppliedWithNonFallbackSteps}, assertionInventoryStepsEvaluated=${assertionInventoryStepsEvaluated}, assertionInventoryCandidatesAdded=${assertionInventoryCandidatesAdded}, assertionInventoryGapStepsFilled=${assertionInventoryGapStepsFilled}, runtimeFailingStepsRetained=${runtimeFailingStepsRetained}, runtimeFailingStepsRemoved=${result.report.summary.runtimeFailingStepsRemoved ?? 0}` + `Summary: improved=${result.report.summary.improved}, unchanged=${result.report.summary.unchanged}, fallback=${result.report.summary.fallback}, warnings=${result.report.summary.warnings}, assertionCandidates=${result.report.summary.assertionCandidates}, appliedAssertions=${result.report.summary.appliedAssertions}, skippedAssertions=${result.report.summary.skippedAssertions}, selectorRepairCandidates=${result.report.summary.selectorRepairCandidates ?? 0}, selectorRepairsApplied=${result.report.summary.selectorRepairsApplied ?? 0}, selectorRepairsAdoptedOnTie=${result.report.summary.selectorRepairsAdoptedOnTie ?? 0}, selectorRepairsGeneratedByPlaywrightRuntime=${result.report.summary.selectorRepairsGeneratedByPlaywrightRuntime ?? 0}, selectorRepairsAppliedFromPlaywrightRuntime=${result.report.summary.selectorRepairsAppliedFromPlaywrightRuntime ?? 0}, assertionCandidatesFilteredDynamic=${result.report.summary.assertionCandidatesFilteredDynamic ?? 0}, assertionCoverageStepsTotal=${assertionCoverageStepsTotal}, assertionCoverageStepsWithCandidates=${assertionCoverageStepsWithCandidates}, assertionCoverageStepsWithApplied=${assertionCoverageStepsWithApplied}, assertionCoverageCandidateRate=${assertionCoverageCandidateRate}, assertionCoverageAppliedRate=${assertionCoverageAppliedRate}, assertionFallbackApplied=${assertionFallbackApplied}, assertionFallbackAppliedOnlySteps=${assertionFallbackAppliedOnlySteps}, assertionFallbackAppliedWithNonFallbackSteps=${assertionFallbackAppliedWithNonFallbackSteps}, assertionInventoryStepsEvaluated=${assertionInventoryStepsEvaluated}, assertionInventoryCandidatesAdded=${assertionInventoryCandidatesAdded}, assertionInventoryGapStepsFilled=${assertionInventoryGapStepsFilled}, runtimeFailingStepsRetained=${runtimeFailingStepsRetained}, runtimeFailingStepsRemoved=${result.report.summary.runtimeFailingStepsRemoved ?? 0}` ); const assertionStatusSummary = formatAssertionApplyStatusCounts(result.report.assertionCandidates); if (assertionStatusSummary) { @@ -120,3 +181,287 @@ export async function runImprove( ui.step("Apply improvements: ui-test improve " + path.resolve(testFile) + " --apply"); } } + +function resolveDefaultImproveOutputPath(testFile: string, overridePath?: string): string { + if (overridePath && overridePath.trim().length > 0) { + return overridePath.trim(); + } + + const absoluteInput = path.resolve(testFile); + const ext = path.extname(absoluteInput); + const base = ext ? absoluteInput.slice(0, -ext.length) : absoluteInput; + const effectiveExt = ext.length > 0 ? ext : ".yaml"; + return `${base}.improved${effectiveExt}`; +} + +async function generateImprovePlan(testFile: string, opts: ImproveCliOptions): Promise { + const profile = applyImproveProfileMutations( + resolveImproveProfile({ ...opts, apply: true }) + ); + ui.info( + formatImproveProfileSummary({ + applySelectors: profile.applySelectors, + applyAssertions: profile.applyAssertions, + assertions: profile.assertions, + assertionSource: profile.assertionSource, + assertionPolicy: profile.assertionPolicy, + }) + ); + + const result = await improveTestFile( + profile.reportPath === undefined + ? { + testFile, + applySelectors: profile.applySelectors, + applyAssertions: profile.applyAssertions, + assertions: profile.assertions, + assertionSource: profile.assertionSource, + assertionPolicy: profile.assertionPolicy, + dryRunWrite: true, + includeProposedTest: true, + appliedBy: "plan_preview", + } + : { + testFile, + applySelectors: profile.applySelectors, + applyAssertions: profile.applyAssertions, + assertions: profile.assertions, + assertionSource: profile.assertionSource, + assertionPolicy: profile.assertionPolicy, + dryRunWrite: true, + includeProposedTest: true, + appliedBy: "plan_preview", + reportPath: profile.reportPath, + } + ); + + if (!result.proposedTest) { + throw new UserError( + "Improve plan generation failed: no proposed test output was returned.", + "Retry with ui-test improve --plan." + ); + } + + const planPath = defaultImprovePlanPath(testFile); + const absoluteTestPath = path.resolve(testFile); + const sourceTestContent = await fs.readFile(absoluteTestPath, "utf-8"); + const plan = { + version: 2 as const, + generatedAt: new Date().toISOString(), + testFile: relativizePlanPath(planPath, absoluteTestPath), + testFileLocator: "relative_to_plan" as const, + testFileSha256: hashImprovePlanSource(sourceTestContent), + sourceReportPath: relativizePlanPath(planPath, result.reportPath), + sourceReportPathLocator: "relative_to_plan" as const, + appliedBy: "plan_preview" as const, + profile: { + assertions: profile.assertions, + assertionSource: profile.assertionSource, + assertionPolicy: profile.assertionPolicy, + applySelectors: profile.applySelectors, + applyAssertions: profile.applyAssertions, + }, + summary: { + runtimeFailingStepsRetained: + result.report.summary.runtimeFailingStepsRetained ?? 0, + runtimeFailingStepsRemoved: + result.report.summary.runtimeFailingStepsRemoved ?? 0, + skippedAssertions: result.report.summary.skippedAssertions, + }, + diagnostics: sortPlanDiagnostics( + result.report.diagnostics.map((diagnostic) => ({ + code: diagnostic.code, + level: diagnostic.level, + message: diagnostic.message, + })) + ), + assertionCandidates: sortPlanAssertionCandidates(result.report.assertionCandidates), + test: { + name: result.proposedTest.name, + description: result.proposedTest.description, + baseUrl: result.proposedTest.baseUrl, + steps: result.proposedTest.steps, + }, + }; + const validatedPlan = improvePlanSchema.parse(plan); + + await fs.mkdir(path.dirname(planPath), { recursive: true }); + await fs.writeFile(planPath, JSON.stringify(validatedPlan, null, 2), "utf-8"); + + ui.success(`Improve report saved to ${result.reportPath}`); + ui.success(`Improve plan saved to ${planPath}`); + ui.step(`Apply plan: ui-test improve ${absoluteTestPath} --apply-plan ${planPath}`); +} + +async function applyImprovePlan( + testFile: string, + planPath: string, + writeTarget: ImproveWriteTarget +): Promise { + const absoluteTestPath = path.resolve(testFile); + const absolutePlanPath = path.resolve(planPath); + + let parsed: unknown; + try { + const planContent = await fs.readFile(absolutePlanPath, "utf-8"); + parsed = JSON.parse(planContent) as unknown; + } catch { + throw new UserError( + `Could not read improve plan: ${absolutePlanPath}`, + "Generate a plan first with ui-test improve --plan." + ); + } + + let plan: ImprovePlan; + try { + plan = improvePlanSchema.parse(parsed); + } catch { + throw new UserError( + `Invalid improve plan format: ${absolutePlanPath}`, + "Regenerate the plan with ui-test improve --plan." + ); + } + + const expectedPlanTargetPath = + plan.version === 2 + ? resolvePlanPath(absolutePlanPath, plan.testFile, plan.testFileLocator) + : path.resolve(plan.testFile); + + if (plan.version === 2) { + let targetSourceContent: string; + try { + targetSourceContent = await fs.readFile(absoluteTestPath, "utf-8"); + } catch { + throw new UserError( + `Could not read target test file for plan apply: ${absoluteTestPath}`, + "Use the matching test file path or regenerate the plan from the current file." + ); + } + + const actualHash = hashImprovePlanSource(targetSourceContent); + if (actualHash !== plan.testFileSha256) { + throw new UserError( + `Plan source mismatch: ${absoluteTestPath} no longer matches the test content used to generate ${absolutePlanPath}.`, + "Regenerate the plan from the current test file before applying it." + ); + } + } else if (expectedPlanTargetPath !== absoluteTestPath) { + throw new UserError( + `Plan target mismatch: ${absolutePlanPath} targets ${plan.testFile}, not ${absoluteTestPath}.`, + "Use the matching test file argument or regenerate the plan." + ); + } + + const yamlOptions: { description?: string; baseUrl?: string } = {}; + if (plan.test.description !== undefined) { + yamlOptions.description = plan.test.description; + } + if (plan.test.baseUrl !== undefined) { + yamlOptions.baseUrl = plan.test.baseUrl; + } + + const yamlOut = stepsToYaml(plan.test.name, plan.test.steps, yamlOptions); + await fs.mkdir(path.dirname(writeTarget.destinationPath), { recursive: true }); + await fs.writeFile(writeTarget.destinationPath, yamlOut, "utf-8"); + + ui.success(`Applied improve plan: ${absolutePlanPath}`); + ui.success(`Updated test file: ${writeTarget.destinationPath}`); + ui.info( + `Plan summary: skippedAssertions=${plan.version === 2 ? plan.summary.skippedAssertions : 0}, runtimeFailingStepsRetained=${plan.version === 2 ? plan.summary.runtimeFailingStepsRetained : 0}, runtimeFailingStepsRemoved=${plan.version === 2 ? plan.summary.runtimeFailingStepsRemoved : 0}` + ); + if (writeTarget.destinationPath !== absoluteTestPath) { + ui.step(`Original preserved at: ${absoluteTestPath}`); + } + if (plan.version === 2 && expectedPlanTargetPath !== absoluteTestPath) { + ui.warn( + `Plan path resolved to ${expectedPlanTargetPath}, but the requested target matched by content fingerprint.` + ); + } +} + +interface ImproveWriteTarget { + destinationPath: string; + outputPath?: string; +} + +function validateImproveModeOptions(opts: ImproveCliOptions): void { + if (opts.plan && opts.applyPlan) { + throw new UserError( + "Cannot use --plan together with --apply-plan.", + "Choose one mode: --plan to generate or --apply-plan to apply." + ); + } + + if (opts.output && opts.inPlace) { + throw new UserError( + "Cannot combine --output with --in-place.", + "Use --output to write a copy, or --in-place to overwrite the input file." + ); + } + + if (opts.plan && (opts.output || opts.inPlace)) { + throw new UserError( + "Cannot use --output or --in-place together with --plan.", + "Plan mode generates JSON only. Apply it later with --apply-plan." + ); + } + + if (opts.plan && opts.apply !== undefined) { + throw new UserError( + "Cannot use --apply or --no-apply together with --plan.", + "Plan mode always generates a reviewable apply preview without writing YAML." + ); + } + + if (opts.applyPlan && opts.apply === false) { + throw new UserError( + "Cannot use --no-apply together with --apply-plan.", + "Remove --no-apply or use --plan to generate a reviewable plan." + ); + } + + if ( + opts.applyPlan && + (opts.apply === true || + opts.assertions !== undefined || + opts.assertionSource !== undefined || + opts.assertionPolicy !== undefined || + opts.report !== undefined) + ) { + throw new UserError( + "Cannot combine --apply-plan with apply/profile/report flags.", + "The plan already defines its assertions/profile and does not generate a new report when applied." + ); + } +} + +function resolveImproveWriteTarget( + testFile: string, + opts: Pick +): ImproveWriteTarget { + const absoluteTestPath = path.resolve(testFile); + if (opts.inPlace) { + return { destinationPath: absoluteTestPath }; + } + + const destinationPath = path.resolve( + resolveDefaultImproveOutputPath(testFile, opts.output) + ); + return { + destinationPath, + outputPath: destinationPath, + }; +} + +function applyImproveProfileMutations( + profile: ResolvedImproveProfile +): ResolvedImproveProfile { + if (profile.assertions === "none" && profile.applyAssertions) { + return { + ...profile, + applyAssertions: false, + }; + } + + return profile; +} diff --git a/src/core/improve/improve-plan.test.ts b/src/core/improve/improve-plan.test.ts new file mode 100644 index 0000000..0d73ecb --- /dev/null +++ b/src/core/improve/improve-plan.test.ts @@ -0,0 +1,172 @@ +import { describe, expect, it } from "vitest"; +import { + defaultImprovePlanPath, + hashImprovePlanSource, + improvePlanSchema, + relativizePlanPath, + resolvePlanPath, + sortPlanAssertionCandidates, + sortPlanDiagnostics, +} from "./improve-plan.js"; + +describe("improvePlan", () => { + it("builds a deterministic default plan path", () => { + expect(defaultImprovePlanPath("e2e/login.yaml")).toMatch(/e2e\/login\.improve-plan\.json$/); + expect(defaultImprovePlanPath("e2e/login")).toMatch(/e2e\/login\.improve-plan\.json$/); + }); + + it("accepts version 2 portable plan payloads", () => { + const parsed = improvePlanSchema.parse({ + version: 2, + generatedAt: new Date().toISOString(), + testFile: "../tests/sample.yaml", + testFileLocator: "relative_to_plan", + testFileSha256: hashImprovePlanSource("name: sample\nsteps:\n - action: navigate\n url: /\n"), + sourceReportPath: "../tests/sample.improve-report.json", + sourceReportPathLocator: "relative_to_plan", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + summary: { + runtimeFailingStepsRetained: 1, + runtimeFailingStepsRemoved: 0, + skippedAssertions: 2, + }, + diagnostics: [ + { + code: "runtime_failing_step_retained", + level: "info", + message: "retained", + }, + ], + assertionCandidates: [ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#status", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: "visible", + applyStatus: "skipped_policy", + }, + ], + test: { + name: "sample", + steps: [ + { + action: "navigate", + url: "/", + }, + ], + }, + }); + + expect(parsed.version).toBe(2); + expect(parsed.profile.assertionPolicy).toBe("balanced"); + if (parsed.version !== 2) { + throw new Error("Expected version 2 plan payload"); + } + expect(parsed.summary.skippedAssertions).toBe(2); + }); + + it("keeps backward compatibility with version 1 plans", () => { + const parsed = improvePlanSchema.parse({ + version: 1, + generatedAt: new Date().toISOString(), + testFile: "/tmp/sample.yaml", + sourceReportPath: "/tmp/sample.improve-report.json", + appliedBy: "plan_preview", + profile: { + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "balanced", + applySelectors: true, + applyAssertions: true, + }, + test: { + name: "sample", + steps: [ + { + action: "navigate", + url: "/", + }, + ], + }, + }); + + expect(parsed.version).toBe(1); + }); + + it("relativizes and resolves portable plan paths", () => { + const planPath = "/tmp/plans/sample.improve-plan.json"; + const targetPath = "/tmp/e2e/sample.yaml"; + const relativeTargetPath = relativizePlanPath(planPath, targetPath); + + expect(relativeTargetPath).toBe("../e2e/sample.yaml"); + expect(resolvePlanPath(planPath, relativeTargetPath, "relative_to_plan")).toBe(targetPath); + }); + + it("sorts diagnostics deterministically", () => { + const out = sortPlanDiagnostics([ + { code: "b", level: "warn", message: "z" }, + { code: "a", level: "warn", message: "z" }, + { code: "a", level: "info", message: "m" }, + ]); + + expect(out).toEqual([ + { code: "a", level: "info", message: "m" }, + { code: "a", level: "warn", message: "z" }, + { code: "b", level: "warn", message: "z" }, + ]); + }); + + it("sorts assertion candidates deterministically", () => { + const out = sortPlanAssertionCandidates([ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#b", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: "later", + }, + { + index: 0, + afterAction: "fill", + candidate: { + action: "assertValue", + target: { value: "#name", kind: "css", source: "manual" }, + value: "Alice", + }, + confidence: 0.9, + rationale: "stable", + }, + { + index: 1, + afterAction: "click", + candidate: { + action: "assertText", + target: { value: "#a", kind: "css", source: "manual" }, + text: "Saved", + }, + confidence: 0.8, + rationale: "earlier action", + }, + ]); + + expect(out.map((candidate) => candidate.candidate.action)).toEqual([ + "assertValue", + "assertText", + "assertVisible", + ]); + }); +}); diff --git a/src/core/improve/improve-plan.ts b/src/core/improve/improve-plan.ts new file mode 100644 index 0000000..975963c --- /dev/null +++ b/src/core/improve/improve-plan.ts @@ -0,0 +1,154 @@ +import { createHash } from "node:crypto"; +import path from "node:path"; +import { z } from "zod"; +import { stepSchema } from "../yaml-schema.js"; +import { + assertionApplyPolicySchema, + improveAppliedBySchema, +} from "./report-schema.js"; + +const improvePlanPathLocatorSchema = z.enum(["absolute", "relative_to_plan"]); + +const improvePlanProfileSchema = z.object({ + assertions: z.enum(["none", "candidates"]), + assertionSource: z.enum(["deterministic", "snapshot-native"]), + assertionPolicy: assertionApplyPolicySchema, + applySelectors: z.boolean(), + applyAssertions: z.boolean(), +}); + +const improvePlanTestSchema = z.object({ + name: z.string().min(1), + description: z.string().optional(), + baseUrl: z.string().url().optional(), + steps: z.array(stepSchema).min(1), +}); + +const improvePlanDiagnosticSchema = z.object({ + code: z.string().min(1), + level: z.enum(["info", "warn", "error"]), + message: z.string().min(1), +}); + +const improvePlanAssertionCandidateSchema = z.object({ + index: z.number().int().nonnegative(), + afterAction: z.string().min(1), + candidate: stepSchema, + confidence: z.number().min(0).max(1), + rationale: z.string().min(1), + coverageFallback: z.boolean().optional(), + stabilityScore: z.number().min(0).max(1).optional(), + candidateSource: z.enum(["deterministic", "snapshot_native"]).optional(), + stableStructural: z.boolean().optional(), + applyStatus: z + .enum([ + "applied", + "skipped_low_confidence", + "skipped_runtime_failure", + "skipped_policy", + "skipped_existing", + "not_requested", + ]) + .optional(), + applyMessage: z.string().min(1).optional(), +}); + +const improvePlanSummarySchema = z.object({ + runtimeFailingStepsRetained: z.number().int().nonnegative(), + runtimeFailingStepsRemoved: z.number().int().nonnegative(), + skippedAssertions: z.number().int().nonnegative(), +}); + +const improvePlanSchemaV1 = z.object({ + version: z.literal(1), + generatedAt: z.string().datetime(), + testFile: z.string().min(1), + sourceReportPath: z.string().min(1), + appliedBy: improveAppliedBySchema, + profile: improvePlanProfileSchema, + test: improvePlanTestSchema, +}); + +const improvePlanSchemaV2 = z.object({ + version: z.literal(2), + generatedAt: z.string().datetime(), + testFile: z.string().min(1), + testFileLocator: improvePlanPathLocatorSchema, + testFileSha256: z.string().regex(/^[a-f0-9]{64}$/), + sourceReportPath: z.string().min(1).optional(), + sourceReportPathLocator: improvePlanPathLocatorSchema.optional(), + appliedBy: improveAppliedBySchema, + profile: improvePlanProfileSchema, + summary: improvePlanSummarySchema, + diagnostics: z.array(improvePlanDiagnosticSchema), + assertionCandidates: z.array(improvePlanAssertionCandidateSchema), + test: improvePlanTestSchema, +}); + +export const improvePlanSchema = z.union([improvePlanSchemaV1, improvePlanSchemaV2]); + +export type ImprovePlan = z.infer; +export type ImprovePlanV1 = z.infer; +export type ImprovePlanV2 = z.infer; + +export function defaultImprovePlanPath(testFile: string): string { + const absolute = path.resolve(testFile); + const ext = path.extname(absolute); + if (ext.length === 0) { + return `${absolute}.improve-plan.json`; + } + return `${absolute.slice(0, -ext.length)}.improve-plan.json`; +} + +export function hashImprovePlanSource(content: string): string { + return createHash("sha256").update(content, "utf-8").digest("hex"); +} + +export function relativizePlanPath(planPath: string, targetPath: string): string { + return path.relative(path.dirname(path.resolve(planPath)), path.resolve(targetPath)); +} + +export function resolvePlanPath( + planPath: string, + targetPath: string, + locator: z.infer = "relative_to_plan" +): string { + if (locator === "absolute") { + return path.resolve(targetPath); + } + return path.resolve(path.dirname(path.resolve(planPath)), targetPath); +} + +export function sortPlanDiagnostics( + diagnostics: T[] +): T[] { + return [...diagnostics].sort((left, right) => { + const codeDelta = left.code.localeCompare(right.code); + if (codeDelta !== 0) return codeDelta; + const levelDelta = left.level.localeCompare(right.level); + if (levelDelta !== 0) return levelDelta; + return left.message.localeCompare(right.message); + }); +} + +export function sortPlanAssertionCandidates< + T extends { + index: number; + afterAction: string; + candidate: { action: string }; + confidence: number; + rationale: string; + }, +>(candidates: T[]): T[] { + return [...candidates].sort((left, right) => { + const indexDelta = left.index - right.index; + if (indexDelta !== 0) return indexDelta; + const actionDelta = left.afterAction.localeCompare(right.afterAction); + if (actionDelta !== 0) return actionDelta; + const candidateActionDelta = left.candidate.action.localeCompare(right.candidate.action); + if (candidateActionDelta !== 0) return candidateActionDelta; + const confidenceDelta = right.confidence - left.confidence; + if (confidenceDelta !== 0) return confidenceDelta; + return left.rationale.localeCompare(right.rationale); + }); +} From ed444e4a9b37fd15bafa820f43b2e93a98add867 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 07:30:24 +0100 Subject: [PATCH 07/19] Add deterministic planning scrutiny reports --- .../deterministic-improve-plan-core.json | 26 ++++++++++ ...rministic-planning-validator-blockers.json | 21 ++++++++ .../scrutiny/synthesis.json | 51 +++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 .factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-improve-plan-core.json create mode 100644 .factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-planning-validator-blockers.json create mode 100644 .factory/validation/deterministic-planning-core/scrutiny/synthesis.json diff --git a/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-improve-plan-core.json b/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-improve-plan-core.json new file mode 100644 index 0000000..91c5431 --- /dev/null +++ b/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-improve-plan-core.json @@ -0,0 +1,26 @@ +{ + "featureId": "deterministic-improve-plan-core", + "reviewedAt": "2026-03-09T06:26:58.784479+00:00", + "commitId": "89a3b4a8105657f051c0d4836616fd6ef48998c3", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "The implementation covers the feature contract: v2 improve plans now carry deterministic, reviewable payload metadata; plan generation remains non-mutating; apply-plan writes the reviewed payload directly; moved-file same-content applies with warning; and source drift fails closed before any YAML write. Focused service/schema/core tests provide direct evidence for these behaviors.", + "issues": [] + }, + "sharedStateObservations": [ + { + "area": "knowledge", + "observation": "Known full-suite baseline failures discovered by the worker are not captured in shared mission library notes, so future workers may keep re-discovering them.", + "evidence": "Handoff `/Users/vriesd/.factory/missions/0b99802b-e794-4378-8e69-1426fde695f4/handoffs/2026-03-08T23-57-33-900Z__deterministic-improve-plan-core__ad067bfe-64b6-4fb6-aa31-552a7b848184.json` lists pre-existing failures in `src/core/player.integration.test.ts` and `src/core/improve/assertion-candidates-inventory.test.ts`; `.factory/library/user-testing.md` currently has no known-failures section." + }, + { + "area": "skills", + "observation": "The skill requires a CLI-level sanity check but does not require the check to run against freshly built/current sources, which can allow stale `dist` verification.", + "evidence": "Transcript skeleton for session `ad067bfe-64b6-4fb6-aa31-552a7b848184` includes `node ./dist/index.js improve ... --apply-plan ...` without a preceding build step." + } + ], + "addressesFailureFrom": null, + "summary": "Pass. I reviewed the feature entry, handoff, transcript skeleton, skill, and commit diff; the deterministic plan/apply behavior matches the feature requirements and no blocking code defects were found in the reviewed implementation." +} \ No newline at end of file diff --git a/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-planning-validator-blockers.json b/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-planning-validator-blockers.json new file mode 100644 index 0000000..504da4c --- /dev/null +++ b/.factory/validation/deterministic-planning-core/scrutiny/reviews/deterministic-planning-validator-blockers.json @@ -0,0 +1,21 @@ +{ + "featureId": "deterministic-planning-validator-blockers", + "reviewedAt": "2026-03-09T06:26:41Z", + "commitId": "89a3b4a8105657f051c0d4836616fd6ef48998c3", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "The blocker fixes are minimal and aligned with deterministic-planning intent: overlay locator-handler triggering is narrowed to dialog/CMP contexts to avoid strict-mode collisions, network-idle warning coverage now matches the opt-in readiness contract, and assertion inventory expectations are updated to landmark-scoped fallback behavior.", + "issues": [] + }, + "sharedStateObservations": [ + { + "area": "conventions", + "observation": "Handoff commit provenance is ambiguous for dirty-worktree fix features: the handoff commitId points to a prior commit whose diff does not contain this feature's three edited files, so review evidence had to come from current working-tree file diffs.", + "evidence": "Handoff commitId=89a3b4a8105657f051c0d4836616fd6ef48998c3; `git show --stat 89a3b4a` changes only improve-service/improve-plan files, while `git diff -- src/core/runtime/overlay-handler.ts src/core/player.integration.test.ts src/core/improve/assertion-candidates-inventory.test.ts` shows the actual blocker-fix edits." + } + ], + "addressesFailureFrom": null, + "summary": "Pass. The feature restores validator blockers with targeted edits in exactly the expected areas (overlay handler trigger scope, network-idle opt-in test semantics, and scoped inventory fallback assertions), with no new blocking code issues found in the reviewed changes." +} diff --git a/.factory/validation/deterministic-planning-core/scrutiny/synthesis.json b/.factory/validation/deterministic-planning-core/scrutiny/synthesis.json new file mode 100644 index 0000000..6b990e8 --- /dev/null +++ b/.factory/validation/deterministic-planning-core/scrutiny/synthesis.json @@ -0,0 +1,51 @@ +{ + "milestone": "deterministic-planning-core", + "round": 1, + "status": "pass", + "validatorsRun": { + "test": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" test -- --maxWorkers=5", + "exitCode": 0 + }, + "typecheck": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run typecheck:test", + "exitCode": 0 + }, + "lint": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run lint", + "exitCode": 0 + } + }, + "reviewsSummary": { + "total": 2, + "passed": 2, + "failed": 0, + "failedFeatures": [] + }, + "blockingIssues": [], + "appliedUpdates": [], + "suggestedGuidanceUpdates": [ + { + "target": "skills/record-improve-worker", + "suggestion": "Require CLI sanity checks that exercise built outputs to run after an explicit build step (or against source entrypoints) so validation cannot silently use stale dist artifacts.", + "evidence": "Review of deterministic-improve-plan-core noted transcript usage of `node ./dist/index.js improve ... --apply-plan ...` without a preceding build step.", + "isSystemic": true + }, + { + "target": "AGENTS.md", + "suggestion": "For dirty-worktree fix features, require handoffs to include explicit edited file paths and diff anchors in addition to commitId, since commitId may not isolate the feature's changes.", + "evidence": "Review of deterministic-planning-validator-blockers found handoff commitId `89a3b4a...` did not include the blocker-fix files; evidence had to come from direct working-tree diffs.", + "isSystemic": true + } + ], + "rejectedObservations": [ + { + "observation": "Known full-suite baseline failures should be added to mission library notes.", + "reason": "ambiguous" + } + ], + "previousRound": null +} From 71ac76085f06c1e39238594f003388c378169a0c Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 07:46:05 +0100 Subject: [PATCH 08/19] Add deterministic-planning-core user-testing validation artifacts --- .factory/library/user-testing.md | 41 +++++++++++++ .../user-testing/flows/cli-apply-core.json | 50 ++++++++++++++++ .../user-testing/flows/cli-plan-core.json | 57 +++++++++++++++++++ .../user-testing/synthesis.json | 35 ++++++++++++ 4 files changed, 183 insertions(+) create mode 100644 .factory/library/user-testing.md create mode 100644 .factory/validation/deterministic-planning-core/user-testing/flows/cli-apply-core.json create mode 100644 .factory/validation/deterministic-planning-core/user-testing/flows/cli-plan-core.json create mode 100644 .factory/validation/deterministic-planning-core/user-testing/synthesis.json diff --git a/.factory/library/user-testing.md b/.factory/library/user-testing.md new file mode 100644 index 0000000..571c02a --- /dev/null +++ b/.factory/library/user-testing.md @@ -0,0 +1,41 @@ +# User Testing + +Testing surface, tools, setup steps, and known quirks. + +**What belongs here:** how validators/workers should exercise the user-facing surface. + +--- + +## Primary Surface + +- CLI commands: + - `ui-test record` + - `ui-test improve --plan` + - `ui-test improve --apply-plan` + - `ui-test play` + +## Preferred Validation Path + +1. Use focused Vitest coverage for record/improve behavior. +2. Use `src/core/improve/improve.dynamic.integration.test.ts` for brittle-fixture repair proof. +3. Use `src/core/player.integration.test.ts` plus `scripts/run-headed-parity.test.mjs` for parity coverage. +4. Final gate: `npm run test:parity:headed`. + +## Constraints + +- Do not use live external websites as proof of determinism. +- Prefer controlled local fixtures and ephemeral localhost servers created by tests. +- Optional manual slice may use the example app on `127.0.0.1:5173` only if needed. + +## Flow Validator Guidance: CLI + +- Run CLI validations in isolated temp directories (`mktemp -d`) to avoid file collisions between parallel validators. +- Use unique filename prefixes per validator (for example `flow-a-*`, `flow-b-*`) for plan/report/output artifacts. +- Do not reuse another validator's generated plan, report, or YAML output paths. +- Keep execution local and deterministic: use repository fixtures/tests only; never use external websites. +- Prefer `vitest run ` coverage that directly maps to assigned assertions. + +## Known Validation Quirks + +- `improve` runtime treats `data:` navigation URLs as relative and reports `Cannot resolve relative navigation URL`; this can be used as deterministic runtime-failure evidence in candidate-skip assertions. +- If transient local TypeScript worktree errors block `npm run build`, validators may use the existing `dist/bin/ui-test.js` binary for CLI-flow checks. diff --git a/.factory/validation/deterministic-planning-core/user-testing/flows/cli-apply-core.json b/.factory/validation/deterministic-planning-core/user-testing/flows/cli-apply-core.json new file mode 100644 index 0000000..7e17d97 --- /dev/null +++ b/.factory/validation/deterministic-planning-core/user-testing/flows/cli-apply-core.json @@ -0,0 +1,50 @@ +{ + "milestone": "deterministic-planning-core", + "flowId": "cli-apply-core", + "assertionResults": [ + { + "id": "VAL-IMPROVE-003", + "status": "pass", + "reason": "Applying the reviewed plan produced YAML structurally identical to plan.test payload, indicating apply used the reviewed payload rather than recomputing recommendations.", + "evidence": [ + "CLI: \u2714 Applied improve plan: /tmp/utv-dpc-flow-b/flow-b-source.improve-plan.json", + "CLI: \u2714 Updated test file: /tmp/utv-dpc-flow-b/flow-b-source.improved.yaml", + "Comparison: val003_structural_match=true (plan.test vs flow-b-source.improved.yaml)", + "Artifact: /tmp/utv-dpc-flow-b/flow-b-source.improved.yaml" + ] + }, + { + "id": "VAL-IMPROVE-004", + "status": "pass", + "reason": "Applying the same plan to moved-but-identical content succeeded and emitted explicit fingerprint-match warning while writing plan-matching output.", + "evidence": [ + "CLI: \u26a0 Plan path resolved to /tmp/utv-dpc-flow-b/flow-b-source.yaml, but the requested target matched by content fingerprint.", + "CLI: \u2714 Updated test file: /tmp/utv-dpc-flow-b/flow-b-moved.improved.yaml", + "Comparison: val004_structural_match=true (plan.test vs flow-b-moved.improved.yaml)", + "Artifact: /tmp/utv-dpc-flow-b/flow-b-moved.yaml" + ] + }, + { + "id": "VAL-IMPROVE-005", + "status": "pass", + "reason": "Applying the reviewed plan to drifted source content failed closed with mismatch error and wrote no output; source file remained unchanged.", + "evidence": [ + "Execution: val005_exit_code=1", + "CLI: Plan source mismatch: /tmp/utv-dpc-flow-b/flow-b-drift.yaml no longer matches the test content used to generate /tmp/utv-dpc-flow-b/flow-b-source.improve-plan.json.", + "File integrity: val005_source_unchanged=true", + "Write guard: val005_output_written=no", + "Log: /tmp/utv-dpc-flow-b/val005-cli.log" + ] + } + ], + "frictions": [ + "npm run build failed in current worktree due TypeScript exactOptionalPropertyTypes errors, so validation used existing dist CLI binary (/Users/vriesd/projects/ui-test/dist/bin/ui-test.js)." + ], + "blockers": [], + "toolsUsed": [ + "Read", + "Grep", + "Execute", + "TodoWrite" + ] +} \ No newline at end of file diff --git a/.factory/validation/deterministic-planning-core/user-testing/flows/cli-plan-core.json b/.factory/validation/deterministic-planning-core/user-testing/flows/cli-plan-core.json new file mode 100644 index 0000000..fe5474e --- /dev/null +++ b/.factory/validation/deterministic-planning-core/user-testing/flows/cli-plan-core.json @@ -0,0 +1,57 @@ +{ + "milestone": "deterministic-planning-core", + "flowId": "cli-plan-core", + "assertionResults": [ + { + "id": "VAL-IMPROVE-001", + "status": "pass", + "reason": "`ui-test improve --plan` generated both plan and report artifacts while leaving the source YAML unchanged.", + "evidence": [ + "Command: node /Users/vriesd/projects/ui-test/dist/bin/ui-test.js improve /tmp/utv-dpc-flow-a/flow-a-plan.yaml --plan --report /tmp/utv-dpc-flow-a/artifacts/flow-a-plan.improve-report.json (exit 0)", + "Artifacts: /tmp/utv-dpc-flow-a/flow-a-plan.improve-plan.json and /tmp/utv-dpc-flow-a/artifacts/flow-a-plan.improve-report.json both exist", + "Source checksum before/after plan runs remained cfb0f03f77aae0cc408480bf107337f28490c83d28540de1f9c1d9301ffab789" + ] + }, + { + "id": "VAL-IMPROVE-002", + "status": "pass", + "reason": "Repeated `--plan` runs on unchanged input produced identical normalized plan content, stable proposed test content, and stable assertion candidate ordering.", + "evidence": [ + "Run1 artifact: /tmp/utv-dpc-flow-a/artifacts/flow-a-plan.improve-plan.run1.json; Run2 artifact: /tmp/utv-dpc-flow-a/flow-a-plan.improve-plan.json", + "Comparator output: normalized_plan_equal=True, proposed_test_equal=True", + "Comparator output: run1_assertion_order=['assertValue','assertVisible'], run2_assertion_order=['assertValue','assertVisible']; run1_indexes=[1,2], run2_indexes=[1,2]" + ] + }, + { + "id": "VAL-IMPROVE-006", + "status": "pass", + "reason": "Runtime-failing assertion candidates were surfaced as skipped and runtime-failing steps were retained by safety guard, with no silent assertion writes into YAML.", + "evidence": [ + "Command: node /Users/vriesd/projects/ui-test/dist/bin/ui-test.js improve /tmp/utv-dpc-flow-a/flow-a-candidate-skip.yaml --apply --in-place --report /tmp/utv-dpc-flow-a/artifacts/flow-a-candidate-skip.improve-report.json (exit 0)", + "CLI summary included: Assertion apply status: skipped_runtime_failure=2 and runtimeFailingStepsRetained=2, runtimeFailingStepsRemoved=0", + "Report /tmp/utv-dpc-flow-a/artifacts/flow-a-candidate-skip.improve-report.json contains assertionCandidates[*].applyStatus=skipped_runtime_failure and diagnostics code runtime_failing_step_retained", + "Diff /tmp/utv-dpc-flow-a/artifacts/flow-a-candidate-skip.before.yaml vs /tmp/utv-dpc-flow-a/flow-a-candidate-skip.yaml shows formatting/key-order changes only; no new assert* steps were inserted" + ] + }, + { + "id": "VAL-IMPROVE-007", + "status": "pass", + "reason": "When apply-mode mutations produced schema-invalid output (no remaining steps), CLI failed closed, wrote diagnostics report, and left source YAML untouched.", + "evidence": [ + "Command: node /Users/vriesd/projects/ui-test/dist/bin/ui-test.js improve /tmp/utv-dpc-flow-a/flow-a-invalid-output.yaml --apply --in-place --assertions none --report /tmp/utv-dpc-flow-a/artifacts/flow-a-invalid-output.improve-report.json (exit 1)", + "Terminal output: 'Improve apply aborted: generated output is invalid and was not written' with validation issue 'steps: Test must have at least one step'", + "Report /tmp/utv-dpc-flow-a/artifacts/flow-a-invalid-output.improve-report.json includes diagnostics code apply_write_blocked_invalid_output", + "Diff /tmp/utv-dpc-flow-a/artifacts/flow-a-invalid-output.before.yaml vs /tmp/utv-dpc-flow-a/flow-a-invalid-output.yaml returned no differences (exit 0)" + ] + } + ], + "frictions": [ + "Navigation to data: URLs is treated as a relative navigation in improve runtime ('Cannot resolve relative navigation URL'); used this deterministic behavior intentionally for runtime-failure candidate/retention validation evidence." + ], + "blockers": [], + "toolsUsed": [ + "execute", + "read", + "grep" + ] +} diff --git a/.factory/validation/deterministic-planning-core/user-testing/synthesis.json b/.factory/validation/deterministic-planning-core/user-testing/synthesis.json new file mode 100644 index 0000000..efca342 --- /dev/null +++ b/.factory/validation/deterministic-planning-core/user-testing/synthesis.json @@ -0,0 +1,35 @@ +{ + "milestone": "deterministic-planning-core", + "round": 1, + "status": "pass", + "assertionsSummary": { + "total": 7, + "passed": 7, + "failed": 0, + "blocked": 0 + }, + "passedAssertions": [ + "VAL-IMPROVE-001", + "VAL-IMPROVE-002", + "VAL-IMPROVE-003", + "VAL-IMPROVE-004", + "VAL-IMPROVE-005", + "VAL-IMPROVE-006", + "VAL-IMPROVE-007" + ], + "failedAssertions": [], + "blockedAssertions": [], + "appliedUpdates": [ + { + "target": "user-testing.md", + "description": "Added Flow Validator Guidance for CLI surface covering temp-directory isolation, unique artifact prefixes, and local-only deterministic boundaries for parallel subagents.", + "source": "setup" + }, + { + "target": "user-testing.md", + "description": "Captured CLI validation quirks from flow reports: deterministic data: URL relative-resolution behavior and fallback to existing dist binary when local build is blocked by unrelated worktree type errors.", + "source": "flow-report" + } + ], + "previousRound": null +} From 64f33ccb3ffa750d7670101c9f21c41b4558e296 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 07:53:14 +0100 Subject: [PATCH 09/19] test: cover recording provenance persistence Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/app/services/record-service.test.ts | 133 +++++++++++++++++++- src/core/recorder.test.ts | 18 +++ src/core/recording/canonical-events.test.ts | 104 +++++++++++++++ 3 files changed, 251 insertions(+), 4 deletions(-) create mode 100644 src/core/recording/canonical-events.test.ts diff --git a/src/app/services/record-service.test.ts b/src/app/services/record-service.test.ts index 1a60cc4..c624720 100644 --- a/src/app/services/record-service.test.ts +++ b/src/app/services/record-service.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; +import path from "node:path"; import { UserError } from "../../utils/errors.js"; vi.mock("node:fs/promises", () => ({ @@ -55,6 +56,7 @@ function mockRecordDefaults() { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -133,12 +135,34 @@ describe("runRecord auto-improve", () => { expect(improveTestFile).toHaveBeenCalledWith({ testFile: "e2e/sample.yaml", + applySelectors: false, + applyAssertions: false, + assertions: "candidates", + assertionPolicy: "reliable", + appliedBy: "report_only", + }); + expect(ui.info).toHaveBeenCalledWith("Auto-improve report: no recommendations"); + }); + + it("supports explicit auto-improve apply mode", async () => { + await runRecord({ + name: "sample", + url: "http://127.0.0.1:5173", + description: "demo", + outputDir: "e2e", + browser: "firefox", + improveMode: "apply", + }); + + expect(improveTestFile).toHaveBeenCalledWith({ + testFile: "e2e/sample.yaml", + outputPath: "e2e/sample.improved.yaml", applySelectors: true, applyAssertions: true, assertions: "candidates", assertionPolicy: "reliable", + appliedBy: "auto_apply", }); - expect(ui.info).toHaveBeenCalledWith("Auto-improve: no changes needed"); }); it("skips auto-improve when improve is false", async () => { @@ -160,6 +184,7 @@ describe("runRecord auto-improve", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 0, improved: 2, @@ -190,8 +215,8 @@ describe("runRecord auto-improve", () => { browser: "firefox", }); - expect(ui.success).toHaveBeenCalledWith( - "Auto-improve: 2 selectors improved, 1 assertions applied, 1 transient steps removed" + expect(ui.info).toHaveBeenCalledWith( + "Auto-improve report: 2 selector recommendations, 1 assertion candidates" ); }); @@ -201,6 +226,7 @@ describe("runRecord auto-improve", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 0, improved: 1, @@ -224,6 +250,7 @@ describe("runRecord auto-improve", () => { description: "demo", outputDir: "e2e", browser: "firefox", + improveMode: "apply", }); expect(ui.success).toHaveBeenCalledWith( @@ -237,6 +264,7 @@ describe("runRecord auto-improve", () => { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 0, improved: 1, @@ -265,6 +293,7 @@ describe("runRecord auto-improve", () => { description: "demo", outputDir: "e2e", browser: "firefox", + improveMode: "apply", }); expect(ui.success).toHaveBeenCalledWith( @@ -287,7 +316,28 @@ describe("runRecord auto-improve", () => { expect.stringContaining("browser crashed") ); expect(ui.warn).toHaveBeenCalledWith( - expect.stringContaining("You can run it manually") + "You can run it manually: ui-test improve " + + path.resolve("e2e/sample.yaml") + + " --no-apply" + ); + }); + + it("preserves apply mode in the manual retry hint when auto-improve apply fails", async () => { + vi.mocked(improveTestFile).mockRejectedValue(new Error("browser crashed")); + + await runRecord({ + name: "sample", + url: "http://127.0.0.1:5173", + description: "demo", + outputDir: "e2e", + browser: "firefox", + improveMode: "apply", + }); + + expect(ui.warn).toHaveBeenCalledWith( + "You can run it manually: ui-test improve " + + path.resolve("e2e/sample.yaml") + + " --apply" ); }); }); @@ -311,6 +361,7 @@ describe("runRecordFromFile", () => { testFile: "e2e/login-flow.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 0, @@ -337,11 +388,61 @@ describe("runRecordFromFile", () => { expect.stringContaining("name: Login Flow"), "utf-8" ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("baseUrl: https://example.com"), + "utf-8" + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("url: /login"), + "utf-8" + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("source: devtools-import"), + "utf-8" + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("kind: locatorExpression"), + "utf-8" + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("value: \"getByRole('button', { name: 'Submit' })\""), + "utf-8" + ); expect(ui.success).toHaveBeenCalledWith( expect.stringContaining("Test saved to") ); }); + it("imports first navigation as normalized path while preserving derived baseUrl", async () => { + vi.mocked(fs.readFile).mockResolvedValue( + JSON.stringify({ + title: "Deep Link", + steps: [ + { type: "navigate", url: "https://example.com/login?next=%2Fhome#cta" }, + { type: "click", selectors: [["#submit"]] }, + ], + }) + ); + + await runRecord({ fromFile: "/tmp/deep-link.json", improve: false }); + + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("deep-link.yaml"), + expect.stringContaining("baseUrl: https://example.com"), + "utf-8" + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expect.stringContaining("deep-link.yaml"), + expect.stringContaining("url: /login?next=%2Fhome#cta"), + "utf-8" + ); + }); + it("throws UserError when file does not exist", async () => { vi.mocked(fs.readFile).mockRejectedValue(new Error("ENOENT")); @@ -392,4 +493,28 @@ describe("runRecordFromFile", () => { expect(improveTestFile).not.toHaveBeenCalled(); }); + + it("runs from-file auto-improve in report mode by default", async () => { + await runRecord({ fromFile: "/tmp/recording.json" }); + + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + applySelectors: false, + applyAssertions: false, + appliedBy: "report_only", + }) + ); + }); + + it("runs from-file auto-improve in apply mode when requested", async () => { + await runRecord({ fromFile: "/tmp/recording.json", improveMode: "apply" }); + + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + applySelectors: true, + applyAssertions: true, + appliedBy: "auto_apply", + }) + ); + }); }); diff --git a/src/core/recorder.test.ts b/src/core/recorder.test.ts index 7ce5eef..1e805f7 100644 --- a/src/core/recorder.test.ts +++ b/src/core/recorder.test.ts @@ -110,7 +110,13 @@ describe("record", () => { expect(result.recordingMode).toBe("codegen"); expect(result.stepCount).toBeGreaterThan(0); expect(saved).toContain("name: Codegen Recording"); + expect(saved).toContain("baseUrl: http://127.0.0.1:5173"); + expect(saved).toContain("url: /"); expect(saved).toContain("action: click"); + expect(saved).toContain("target:"); + expect(saved).toContain("value: \"getByRole('button', { name: 'Save' })\""); + expect(saved).toContain("kind: locatorExpression"); + expect(saved).toContain("source: codegen"); await fs.rm(outputDir, { recursive: true, force: true }); }); @@ -265,4 +271,16 @@ describe("normalizeFirstNavigate", () => { expect(steps[0]).toEqual({ action: "navigate", url: "/page#section" }); }); + + it("preserves first-navigation query and hash for persisted baseUrl context", () => { + const steps = normalizeFirstNavigate( + [{ action: "navigate", url: "https://redirect.example.com/consent" }], + "https://example.com/start?next=%2Fcheckout#summary" + ); + + expect(steps[0]).toEqual({ + action: "navigate", + url: "/start?next=%2Fcheckout#summary", + }); + }); }); diff --git a/src/core/recording/canonical-events.test.ts b/src/core/recording/canonical-events.test.ts new file mode 100644 index 0000000..2a1d55f --- /dev/null +++ b/src/core/recording/canonical-events.test.ts @@ -0,0 +1,104 @@ +import { describe, expect, it } from "vitest"; +import { playwrightCodeToSteps } from "../transform/playwright-ast-transform.js"; +import { devtoolsRecordingToSteps } from "../transform/devtools-recording-adapter.js"; +import { canonicalEventsToSteps, stepsToCanonicalEvents } from "./canonical-events.js"; +import type { Step } from "../yaml-schema.js"; + +describe("canonical events", () => { + it("round-trips steps deterministically", () => { + const steps = [ + { action: "navigate", url: "/" }, + { + action: "click", + target: { + value: "getByRole('button', { name: 'Submit' })", + kind: "locatorExpression", + source: "manual", + confidence: 0.91, + }, + }, + { + action: "fill", + target: { value: "#email", kind: "css", source: "manual" }, + text: "user@example.com", + }, + ] as const; + + const canonical = stepsToCanonicalEvents([...steps]); + const rebuilt = canonicalEventsToSteps(canonical); + const canonicalAgain = stepsToCanonicalEvents(rebuilt); + + expect(canonicalAgain).toEqual(canonical); + expect(rebuilt).toEqual(steps); + }); + + it("normalizes adapter outputs through the same canonical action contract", () => { + const playwrightCode = [ + "import { test, expect } from '@playwright/test';", + "test('recording', async ({ page }) => {", + " await page.goto('/');", + " await page.getByRole('button', { name: 'Submit' }).click();", + " await page.locator('#email').fill('user@example.com');", + "});", + ].join("\n"); + + const devtoolsRecording = JSON.stringify({ + title: "recording", + steps: [ + { type: "navigate", url: "/" }, + { type: "click", selectors: [["aria/Submit[role=\"button\"]"]] }, + { type: "change", selectors: [["#email"]], value: "user@example.com" }, + ], + }); + + const playwrightKinds = stepsToCanonicalEvents(playwrightCodeToSteps(playwrightCode)).map( + (event) => event.kind + ); + const devtoolsKinds = stepsToCanonicalEvents(devtoolsRecordingToSteps(devtoolsRecording).steps).map( + (event) => event.kind + ); + + expect(playwrightKinds).toEqual(["navigate", "click", "fill"]); + expect(devtoolsKinds).toEqual(["navigate", "click", "fill"]); + }); + + it("preserves selector provenance through canonical round-trips", () => { + const steps: Step[] = [ + { + action: "click", + target: { + value: "getByRole('button', { name: 'Continue' })", + kind: "locatorExpression", + source: "codegen", + raw: "page.getByRole('button', { name: 'Continue' })", + framePath: ["iframe[name='checkout']"], + confidence: 0.9, + warning: "preferred locator", + fallbacks: [{ value: "#continue", kind: "css", source: "codegen" }], + }, + }, + ]; + + const rebuilt = canonicalEventsToSteps(stepsToCanonicalEvents([...steps])); + + expect(rebuilt).toEqual(steps); + }); + + it("preserves normalized navigation context through canonical round-trips", () => { + const steps: Step[] = [ + { action: "navigate", url: "/start?next=%2Fcheckout#summary" }, + { + action: "click", + target: { + value: "#continue", + kind: "css", + source: "manual", + }, + }, + ]; + + const rebuilt = canonicalEventsToSteps(stepsToCanonicalEvents(steps)); + + expect(rebuilt).toEqual(steps); + }); +}); From 975c748a8bdf6dd2e408039a0146980010d2ade1 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 07:57:24 +0100 Subject: [PATCH 10/19] Make record auto-apply preserve raw output Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/app/options/profile-summary.test.ts | 10 ++ src/app/options/profile-summary.ts | 3 +- src/app/options/record-profile.test.ts | 6 + src/app/options/record-profile.ts | 19 +++ src/app/services/record-service.test.ts | 68 +++++++- src/app/services/record-service.ts | 198 ++++++++++++++++-------- src/commands/record.test.ts | 13 ++ src/commands/record.ts | 7 + 8 files changed, 251 insertions(+), 73 deletions(-) diff --git a/src/app/options/profile-summary.test.ts b/src/app/options/profile-summary.test.ts index 2c59311..4fb5e5a 100644 --- a/src/app/options/profile-summary.test.ts +++ b/src/app/options/profile-summary.test.ts @@ -9,6 +9,7 @@ describe("profile summary formatting", () => { it("formats record summary", () => { const out = formatRecordingProfileSummary({ browser: "chromium", + improveMode: "apply", device: "iPhone 13", testIdAttribute: "data-qa", loadStorage: ".auth/in.json", @@ -16,9 +17,18 @@ describe("profile summary formatting", () => { }); expect(out).toContain("browser=chromium"); + expect(out).toContain("improveMode=apply"); expect(out).toContain("loadStorage=.auth/in.json"); }); + it("defaults record summary improve mode to apply", () => { + const out = formatRecordingProfileSummary({ + browser: "firefox", + }); + + expect(out).toContain("improveMode=apply"); + }); + it("formats improve summary", () => { const out = formatImproveProfileSummary({ applySelectors: false, diff --git a/src/app/options/profile-summary.ts b/src/app/options/profile-summary.ts index f4868bd..5c5f27a 100644 --- a/src/app/options/profile-summary.ts +++ b/src/app/options/profile-summary.ts @@ -2,12 +2,13 @@ import type { RecordBrowser } from "../../core/recorder.js"; export function formatRecordingProfileSummary(profile: { browser: RecordBrowser; + improveMode?: "off" | "report" | "apply"; device?: string; testIdAttribute?: string; loadStorage?: string; saveStorage?: string; }): string { - return `Recording profile: browser=${profile.browser}, device=${profile.device ?? "(none)"}, testIdAttr=${profile.testIdAttribute ?? "(default)"}, loadStorage=${profile.loadStorage ?? "(none)"}, saveStorage=${profile.saveStorage ?? "(none)"}`; + return `Recording profile: browser=${profile.browser}, improveMode=${profile.improveMode ?? "apply"}, device=${profile.device ?? "(none)"}, testIdAttr=${profile.testIdAttribute ?? "(default)"}, loadStorage=${profile.loadStorage ?? "(none)"}, saveStorage=${profile.saveStorage ?? "(none)"}`; } export function formatImproveProfileSummary(profile: { diff --git a/src/app/options/record-profile.test.ts b/src/app/options/record-profile.test.ts index dcea8a3..44ce2e6 100644 --- a/src/app/options/record-profile.test.ts +++ b/src/app/options/record-profile.test.ts @@ -3,6 +3,7 @@ import { UserError } from "../../utils/errors.js"; import { normalizeRecordUrl, parseRecordBrowser, + parseRecordImproveMode, resolveRecordProfile, } from "./record-profile.js"; @@ -23,6 +24,7 @@ describe("resolveRecordProfile", () => { loadStorage: ".auth/in.json", saveStorage: ".auth/out.json", outputDir: "e2e", + improveMode: "apply", }); }); @@ -30,16 +32,20 @@ describe("resolveRecordProfile", () => { const out = resolveRecordProfile({}); expect(out.browser).toBe("chromium"); expect(out.outputDir).toBe("e2e"); + expect(out.improveMode).toBe("apply"); }); }); describe("record-profile parsing", () => { it("parses valid enums", () => { expect(parseRecordBrowser("Webkit")).toBe("webkit"); + expect(parseRecordImproveMode("Off")).toBe("off"); + expect(parseRecordImproveMode("Apply")).toBe("apply"); }); it("rejects invalid enums", () => { expect(() => parseRecordBrowser("safari")).toThrow(UserError); + expect(() => parseRecordImproveMode("fast")).toThrow(UserError); }); it("normalizes record URLs", () => { diff --git a/src/app/options/record-profile.ts b/src/app/options/record-profile.ts index 5e96280..12cf357 100644 --- a/src/app/options/record-profile.ts +++ b/src/app/options/record-profile.ts @@ -2,6 +2,8 @@ import type { RecordBrowser } from "../../core/recorder.js"; import { PLAY_DEFAULT_TEST_DIR } from "../../core/play/play-defaults.js"; import { UserError } from "../../utils/errors.js"; +export type RecordImproveMode = "off" | "report" | "apply"; + export interface RecordProfileInput { browser?: string; device?: string; @@ -9,6 +11,7 @@ export interface RecordProfileInput { loadStorage?: string; saveStorage?: string; outputDir?: string; + improveMode?: string; } export interface ResolvedRecordProfile { @@ -18,6 +21,7 @@ export interface ResolvedRecordProfile { loadStorage?: string; saveStorage?: string; outputDir: string; + improveMode: RecordImproveMode; } export function resolveRecordProfile( @@ -26,6 +30,7 @@ export function resolveRecordProfile( const profile: ResolvedRecordProfile = { browser: parseRecordBrowser(input.browser) ?? "chromium", outputDir: input.outputDir ?? PLAY_DEFAULT_TEST_DIR, + improveMode: parseRecordImproveMode(input.improveMode) ?? "apply", }; const device = cleanOptional(input.device); @@ -59,6 +64,20 @@ export function parseRecordBrowser(value: string | undefined): RecordBrowser | u ); } +export function parseRecordImproveMode( + value: string | undefined +): RecordImproveMode | undefined { + if (!value) return undefined; + const normalized = value.trim().toLowerCase(); + if (normalized === "off" || normalized === "report" || normalized === "apply") { + return normalized; + } + throw new UserError( + `Invalid improve mode: ${value}`, + "Use --improve-mode off, --improve-mode report, or --improve-mode apply" + ); +} + const PROTOCOL_PREFIX = /^[a-zA-Z][a-zA-Z\d+.-]*:\/\//; export function normalizeRecordUrl(value: string): string { diff --git a/src/app/services/record-service.test.ts b/src/app/services/record-service.test.ts index c624720..5cc4bb9 100644 --- a/src/app/services/record-service.test.ts +++ b/src/app/services/record-service.test.ts @@ -133,6 +133,27 @@ describe("runRecord auto-improve", () => { browser: "firefox", }); + expect(improveTestFile).toHaveBeenCalledWith({ + testFile: "e2e/sample.yaml", + outputPath: "e2e/sample.improved.yaml", + applySelectors: true, + applyAssertions: true, + assertions: "candidates", + assertionPolicy: "reliable", + appliedBy: "auto_apply", + }); + }); + + it("supports explicit auto-improve report mode", async () => { + await runRecord({ + name: "sample", + url: "http://127.0.0.1:5173", + description: "demo", + outputDir: "e2e", + browser: "firefox", + improveMode: "report", + }); + expect(improveTestFile).toHaveBeenCalledWith({ testFile: "e2e/sample.yaml", applySelectors: false, @@ -178,19 +199,32 @@ describe("runRecord auto-improve", () => { expect(improveTestFile).not.toHaveBeenCalled(); }); + it("skips auto-improve when improve mode is off", async () => { + await runRecord({ + name: "sample", + url: "http://127.0.0.1:5173", + description: "demo", + outputDir: "e2e", + browser: "firefox", + improveMode: "off", + }); + + expect(improveTestFile).not.toHaveBeenCalled(); + }); + it("prints summary when auto-improve makes changes", async () => { vi.mocked(improveTestFile).mockResolvedValue({ report: { testFile: "e2e/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", - appliedBy: "report_only", + appliedBy: "auto_apply", summary: { unchanged: 0, improved: 2, fallback: 0, warnings: 0, - assertionCandidates: 1, + assertionCandidates: 0, appliedAssertions: 1, skippedAssertions: 0, }, @@ -205,6 +239,7 @@ describe("runRecord auto-improve", () => { ], }, reportPath: "e2e/sample.improve-report.json", + outputPath: "e2e/sample.improved.yaml", }); await runRecord({ @@ -215,8 +250,8 @@ describe("runRecord auto-improve", () => { browser: "firefox", }); - expect(ui.info).toHaveBeenCalledWith( - "Auto-improve report: 2 selector recommendations, 1 assertion candidates" + expect(ui.success).toHaveBeenCalledWith( + "Auto-improve: 2 selectors improved, 1 assertions applied, 1 transient steps removed" ); }); @@ -318,7 +353,10 @@ describe("runRecord auto-improve", () => { expect(ui.warn).toHaveBeenCalledWith( "You can run it manually: ui-test improve " + path.resolve("e2e/sample.yaml") + - " --no-apply" + " --plan && ui-test improve " + + path.resolve("e2e/sample.yaml") + + " --apply-plan " + + path.resolve("e2e/sample.improve-plan.json") ); }); @@ -337,7 +375,10 @@ describe("runRecord auto-improve", () => { expect(ui.warn).toHaveBeenCalledWith( "You can run it manually: ui-test improve " + path.resolve("e2e/sample.yaml") + - " --apply" + " --plan && ui-test improve " + + path.resolve("e2e/sample.yaml") + + " --apply-plan " + + path.resolve("e2e/sample.improve-plan.json") ); }); }); @@ -495,7 +536,7 @@ describe("runRecordFromFile", () => { }); it("runs from-file auto-improve in report mode by default", async () => { - await runRecord({ fromFile: "/tmp/recording.json" }); + await runRecord({ fromFile: "/tmp/recording.json", improveMode: "report" }); expect(improveTestFile).toHaveBeenCalledWith( expect.objectContaining({ @@ -517,4 +558,17 @@ describe("runRecordFromFile", () => { }) ); }); + + it("runs from-file auto-improve in apply mode by default", async () => { + await runRecord({ fromFile: "/tmp/recording.json" }); + + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + outputPath: expect.stringContaining("login-flow.improved.yaml"), + applySelectors: true, + applyAssertions: true, + appliedBy: "auto_apply", + }) + ); + }); }); diff --git a/src/app/services/record-service.ts b/src/app/services/record-service.ts index 9b5877a..6ba564f 100644 --- a/src/app/services/record-service.ts +++ b/src/app/services/record-service.ts @@ -4,7 +4,13 @@ import { input } from "@inquirer/prompts"; import { record as runRecording, normalizeFirstNavigate, slugify, type RecordOptions } from "../../core/recorder.js"; import { improveTestFile } from "../../core/improve/improve.js"; import { PLAY_DEFAULT_BASE_URL, PLAY_DEFAULT_TEST_DIR } from "../../core/play/play-defaults.js"; -import { resolveRecordProfile, hasUrlProtocol, normalizeRecordUrl } from "../options/record-profile.js"; +import { + resolveRecordProfile, + parseRecordImproveMode, + type RecordImproveMode, + hasUrlProtocol, + normalizeRecordUrl, +} from "../options/record-profile.js"; import { formatRecordingProfileSummary } from "../options/profile-summary.js"; import { ensureChromiumAvailable } from "../../utils/chromium-runtime.js"; import { UserError } from "../../utils/errors.js"; @@ -12,6 +18,10 @@ import { ui } from "../../utils/ui.js"; import { defaultRunInteractiveCommand } from "../../infra/process/process-runner-adapter.js"; import { devtoolsRecordingToSteps } from "../../core/transform/devtools-recording-adapter.js"; import { stepsToYaml } from "../../core/transform/yaml-io.js"; +import { + canonicalEventsToSteps, + stepsToCanonicalEvents, +} from "../../core/recording/canonical-events.js"; export interface RecordCliOptions { name?: string; @@ -24,6 +34,7 @@ export interface RecordCliOptions { loadStorage?: string; saveStorage?: string; fromFile?: string; + improveMode?: string; improve?: boolean; } @@ -85,12 +96,14 @@ export async function runRecord(opts: RecordCliOptions): Promise { const summaryOptions: { browser: typeof profile.browser; + improveMode: RecordImproveMode; device?: string; testIdAttribute?: string; loadStorage?: string; saveStorage?: string; } = { browser: profile.browser, + improveMode: profile.improveMode, }; if (profile.device !== undefined) summaryOptions.device = profile.device; if (profile.testIdAttribute !== undefined) { @@ -131,57 +144,21 @@ export async function runRecord(opts: RecordCliOptions): Promise { if (opts.improve !== false) { try { - console.log(); - ui.info("Running auto-improve..."); - const improveResult = await improveTestFile({ - testFile: result.outputPath, - applySelectors: true, - applyAssertions: true, - assertions: "candidates", - assertionPolicy: "reliable", - }); - - const summary = improveResult.report.summary; - const removedSteps = improveResult.report.diagnostics.filter( - (d) => d.code === "runtime_failing_step_removed" - ).length; - const retainedStepDiagnostics = improveResult.report.diagnostics.filter( - (d) => d.code === "runtime_failing_step_retained" - ).length; - const retainedSteps = - summary.runtimeFailingStepsRetained ?? - retainedStepDiagnostics; - - const parts: string[] = []; - if (summary.improved > 0) parts.push(summary.improved + " selectors improved"); - if ((summary.selectorRepairsApplied ?? 0) > 0) { - parts.push((summary.selectorRepairsApplied ?? 0) + " selector repairs applied"); - } - if (summary.appliedAssertions > 0) parts.push(summary.appliedAssertions + " assertions applied"); - if ((summary.assertionCandidatesFilteredDynamic ?? 0) > 0) { - parts.push( - (summary.assertionCandidatesFilteredDynamic ?? 0) + - " dynamic assertion candidates filtered" - ); - } - if (retainedSteps > 0) parts.push(retainedSteps + " failing steps retained"); - if (removedSteps > 0) parts.push(removedSteps + " transient steps removed"); - - if (parts.length > 0) { - ui.success("Auto-improve: " + parts.join(", ")); - } else { - ui.info("Auto-improve: no changes needed"); - } + await runAutoImprove(result.outputPath, profile.improveMode); } catch (err) { const message = err instanceof Error ? err.message : String(err); ui.warn("Auto-improve failed: " + message); - ui.warn("You can run it manually: ui-test improve " + result.outputPath + " --apply"); + ui.warn( + "You can run it manually: " + + buildManualImproveCommand(result.outputPath, profile.improveMode) + ); } } } async function runRecordFromFile(opts: RecordCliOptions): Promise { const filePath = opts.fromFile!; + const improveMode = parseRecordImproveMode(opts.improveMode) ?? "apply"; let json: string; try { @@ -217,6 +194,7 @@ async function runRecordFromFile(opts: RecordCliOptions): Promise { const steps = firstNavigateUrl ? normalizeFirstNavigate(result.steps, firstNavigateUrl) : result.steps; + const canonicalizedSteps = canonicalEventsToSteps(stepsToCanonicalEvents(steps)); const yamlOptions: { description?: string; baseUrl?: string } = {}; if (opts.description) yamlOptions.description = opts.description; @@ -229,40 +207,130 @@ async function runRecordFromFile(opts: RecordCliOptions): Promise { } } - const yamlContent = stepsToYaml(name, steps, yamlOptions); + const yamlContent = stepsToYaml(name, canonicalizedSteps, yamlOptions); await fs.mkdir(outputDir, { recursive: true }); await fs.writeFile(outputPath, yamlContent, "utf-8"); console.log(); ui.success(`Test saved to ${outputPath}`); - ui.info(`Imported ${steps.length} steps from DevTools recording (${result.skipped} skipped)`); + ui.info( + `Imported ${canonicalizedSteps.length} steps from DevTools recording (${result.skipped} skipped)` + ); ui.info("Run it with: ui-test play " + outputPath); if (opts.improve !== false) { try { - console.log(); - ui.info("Running auto-improve..."); - const improveResult = await improveTestFile({ - testFile: outputPath, - applySelectors: true, - applyAssertions: true, - assertions: "candidates", - }); - - const summary = improveResult.report.summary; - const parts: string[] = []; - if (summary.improved > 0) parts.push(summary.improved + " selectors improved"); - if (summary.appliedAssertions > 0) parts.push(summary.appliedAssertions + " assertions applied"); - - if (parts.length > 0) { - ui.success("Auto-improve: " + parts.join(", ")); - } else { - ui.info("Auto-improve: no changes needed"); - } + await runAutoImprove(outputPath, improveMode); } catch (err) { const message = err instanceof Error ? err.message : String(err); ui.warn("Auto-improve failed: " + message); - ui.warn("You can run it manually: ui-test improve " + outputPath + " --apply"); + ui.warn( + "You can run it manually: " + + buildManualImproveCommand(outputPath, improveMode) + ); } } } + +async function runAutoImprove( + testFile: string, + improveMode: RecordImproveMode +): Promise { + if (improveMode === "off") { + return; + } + + const applyMutations = improveMode === "apply"; + const appliedBy = applyMutations ? "auto_apply" : "report_only"; + + console.log(); + ui.info(`Running auto-improve (${improveMode})...`); + const improveResult = await improveTestFile({ + testFile, + ...(applyMutations ? { outputPath: resolveDefaultImproveOutputPath(testFile) } : {}), + applySelectors: applyMutations, + applyAssertions: applyMutations, + assertions: "candidates", + assertionPolicy: "reliable", + appliedBy, + }); + + const summary = improveResult.report.summary; + if (!applyMutations) { + const parts: string[] = []; + if (summary.improved > 0) { + parts.push(summary.improved + " selector recommendations"); + } + if (summary.assertionCandidates > 0) { + parts.push(summary.assertionCandidates + " assertion candidates"); + } + if ((summary.assertionCandidatesFilteredDynamic ?? 0) > 0) { + parts.push( + (summary.assertionCandidatesFilteredDynamic ?? 0) + + " dynamic assertion candidates filtered" + ); + } + + if (parts.length > 0) { + ui.info("Auto-improve report: " + parts.join(", ")); + } else { + ui.info("Auto-improve report: no recommendations"); + } + ui.step("Apply recommendations: ui-test improve " + path.resolve(testFile) + " --apply"); + return; + } + + const removedSteps = improveResult.report.diagnostics.filter( + (d) => d.code === "runtime_failing_step_removed" + ).length; + const retainedStepDiagnostics = improveResult.report.diagnostics.filter( + (d) => d.code === "runtime_failing_step_retained" + ).length; + const retainedSteps = + summary.runtimeFailingStepsRetained ?? + retainedStepDiagnostics; + + const parts: string[] = []; + if (summary.improved > 0) parts.push(summary.improved + " selectors improved"); + if ((summary.selectorRepairsApplied ?? 0) > 0) { + parts.push((summary.selectorRepairsApplied ?? 0) + " selector repairs applied"); + } + if (summary.appliedAssertions > 0) parts.push(summary.appliedAssertions + " assertions applied"); + if ((summary.assertionCandidatesFilteredDynamic ?? 0) > 0) { + parts.push( + (summary.assertionCandidatesFilteredDynamic ?? 0) + + " dynamic assertion candidates filtered" + ); + } + if (retainedSteps > 0) parts.push(retainedSteps + " failing steps retained"); + if (removedSteps > 0) parts.push(removedSteps + " transient steps removed"); + + if (parts.length > 0) { + ui.success("Auto-improve: " + parts.join(", ")); + } else { + ui.info("Auto-improve: no changes needed"); + } + + if (improveResult.outputPath) { + ui.step("Run improved test with: ui-test play " + improveResult.outputPath); + } +} + +function resolveDefaultImproveOutputPath(testFile: string): string { + const ext = path.extname(testFile); + const base = ext ? testFile.slice(0, -ext.length) : testFile; + const effectiveExt = ext.length > 0 ? ext : ".yaml"; + return `${base}.improved${effectiveExt}`; +} + +function buildManualImproveCommand( + testFile: string, + improveMode: RecordImproveMode +): string { + const absolutePath = path.resolve(testFile); + if (improveMode === "apply") { + const planPath = absolutePath.replace(/(\.[^.]+)?$/, ".improve-plan.json"); + return `ui-test improve ${absolutePath} --plan && ui-test improve ${absolutePath} --apply-plan ${planPath}`; + } + return `ui-test improve ${absolutePath} --no-apply`; +} diff --git a/src/commands/record.test.ts b/src/commands/record.test.ts index 465da31..de1bba6 100644 --- a/src/commands/record.test.ts +++ b/src/commands/record.test.ts @@ -20,6 +20,8 @@ describe("record command options", () => { ".auth/in.json", "--save-storage", ".auth/out.json", + "--improve-mode", + "apply", ]); const opts = command?.opts() as Record; @@ -28,6 +30,7 @@ describe("record command options", () => { expect(opts.testIdAttribute).toBe("data-qa"); expect(opts.loadStorage).toBe(".auth/in.json"); expect(opts.saveStorage).toBe(".auth/out.json"); + expect(opts.improveMode).toBe("apply"); }); it("registers --no-improve flag", () => { @@ -49,4 +52,14 @@ describe("record command options", () => { const opts = command?.opts() as Record; expect(opts.improve).toBe(true); }); + + it("accepts off as an explicit improve mode value", () => { + const program = new Command(); + registerRecord(program); + const command = program.commands.find((entry) => entry.name() === "record"); + + command?.parseOptions(["--improve-mode", "off"]); + const opts = command?.opts() as Record; + expect(opts.improveMode).toBe("off"); + }); }); diff --git a/src/commands/record.ts b/src/commands/record.ts index f2d8da4..075f671 100644 --- a/src/commands/record.ts +++ b/src/commands/record.ts @@ -17,6 +17,10 @@ export function registerRecord(program: Command) { .option("--load-storage ", "Path to storage state to preload") .option("--save-storage ", "Path to write resulting storage state") .option("--from-file ", "Import a Chrome DevTools Recorder JSON export instead of recording") + .option( + "--improve-mode ", + "Auto-improve mode after recording: apply (default), report, or off" + ) .option("--no-improve", "Skip automatic improvement after recording") .action(async (opts: unknown) => { try { @@ -41,6 +45,7 @@ function parseRecordCliOptions(value: unknown): RecordCliOptions { const loadStorage = asOptionalString(value.loadStorage); const saveStorage = asOptionalString(value.saveStorage); const fromFile = asOptionalString(value.fromFile); + const improveMode = asOptionalString(value.improveMode); const improve = asOptionalBoolean(value.improve); if (name !== undefined) out.name = name; @@ -53,6 +58,7 @@ function parseRecordCliOptions(value: unknown): RecordCliOptions { if (loadStorage !== undefined) out.loadStorage = loadStorage; if (saveStorage !== undefined) out.saveStorage = saveStorage; if (fromFile !== undefined) out.fromFile = fromFile; + if (improveMode !== undefined) out.improveMode = improveMode; if (improve !== undefined) out.improve = improve; return out; @@ -69,6 +75,7 @@ interface RawRecordCliOptions { loadStorage?: unknown; saveStorage?: unknown; fromFile?: unknown; + improveMode?: unknown; improve?: unknown; } From 93d641969afafe8a87d277c5b2686631037259c4 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:07:50 +0100 Subject: [PATCH 11/19] validation: add recording-provenance scrutiny synthesis --- ...record-default-auto-apply-integration.json | 28 ++++++++++ .../recording-provenance-persistence.json | 21 ++++++++ .../scrutiny/synthesis.json | 54 +++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 .factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-default-auto-apply-integration.json create mode 100644 .factory/validation/recording-provenance-auto-apply/scrutiny/reviews/recording-provenance-persistence.json create mode 100644 .factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-default-auto-apply-integration.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-default-auto-apply-integration.json new file mode 100644 index 0000000..802446a --- /dev/null +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-default-auto-apply-integration.json @@ -0,0 +1,28 @@ +{ + "featureId": "record-default-auto-apply-integration", + "reviewedAt": "2026-03-09T07:04:53Z", + "commitId": "9ed2475", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "fail", + "codeReview": { + "summary": "The feature correctly defaults record flows to auto-apply with a separate `.improved.yaml`, honors `--no-improve`, and keeps recordings on auto-improve failure. However, a blocking profile mismatch remains: auto-apply hard-codes `assertionPolicy: \"reliable\"` while the manual `--plan`/`--apply-plan` path defaults to `balanced`, so the advertised convergence between default auto-apply and manual plan/apply is not guaranteed.", + "issues": [ + { + "file": "src/app/services/record-service.ts", + "line": 254, + "severity": "blocking", + "description": "`runAutoImprove` forces `assertionPolicy: \"reliable\"`, but the manual fallback command it prints (`ui-test improve --plan && ui-test improve --apply-plan `, line 333) does not pass `--assertion-policy reliable`. Manual plan generation therefore uses improve defaults (`DEFAULT_IMPROVE_ASSERTION_POLICY = \"balanced\"` in `src/core/improve/assertion-policy.ts:44` via `resolveImproveProfile` in `src/app/services/improve-service.ts`), which can produce different improved YAML from default record auto-apply and violates the convergence requirement for this feature." + } + ] + }, + "sharedStateObservations": [ + { + "area": "skills", + "observation": "The `record-improve-worker` skill requires a manual/CLI sanity check for user-facing command changes, but this worker transcript skeleton only shows tests/typecheck/git commands and no explicit `ui-test` CLI sanity run. The handoff still reports `skillFeedback.followedProcedure: true`.", + "evidence": "Skill requirement: `.factory/skills/record-improve-worker/SKILL.md:30`. Transcript skeleton for `c11e70c7-eee3-40b2-aae3-ae7b8f58f099` lists Execute commands such as vitest and typecheck but no `ui-test record`/`ui-test improve` command; handoff JSON sets `skillFeedback.followedProcedure` to true." + } + ], + "addressesFailureFrom": null, + "summary": "Fail: core auto-apply integration is mostly in place, but default auto-apply and manual plan/apply are configured with different assertion policies (`reliable` vs default `balanced`), so deterministic convergence is not reliably satisfied." +} diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/recording-provenance-persistence.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/recording-provenance-persistence.json new file mode 100644 index 0000000..2cf70ac --- /dev/null +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/recording-provenance-persistence.json @@ -0,0 +1,21 @@ +{ + "featureId": "recording-provenance-persistence", + "reviewedAt": "2026-03-09T07:01:14Z", + "commitId": "bb3a3684a540c4d5099c4d43321193246389eb21", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "Pass. The commit adds focused regression coverage that directly exercises the feature requirements: persisted normalized first-navigation/baseUrl context and persisted selector provenance fields (target.value/kind/source) across recorder, DevTools import, and canonical event round-trips. No implementation defects were identified in the reviewed scope.", + "issues": [] + }, + "sharedStateObservations": [ + { + "area": "skills", + "observation": "Worker flow indicates direct `vitest` invocation may be unreliable in this environment; using npm-backed test commands is more robust and this nuance is not explicit in shared skill guidance.", + "evidence": "Transcript skeleton for session 11814fa4-4b87-497d-a922-2f2fd2ad82de includes a failed initial validator attempt with `vitest` not on PATH, followed by successful `npm run test -- ...`; compare with `.factory/skills/record-improve-worker/SKILL.md` examples that use direct `vitest run ...` and `.factory/services.yaml` test command (`npm test -- --maxWorkers=5`)." + } + ], + "addressesFailureFrom": null, + "summary": "Reviewed feature recording-provenance-persistence for milestone recording-provenance-auto-apply at commit bb3a3684a540c4d5099c4d43321193246389eb21. Status: pass, with no blocking/non-blocking code issues found in scope." +} diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json new file mode 100644 index 0000000..474adf8 --- /dev/null +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json @@ -0,0 +1,54 @@ +{ + "milestone": "recording-provenance-auto-apply", + "round": 1, + "status": "fail", + "validatorsRun": { + "test": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" test -- --maxWorkers=5", + "exitCode": 0 + }, + "typecheck": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run typecheck:test", + "exitCode": 0 + }, + "lint": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run lint", + "exitCode": 0 + } + }, + "reviewsSummary": { + "total": 2, + "passed": 1, + "failed": 1, + "failedFeatures": [ + "record-default-auto-apply-integration" + ] + }, + "blockingIssues": [ + { + "featureId": "record-default-auto-apply-integration", + "severity": "blocking", + "description": "Record auto-apply forces assertionPolicy=\"reliable\" in src/app/services/record-service.ts while the printed manual fallback (`ui-test improve --plan` then `--apply-plan`) uses improve defaults (balanced), so auto-apply/manual convergence is not guaranteed." + } + ], + "appliedUpdates": [], + "suggestedGuidanceUpdates": [ + { + "target": ".factory/skills/record-improve-worker/SKILL.md", + "suggestion": "Add explicit validator-command fallback guidance: prefer npm-backed commands from .factory/services.yaml when direct `vitest` binary is unavailable on PATH.", + "evidence": "Review of recording-provenance-persistence observed a failed direct `vitest` invocation followed by successful npm-backed test execution in worker session 11814fa4-4b87-497d-a922-2f2fd2ad82de.", + "isSystemic": false + }, + { + "target": ".factory/skills/record-improve-worker/SKILL.md", + "suggestion": "Clarify and enforce evidence requirements for user-facing CLI sanity checks in worker transcripts/handoffs when command-surface behavior changes.", + "evidence": "Review of record-default-auto-apply-integration found user-facing command changes but transcript skeleton showed only tests/typecheck/git commands while handoff still marked procedure followed.", + "isSystemic": false + } + ], + "rejectedObservations": [], + "previousRound": null +} From 7ec2da55ddee2077bd686764c5746af4e41fa23b Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:11:54 +0100 Subject: [PATCH 12/19] Align record fallback improve profile Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/app/services/record-service.test.ts | 36 ++++++++++++++++++++++-- src/app/services/record-service.ts | 37 ++++++++++++++++++++----- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/src/app/services/record-service.test.ts b/src/app/services/record-service.test.ts index 5cc4bb9..cf86220 100644 --- a/src/app/services/record-service.test.ts +++ b/src/app/services/record-service.test.ts @@ -139,6 +139,7 @@ describe("runRecord auto-improve", () => { applySelectors: true, applyAssertions: true, assertions: "candidates", + assertionSource: "snapshot-native", assertionPolicy: "reliable", appliedBy: "auto_apply", }); @@ -159,6 +160,7 @@ describe("runRecord auto-improve", () => { applySelectors: false, applyAssertions: false, assertions: "candidates", + assertionSource: "snapshot-native", assertionPolicy: "reliable", appliedBy: "report_only", }); @@ -181,6 +183,7 @@ describe("runRecord auto-improve", () => { applySelectors: true, applyAssertions: true, assertions: "candidates", + assertionSource: "snapshot-native", assertionPolicy: "reliable", appliedBy: "auto_apply", }); @@ -353,7 +356,7 @@ describe("runRecord auto-improve", () => { expect(ui.warn).toHaveBeenCalledWith( "You can run it manually: ui-test improve " + path.resolve("e2e/sample.yaml") + - " --plan && ui-test improve " + + " --assertions candidates --assertion-source snapshot-native --assertion-policy reliable --plan && ui-test improve " + path.resolve("e2e/sample.yaml") + " --apply-plan " + path.resolve("e2e/sample.improve-plan.json") @@ -375,12 +378,38 @@ describe("runRecord auto-improve", () => { expect(ui.warn).toHaveBeenCalledWith( "You can run it manually: ui-test improve " + path.resolve("e2e/sample.yaml") + - " --plan && ui-test improve " + + " --assertions candidates --assertion-source snapshot-native --assertion-policy reliable --plan && ui-test improve " + path.resolve("e2e/sample.yaml") + " --apply-plan " + path.resolve("e2e/sample.improve-plan.json") ); }); + + it("prints the same assertion profile in manual fallback guidance as auto-apply uses", async () => { + vi.mocked(improveTestFile).mockRejectedValue(new Error("browser crashed")); + + await runRecord({ + name: "sample", + url: "http://127.0.0.1:5173", + description: "demo", + outputDir: "e2e", + browser: "firefox", + improveMode: "report", + }); + + expect(improveTestFile).toHaveBeenCalledWith( + expect.objectContaining({ + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "reliable", + }) + ); + expect(ui.warn).toHaveBeenCalledWith( + "You can run it manually: ui-test improve " + + path.resolve("e2e/sample.yaml") + + " --assertions candidates --assertion-source snapshot-native --assertion-policy reliable --no-apply" + ); + }); }); describe("runRecordFromFile", () => { @@ -542,6 +571,7 @@ describe("runRecordFromFile", () => { expect.objectContaining({ applySelectors: false, applyAssertions: false, + assertionSource: "snapshot-native", appliedBy: "report_only", }) ); @@ -554,6 +584,7 @@ describe("runRecordFromFile", () => { expect.objectContaining({ applySelectors: true, applyAssertions: true, + assertionSource: "snapshot-native", appliedBy: "auto_apply", }) ); @@ -567,6 +598,7 @@ describe("runRecordFromFile", () => { outputPath: expect.stringContaining("login-flow.improved.yaml"), applySelectors: true, applyAssertions: true, + assertionSource: "snapshot-native", appliedBy: "auto_apply", }) ); diff --git a/src/app/services/record-service.ts b/src/app/services/record-service.ts index 6ba564f..80b8beb 100644 --- a/src/app/services/record-service.ts +++ b/src/app/services/record-service.ts @@ -11,6 +11,11 @@ import { hasUrlProtocol, normalizeRecordUrl, } from "../options/record-profile.js"; +import { + resolveImproveProfile, + type ImproveProfileInput, + type ResolvedImproveProfile, +} from "../options/improve-profile.js"; import { formatRecordingProfileSummary } from "../options/profile-summary.js"; import { ensureChromiumAvailable } from "../../utils/chromium-runtime.js"; import { UserError } from "../../utils/errors.js"; @@ -23,6 +28,16 @@ import { stepsToCanonicalEvents, } from "../../core/recording/canonical-events.js"; +function resolveRecordAutoImproveProfile(improveMode: RecordImproveMode): ResolvedImproveProfile { + const improveProfileInput: ImproveProfileInput = { + apply: improveMode === "apply", + assertions: "candidates", + assertionSource: "snapshot-native", + assertionPolicy: "reliable", + }; + return resolveImproveProfile(improveProfileInput); +} + export interface RecordCliOptions { name?: string; url?: string; @@ -240,7 +255,8 @@ async function runAutoImprove( return; } - const applyMutations = improveMode === "apply"; + const improveProfile = resolveRecordAutoImproveProfile(improveMode); + const applyMutations = improveProfile.applySelectors || improveProfile.applyAssertions; const appliedBy = applyMutations ? "auto_apply" : "report_only"; console.log(); @@ -248,10 +264,11 @@ async function runAutoImprove( const improveResult = await improveTestFile({ testFile, ...(applyMutations ? { outputPath: resolveDefaultImproveOutputPath(testFile) } : {}), - applySelectors: applyMutations, - applyAssertions: applyMutations, - assertions: "candidates", - assertionPolicy: "reliable", + applySelectors: improveProfile.applySelectors, + applyAssertions: improveProfile.applyAssertions, + assertions: improveProfile.assertions, + assertionSource: improveProfile.assertionSource, + assertionPolicy: improveProfile.assertionPolicy, appliedBy, }); @@ -328,9 +345,15 @@ function buildManualImproveCommand( improveMode: RecordImproveMode ): string { const absolutePath = path.resolve(testFile); + const improveProfile = resolveRecordAutoImproveProfile(improveMode); + const profileArgs = [ + `--assertions ${improveProfile.assertions}`, + `--assertion-source ${improveProfile.assertionSource}`, + `--assertion-policy ${improveProfile.assertionPolicy}`, + ].join(" "); if (improveMode === "apply") { const planPath = absolutePath.replace(/(\.[^.]+)?$/, ".improve-plan.json"); - return `ui-test improve ${absolutePath} --plan && ui-test improve ${absolutePath} --apply-plan ${planPath}`; + return `ui-test improve ${absolutePath} ${profileArgs} --plan && ui-test improve ${absolutePath} --apply-plan ${planPath}`; } - return `ui-test improve ${absolutePath} --no-apply`; + return `ui-test improve ${absolutePath} ${profileArgs} --no-apply`; } From c122dd9e0515f3617d1d609eef407cbb00a61cbc Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:18:49 +0100 Subject: [PATCH 13/19] validation: synthesize recording-provenance scrutiny rerun --- ...ord-auto-apply-policy-convergence-fix.json | 21 ++++++++ .../scrutiny/synthesis.json | 38 ++++--------- .../scrutiny/synthesis.round1.json | 54 +++++++++++++++++++ 3 files changed, 86 insertions(+), 27 deletions(-) create mode 100644 .factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-auto-apply-policy-convergence-fix.json create mode 100644 .factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.round1.json diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-auto-apply-policy-convergence-fix.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-auto-apply-policy-convergence-fix.json new file mode 100644 index 0000000..b19bcd4 --- /dev/null +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-auto-apply-policy-convergence-fix.json @@ -0,0 +1,21 @@ +{ + "featureId": "record-auto-apply-policy-convergence-fix", + "reviewedAt": "2026-03-09T07:15:37Z", + "commitId": "a219c37", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "Pass. The fix directly closes the prior convergence defect by introducing a shared resolver for record auto-improve profile values and reusing it for the manual fallback command text, so the fallback `--plan` invocation now carries the same assertions/source/policy as auto-apply. Review of both commits (`9ed2475` and fix `a219c37`) confirms the original mismatch (`reliable` auto-apply vs default manual `balanced`) was addressed and regression tests were added for both apply and report fallback guidance.", + "issues": [] + }, + "sharedStateObservations": [ + { + "area": "skills", + "observation": "The skill requires one manual/CLI sanity check whenever user-facing command behavior changes, but the fix transcript skeleton shows only test/typecheck/lint/diff/commit steps and no explicit `ui-test` CLI sanity run. Handoff still marks `followedProcedure: true`.", + "evidence": "Skill requirement: `.factory/skills/record-improve-worker/SKILL.md:30`. Fix transcript skeleton for `22147114-ccdd-4d7c-8cff-64ab63292db2` lists vitest, typecheck, lint, and git commands but no explicit `ui-test record` or `ui-test improve` sanity invocation." + } + ], + "addressesFailureFrom": "/Users/vriesd/projects/ui-test/.factory/validation/recording-provenance-auto-apply/scrutiny/reviews/record-default-auto-apply-integration.json", + "summary": "Fix verified. The convergence gap flagged in the prior failed review is resolved: auto-apply and manual fallback guidance now derive the same deterministic assertion profile, and updated tests lock this behavior for both apply and report fallback paths." +} diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json index 474adf8..83476bd 100644 --- a/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.json @@ -1,7 +1,7 @@ { "milestone": "recording-provenance-auto-apply", - "round": 1, - "status": "fail", + "round": 2, + "status": "pass", "validatorsRun": { "test": { "passed": true, @@ -20,35 +20,19 @@ } }, "reviewsSummary": { - "total": 2, + "total": 1, "passed": 1, - "failed": 1, - "failedFeatures": [ - "record-default-auto-apply-integration" - ] + "failed": 0, + "failedFeatures": [] }, - "blockingIssues": [ - { - "featureId": "record-default-auto-apply-integration", - "severity": "blocking", - "description": "Record auto-apply forces assertionPolicy=\"reliable\" in src/app/services/record-service.ts while the printed manual fallback (`ui-test improve --plan` then `--apply-plan`) uses improve defaults (balanced), so auto-apply/manual convergence is not guaranteed." - } - ], + "blockingIssues": [], "appliedUpdates": [], - "suggestedGuidanceUpdates": [ - { - "target": ".factory/skills/record-improve-worker/SKILL.md", - "suggestion": "Add explicit validator-command fallback guidance: prefer npm-backed commands from .factory/services.yaml when direct `vitest` binary is unavailable on PATH.", - "evidence": "Review of recording-provenance-persistence observed a failed direct `vitest` invocation followed by successful npm-backed test execution in worker session 11814fa4-4b87-497d-a922-2f2fd2ad82de.", - "isSystemic": false - }, + "suggestedGuidanceUpdates": [], + "rejectedObservations": [ { - "target": ".factory/skills/record-improve-worker/SKILL.md", - "suggestion": "Clarify and enforce evidence requirements for user-facing CLI sanity checks in worker transcripts/handoffs when command-surface behavior changes.", - "evidence": "Review of record-default-auto-apply-integration found user-facing command changes but transcript skeleton showed only tests/typecheck/git commands while handoff still marked procedure followed.", - "isSystemic": false + "observation": "The fix transcript did not include an explicit manual/CLI sanity check despite user-facing command behavior changes.", + "reason": "duplicate" } ], - "rejectedObservations": [], - "previousRound": null + "previousRound": ".factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.round1.json" } diff --git a/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.round1.json b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.round1.json new file mode 100644 index 0000000..474adf8 --- /dev/null +++ b/.factory/validation/recording-provenance-auto-apply/scrutiny/synthesis.round1.json @@ -0,0 +1,54 @@ +{ + "milestone": "recording-provenance-auto-apply", + "round": 1, + "status": "fail", + "validatorsRun": { + "test": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" test -- --maxWorkers=5", + "exitCode": 0 + }, + "typecheck": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run typecheck:test", + "exitCode": 0 + }, + "lint": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run lint", + "exitCode": 0 + } + }, + "reviewsSummary": { + "total": 2, + "passed": 1, + "failed": 1, + "failedFeatures": [ + "record-default-auto-apply-integration" + ] + }, + "blockingIssues": [ + { + "featureId": "record-default-auto-apply-integration", + "severity": "blocking", + "description": "Record auto-apply forces assertionPolicy=\"reliable\" in src/app/services/record-service.ts while the printed manual fallback (`ui-test improve --plan` then `--apply-plan`) uses improve defaults (balanced), so auto-apply/manual convergence is not guaranteed." + } + ], + "appliedUpdates": [], + "suggestedGuidanceUpdates": [ + { + "target": ".factory/skills/record-improve-worker/SKILL.md", + "suggestion": "Add explicit validator-command fallback guidance: prefer npm-backed commands from .factory/services.yaml when direct `vitest` binary is unavailable on PATH.", + "evidence": "Review of recording-provenance-persistence observed a failed direct `vitest` invocation followed by successful npm-backed test execution in worker session 11814fa4-4b87-497d-a922-2f2fd2ad82de.", + "isSystemic": false + }, + { + "target": ".factory/skills/record-improve-worker/SKILL.md", + "suggestion": "Clarify and enforce evidence requirements for user-facing CLI sanity checks in worker transcripts/handoffs when command-surface behavior changes.", + "evidence": "Review of record-default-auto-apply-integration found user-facing command changes but transcript skeleton showed only tests/typecheck/git commands while handoff still marked procedure followed.", + "isSystemic": false + } + ], + "rejectedObservations": [], + "previousRound": null +} From 4a015d757d0ec5f94d1d7db9ede86943b28b0221 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:32:24 +0100 Subject: [PATCH 14/19] validation: capture user-testing flow quirk --- .factory/library/user-testing.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.factory/library/user-testing.md b/.factory/library/user-testing.md index 571c02a..91c8849 100644 --- a/.factory/library/user-testing.md +++ b/.factory/library/user-testing.md @@ -39,3 +39,4 @@ Testing surface, tools, setup steps, and known quirks. - `improve` runtime treats `data:` navigation URLs as relative and reports `Cannot resolve relative navigation URL`; this can be used as deterministic runtime-failure evidence in candidate-skip assertions. - If transient local TypeScript worktree errors block `npm run build`, validators may use the existing `dist/bin/ui-test.js` binary for CLI-flow checks. +- Vitest in this repo only runs tests under configured include globs; ad-hoc validator test files created under `/tmp` will be ignored. Place any temporary flow tests under `scripts/` (with unique prefixes) if dynamic test generation is needed. From 638921e8444aa6e5d71ce3c1f8a67c90d94a735f Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:46:44 +0100 Subject: [PATCH 15/19] validation: add parity-and-determinism scrutiny synthesis --- .../brittle-fixture-repair-determinism.json | 21 ++++++++++ .../headed-parity-surface-stabilization.json | 22 ++++++++++ .../scrutiny/synthesis.json | 40 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 .factory/validation/parity-and-determinism-hardening/scrutiny/reviews/brittle-fixture-repair-determinism.json create mode 100644 .factory/validation/parity-and-determinism-hardening/scrutiny/reviews/headed-parity-surface-stabilization.json create mode 100644 .factory/validation/parity-and-determinism-hardening/scrutiny/synthesis.json diff --git a/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/brittle-fixture-repair-determinism.json b/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/brittle-fixture-repair-determinism.json new file mode 100644 index 0000000..466c9ff --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/brittle-fixture-repair-determinism.json @@ -0,0 +1,21 @@ +{ + "featureId": "brittle-fixture-repair-determinism", + "reviewedAt": "2026-03-09T07:43:44Z", + "commitId": "aad204e05c88dc7981b00a9c2f5990435c334690", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "Reviewed transcript skeleton, handoff, and relevant improve/runtime selector+assertion code paths for the brittle fixture contract. The controlled baseline failure + repaired replay flow is covered in improve.dynamic.integration.test.ts, runtime step removal is safety-gated on disposition+mutationSafety+confidence in improve-runner.ts, content/business safeguards are present in runtime-failure-classifier.ts, and deterministic tie-break behavior is exercised in assertion-apply.test.ts. No blocking or non-blocking correctness issues were found against the feature contract.", + "issues": [] + }, + "sharedStateObservations": [ + { + "area": "conventions", + "observation": "Feature traceability is weaker than ideal because the handoff commit for this feature does not contain the feature implementation diff; the worker validated behavior already present in a dirty worktree.", + "evidence": "Handoff for worker session eeb1546a-f90c-4174-aa32-55801f27d419 states \"No new edits were required\" while `git show aad204e05c88dc7981b00a9c2f5990435c334690` only changes `.factory/library/user-testing.md`." + } + ], + "addressesFailureFrom": null, + "summary": "Pass. The brittle fixture repair/determinism contract is satisfied by the reviewed code paths and tests, with no correctness defects identified; noted a non-blocking traceability convention gap for commit-to-feature mapping." +} diff --git a/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/headed-parity-surface-stabilization.json b/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/headed-parity-surface-stabilization.json new file mode 100644 index 0000000..3014e27 --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/scrutiny/reviews/headed-parity-surface-stabilization.json @@ -0,0 +1,22 @@ +{ + "featureId": "headed-parity-surface-stabilization", + "reviewedAt": "2026-03-09T07:42:23Z", + "commitId": "aad204e", + "transcriptSkeletonReviewed": true, + "diffReviewed": true, + "status": "pass", + "codeReview": { + "summary": "The headed parity implementation meets the feature contract: resolver-based suite discovery now fails loudly on zero-match suites, mission-critical player/improve parity files are included, and parity-focused integration coverage demonstrates the same improved YAML passes in headless and headed runs when parity is required.", + "issues": [ + { + "file": "scripts/run-headed-parity.test.mjs", + "line": 16, + "severity": "non_blocking", + "description": "The resolver contract test allows extra files (`length >= 2`) rather than asserting the exact expected parity set, so future broad-glob drift could silently expand parity scope without a failing test." + } + ] + }, + "sharedStateObservations": [], + "addressesFailureFrom": null, + "summary": "Review passed. The feature behavior is correctly implemented and validated for headed/headless parity, with one non-blocking test strictness gap in resolver-scope assertions." +} diff --git a/.factory/validation/parity-and-determinism-hardening/scrutiny/synthesis.json b/.factory/validation/parity-and-determinism-hardening/scrutiny/synthesis.json new file mode 100644 index 0000000..e03e071 --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/scrutiny/synthesis.json @@ -0,0 +1,40 @@ +{ + "milestone": "parity-and-determinism-hardening", + "round": 1, + "status": "pass", + "validatorsRun": { + "test": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" test -- --maxWorkers=5", + "exitCode": 0 + }, + "typecheck": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run typecheck:test", + "exitCode": 0 + }, + "lint": { + "passed": true, + "command": "npm --prefix \"/Users/vriesd/projects/ui-test\" run lint", + "exitCode": 0 + } + }, + "reviewsSummary": { + "total": 2, + "passed": 2, + "failed": 0, + "failedFeatures": [] + }, + "blockingIssues": [], + "appliedUpdates": [], + "suggestedGuidanceUpdates": [ + { + "target": "AGENTS.md", + "suggestion": "Require dirty-worktree feature handoffs to include explicit edited file paths and diff anchors in addition to commit IDs so scrutiny can reliably trace implementation evidence.", + "evidence": "Review for feature brittle-fixture-repair-determinism observed worker session eeb1546a-f90c-4174-aa32-55801f27d419 validated behavior in dirty worktree while cited commit aad204e only changed .factory/library/user-testing.md.", + "isSystemic": true + } + ], + "rejectedObservations": [], + "previousRound": null +} From b9333c28d715cd7d753c58e1e0e7b19c75bf8297 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:56:58 +0100 Subject: [PATCH 16/19] validation: add parity-and-determinism user-testing synthesis --- .../flows/brittle-determinism.json | 121 ++++++++++++++++++ .../user-testing/flows/headed-parity.json | 78 +++++++++++ .../user-testing/synthesis.json | 25 ++++ 3 files changed, 224 insertions(+) create mode 100644 .factory/validation/parity-and-determinism-hardening/user-testing/flows/brittle-determinism.json create mode 100644 .factory/validation/parity-and-determinism-hardening/user-testing/flows/headed-parity.json create mode 100644 .factory/validation/parity-and-determinism-hardening/user-testing/synthesis.json diff --git a/.factory/validation/parity-and-determinism-hardening/user-testing/flows/brittle-determinism.json b/.factory/validation/parity-and-determinism-hardening/user-testing/flows/brittle-determinism.json new file mode 100644 index 0000000..383a995 --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/user-testing/flows/brittle-determinism.json @@ -0,0 +1,121 @@ +{ + "milestone": "parity-and-determinism-hardening", + "flowId": "brittle-determinism", + "testedAt": "2026-03-09T07:52:29Z", + "credentials": { + "account": "cli-flow-a@example.test", + "passwordLabel": "flow-a-pass", + "namespace": "flow-a" + }, + "assertionsTested": [ + "VAL-CROSS-002", + "VAL-CROSS-003", + "VAL-CROSS-007", + "VAL-CROSS-008", + "VAL-CROSS-009" + ], + "assertionResults": [ + { + "id": "VAL-CROSS-002", + "status": "pass", + "reason": "Controlled brittle baseline failure mode reproduced before improve was applied.", + "evidence": [ + "Command cmd-1: src/core/improve/improve.dynamic.integration.test.ts passed while logging baseline failure: '✖ Step 2: click ... Timeout 2500ms exceeded' for exact brittle locator.", + "Command cmd-1 targeted test output: improve dynamic acceptance benchmark > repairs brittle exact news locator and turns baseline failure into pass." + ] + }, + { + "id": "VAL-CROSS-003", + "status": "pass", + "reason": "Redesigned improve path repaired brittle selector and replay succeeded after repair.", + "evidence": [ + "Command cmd-1: same integration test logged repaired replay steps passing (click 'link', assertVisible '#article') and overall test pass.", + "Command cmd-5: full suite rerun repeated baseline failure signal first, then successful improved replay in both headless and headed sections within the same deterministic fixture test." + ] + }, + { + "id": "VAL-CROSS-007", + "status": "pass", + "reason": "Runtime-failing step removal behavior remained gated by classifier disposition/safety/confidence and retained when unsafe.", + "evidence": [ + "Command cmd-2: improve.test targeted pass for 'retains non-transient runtime-failing steps and removes transient ones'.", + "Command cmd-2: improve.test targeted pass for 'retains low-confidence transient removals behind safety guard'.", + "Command cmd-2: runtime-failure-classifier targeted pass for 'marks low-confidence soft transient dismissals as unsafe to auto-apply'." + ] + }, + { + "id": "VAL-CROSS-008", + "status": "pass", + "reason": "Business/content interactions were retained unless explicit safe removal evidence existed.", + "evidence": [ + "Command cmd-2: improve.test targeted pass for 'keeps likely business-intent transient-context failures as retained'.", + "Command cmd-2: runtime-failure-classifier targeted passes for retained content/business cases ('classifies content interactions as retained non-transient failures', 'keeps generic business wording in transient context as retained', 'keeps privacy content-link interactions as retained').", + "Command cmd-5: comprehensive classifier suite pass including both transient removals and retained content semantics tests." + ] + }, + { + "id": "VAL-CROSS-009", + "status": "pass", + "reason": "Tie-break behavior for equivalent-confidence candidates remained deterministic in selector and assertion application paths.", + "evidence": [ + "Command cmd-3: improve.apply-runtime targeted pass for 'adopts dynamic selector repairs on score ties when runtime match is unique'.", + "Command cmd-3: assertion-apply targeted pass for 'uses candidate index as stable tie-breaker when confidence, action, and source are tied'.", + "Command cmd-5: full deterministic candidate-related suites passed (improve.apply-runtime, improve-selector-pass, assertion-candidates-inventory)." + ] + } + ], + "toolsUsed": [ + "cli", + "npx vitest" + ], + "frictions": [], + "blockers": [], + "commandsRun": [ + { + "id": "cmd-1", + "command": "npx vitest run src/core/improve/improve.dynamic.integration.test.ts --reporter=verbose", + "exitCode": 0, + "observations": [ + "Baseline brittle locator failure reproduced before improve (Step 2 timeout on exact link name).", + "After improve, replay steps passed and test completed green." + ] + }, + { + "id": "cmd-2", + "command": "npx vitest run src/core/improve/improve.test.ts src/core/improve/runtime-failure-classifier.test.ts --reporter=verbose -t \"retains non-transient runtime-failing steps and removes transient ones|keeps likely business-intent transient-context failures as retained|retains low-confidence transient removals behind safety guard|classifies cookie-dismiss interactions as transient removals|classifies content interactions as retained non-transient failures|keeps generic business wording in transient context as retained|keeps privacy content-link interactions as retained|marks low-confidence soft transient dismissals as unsafe to auto-apply\"", + "exitCode": 0, + "observations": [ + "All targeted removal/retention classifier and runner tests passed (8 passed, 23 skipped).", + "Observed explicit retained-path and unsafe-to-auto-apply protections passing under deterministic fixtures." + ] + }, + { + "id": "cmd-3", + "command": "npx vitest run src/core/improve/improve.apply-runtime.test.ts src/core/improve/assertion-apply.test.ts --reporter=verbose -t \"adopts dynamic selector repairs on score ties when runtime match is unique|uses candidate index as stable tie-breaker when confidence, action, and source are tied\"", + "exitCode": 0, + "observations": [ + "Both tie-focused tests passed (2 passed, 49 skipped).", + "Selector tie adoption and assertion candidate index tie-break remained stable." + ] + }, + { + "id": "cmd-4", + "command": "npx vitest run src/core/improve/improve-selector-pass.test.ts src/core/improve/assertion-candidates-inventory.test.ts --reporter=verbose", + "exitCode": 0, + "observations": [ + "Selector pass and inventory candidate suites passed (9 passed).", + "Confirmed deterministic candidate generation/dedup paths are green." + ] + }, + { + "id": "cmd-5", + "command": "npx vitest run src/core/improve/improve.dynamic.integration.test.ts src/core/improve/improve.apply-runtime.test.ts src/core/improve/runtime-failure-classifier.test.ts src/core/improve/improve-selector-pass.test.ts src/core/improve/assertion-candidates-inventory.test.ts --reporter=verbose", + "exitCode": 0, + "observations": [ + "Comprehensive suggested command passed (5 files, 55 tests).", + "Output again showed brittle baseline failure signal followed by successful improved replay, plus all classifier/tie-related suites passing." + ] + } + ], + "summary": "Validated 5 assigned assertions with deterministic local fixtures/tests only: 5 passed, 0 failed, 0 blocked." +} diff --git a/.factory/validation/parity-and-determinism-hardening/user-testing/flows/headed-parity.json b/.factory/validation/parity-and-determinism-hardening/user-testing/flows/headed-parity.json new file mode 100644 index 0000000..ced4ace --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/user-testing/flows/headed-parity.json @@ -0,0 +1,78 @@ +{ + "milestone": "parity-and-determinism-hardening", + "flowId": "headed-parity", + "testedAt": "2026-03-09T07:51:58Z", + "credentials": { + "account": "cli-flow-b@example.test", + "passwordLabel": "flow-b-pass", + "namespace": "flow-b" + }, + "assertionsTested": [ + "VAL-CROSS-004", + "VAL-CROSS-005", + "VAL-CROSS-006" + ], + "assertionResults": [ + { + "id": "VAL-CROSS-004", + "status": "pass", + "reason": "Parity-covered fixture behavior was validated by integration coverage that explicitly runs the same YAML in headless and headed modes, with both variants passing.", + "evidence": [ + "targeted-test-output: src/core/player.integration.test.ts includes passing tests 'runs the same YAML in headless and headed modes when headed Chromium is available' and 'runs non-cookie overlay YAML in headless and headed modes when headed Chromium is available' (exit 0)", + "targeted-test-output: src/core/improve/improve.dynamic.integration.test.ts passed mission-critical improved-flow repair benchmark (exit 0)", + "terminal-output(headless-pass): headless execution steps completed and assertions passed in parity tests", + "terminal-output(headed-pass): headed parity mode executed under UI_TEST_REQUIRE_HEADED_PARITY=1 via scripts/run-headed-parity.mjs and passed" + ] + }, + { + "id": "VAL-CROSS-005", + "status": "pass", + "reason": "The headed parity resolver test confirms mission-critical suite resolution and explicit zero-match failure behavior; parity runner output confirms resolved files are the expected player and improve integration files.", + "evidence": [ + "targeted-test-output: scripts/run-headed-parity.test.mjs passed 'resolves all configured parity suites'", + "targeted-test-output: scripts/run-headed-parity.test.mjs passed 'fails when a configured suite resolves to zero tests'", + "targeted-test-output: scripts/run-headed-parity.test.mjs passed 'deduplicates and sorts matched files'", + "terminal-output(test:parity:headed): [headed-parity] Running parity suites (2 files): src/core/improve/improve.dynamic.integration.test.ts, src/core/player.integration.test.ts" + ] + }, + { + "id": "VAL-CROSS-006", + "status": "pass", + "reason": "Mission-critical headed parity command completed successfully and reported a fully green parity surface for player and improve integration tests.", + "evidence": [ + "terminal-output(test:parity:headed): npm run test:parity:headed exited 0", + "terminal-output(test:parity:headed): Test Files 2 passed, Tests 26 passed", + "terminal-output(targeted-integration): includes src/core/player.integration.test.ts (25 tests) and src/core/improve/improve.dynamic.integration.test.ts (1 test) both passed" + ] + } + ], + "toolsUsed": [ + "Read", + "LS", + "Execute", + "ApplyPatch" + ], + "frictions": [], + "blockers": [], + "commandsRun": [ + { + "command": "FLOW_B_TMP=$(mktemp -d /tmp/flow-b-vitest-XXXXXX) && TMPDIR=\"$FLOW_B_TMP\" node /Users/vriesd/projects/ui-test/node_modules/vitest/vitest.mjs run /Users/vriesd/projects/ui-test/scripts/run-headed-parity.test.mjs /Users/vriesd/projects/ui-test/src/core/player.integration.test.ts /Users/vriesd/projects/ui-test/src/core/improve/improve.dynamic.integration.test.ts --reporter=verbose", + "exitCode": 0, + "observations": [ + "3 test files passed, 29 tests passed.", + "Resolver coverage passed including zero-match loud failure assertion.", + "Parity-focused player and improve integration tests passed with expected intentional failure-step logs contained within passing test cases." + ] + }, + { + "command": "FLOW_B_TMP=$(mktemp -d /tmp/flow-b-headed-XXXXXX) && TMPDIR=\"$FLOW_B_TMP\" npm --prefix /Users/vriesd/projects/ui-test run test:parity:headed", + "exitCode": 0, + "observations": [ + "Headed parity resolver selected exactly 2 mission-critical files.", + "Parity run completed green: 2 files passed, 26 tests passed.", + "Included headless/headed parity assertions for shared YAML flows and improve dynamic integration coverage." + ] + } + ], + "summary": "Validated VAL-CROSS-004/005/006 with deterministic local fixtures: resolver behavior (including zero-match fail-loud), headless+headed parity stability, and full npm headed parity gate all passed with exit code 0." +} diff --git a/.factory/validation/parity-and-determinism-hardening/user-testing/synthesis.json b/.factory/validation/parity-and-determinism-hardening/user-testing/synthesis.json new file mode 100644 index 0000000..9efd326 --- /dev/null +++ b/.factory/validation/parity-and-determinism-hardening/user-testing/synthesis.json @@ -0,0 +1,25 @@ +{ + "milestone": "parity-and-determinism-hardening", + "round": 1, + "status": "pass", + "assertionsSummary": { + "total": 8, + "passed": 8, + "failed": 0, + "blocked": 0 + }, + "passedAssertions": [ + "VAL-CROSS-002", + "VAL-CROSS-003", + "VAL-CROSS-004", + "VAL-CROSS-005", + "VAL-CROSS-006", + "VAL-CROSS-007", + "VAL-CROSS-008", + "VAL-CROSS-009" + ], + "failedAssertions": [], + "blockedAssertions": [], + "appliedUpdates": [], + "previousRound": null +} From a597df459a69588fa75dd760f05abc97cc0d079c Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 08:58:29 +0100 Subject: [PATCH 17/19] Improve determinism and readiness in record/improve flows Harden improve planning/application, canonicalize recorded events, add post-step readiness controls, and expand parity/flake soak coverage to reduce runtime flakiness while preserving safer default behavior. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- .github/workflows/flake-soak.yml | 51 ++++ README.md | 4 +- docs/configuration.md | 5 +- docs/getting-started.md | 8 +- docs/maintainers.md | 7 +- docs/troubleshooting.md | 6 +- docs/workflows/improve.md | 60 ++++- docs/workflows/record.md | 18 +- package.json | 1 + scripts/run-flake-soak.mjs | 165 ++++++++++++ scripts/run-flake-soak.test.mjs | 38 +++ scripts/run-headed-parity.mjs | 93 +++++-- scripts/run-headed-parity.test.mjs | 50 ++++ src/commands/improve.test.ts | 10 + src/commands/improve.ts | 19 ++ .../improve/assertion-apply-validation.ts | 27 +- src/core/improve/assertion-apply.test.ts | 43 +++- .../assertion-candidates-inventory.test.ts | 16 +- .../improve/assertion-candidates-inventory.ts | 8 +- .../improve/assertion-candidates-snapshot.ts | 2 + .../improve/improve-assertion-pass.test.ts | 137 ++++++---- src/core/improve/improve-runner.ts | 129 +++++++++- .../improve/improve-selector-pass.test.ts | 20 +- src/core/improve/improve-selector-pass.ts | 100 +++---- src/core/improve/improve-types.ts | 24 ++ .../improve/improve.apply-runtime.test.ts | 143 ++++++++--- src/core/improve/improve.test.ts | 136 ++++++++++ src/core/improve/improve.ts | 2 + .../playwright-runtime-selector-adapter.ts | 141 ++++++---- src/core/improve/report-schema.test.ts | 20 +- src/core/improve/report-schema.ts | 34 ++- .../runtime-failure-classifier.test.ts | 17 ++ .../improve/runtime-failure-classifier.ts | 144 ++++++++--- .../improve/selector-pass/apply-selection.ts | 9 - .../selector-pass/collect-candidates.ts | 14 +- .../improve/selector-runtime-repair.test.ts | 120 +-------- src/core/improve/selector-runtime-repair.ts | 123 +-------- src/core/improve/step-snapshot-scope.ts | 70 +++++ src/core/play/step-loop.test.ts | 27 +- src/core/play/step-loop.ts | 47 +++- src/core/player.integration.test.ts | 30 +++ src/core/player.test.ts | 9 + src/core/recorder.ts | 11 +- src/core/recording/canonical-events.ts | 243 ++++++++++++++++++ src/core/runtime/network-idle.ts | 93 ++++++- src/core/runtime/overlay-handler.test.ts | 4 +- src/core/runtime/overlay-handler.ts | 2 +- 47 files changed, 1857 insertions(+), 623 deletions(-) create mode 100644 .github/workflows/flake-soak.yml create mode 100644 scripts/run-flake-soak.mjs create mode 100644 scripts/run-flake-soak.test.mjs create mode 100644 scripts/run-headed-parity.test.mjs create mode 100644 src/core/improve/step-snapshot-scope.ts create mode 100644 src/core/recording/canonical-events.ts diff --git a/.github/workflows/flake-soak.yml b/.github/workflows/flake-soak.yml new file mode 100644 index 0000000..2c9f02a --- /dev/null +++ b/.github/workflows/flake-soak.yml @@ -0,0 +1,51 @@ +name: Flake Soak + +on: + schedule: + - cron: "0 6 * * *" + workflow_dispatch: + inputs: + iterations: + description: "Number of soak iterations" + required: false + default: "8" + +permissions: + contents: read + +concurrency: + group: flake-soak-${{ github.ref }} + cancel-in-progress: false + +jobs: + flake-soak: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: npm ci + + - name: Install Playwright Chromium + run: npx playwright install --with-deps chromium + + - name: Run flake soak + env: + UI_TEST_FLAKE_ITERATIONS: ${{ github.event.inputs.iterations || '8' }} + UI_TEST_FLAKE_REPORT_PATH: .ui-test-artifacts/flake-soak/latest.json + run: xvfb-run -a npm run test:flake:soak + + - name: Upload flake soak report + if: always() + uses: actions/upload-artifact@v4 + with: + name: flake-soak-report + path: .ui-test-artifacts/flake-soak/latest.json + if-no-files-found: warn diff --git a/README.md b/README.md index 18fc853..26d1622 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ All command examples below use global `ui-test`. | `ui-test setup` | Onboarding and provisioning helper | | `ui-test play [test]` | Run one YAML test or all tests | | `ui-test record` | Record browser interactions into YAML | -| `ui-test improve ` | Improve selectors, add assertions, remove transient runtime failures, retain non-transient failures as required steps | +| `ui-test improve ` | Improve selectors/assertions (`--plan` for review-only plan, `--apply-plan` to apply reviewed plan) | | `ui-test list` | List discovered tests | | `ui-test doctor` | Show invocation/version diagnostics | @@ -41,7 +41,7 @@ All command examples below use global `ui-test`. | `--headed` | off (headless) | | `--timeout ` | 10000 | | `--delay ` | 0 | -| `--wait-network-idle` | on | +| `--wait-network-idle` | off | | `--save-failure-artifacts` | on | | `--artifacts-dir ` | `.ui-test-artifacts` | | `--browser ` | chromium | diff --git a/docs/configuration.md b/docs/configuration.md index 8c1a62f..9f8b605 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -19,7 +19,7 @@ ui-test setup | `--headed` | off (headless) | | `--timeout ` | 10000 | | `--delay ` | 0 | -| `--wait-network-idle` | on | +| `--wait-network-idle` | off | | `--save-failure-artifacts` | on | | `--artifacts-dir ` | `.ui-test-artifacts` | | `--browser ` | chromium | @@ -32,7 +32,6 @@ Auto-start only applies to `e2e/example.yaml`. | Setting | Default | |---------|---------| | `--output-dir ` | `e2e` | -| `--selector-policy ` | `reliable` | | `--browser ` | chromium | | `--no-improve` | off (auto-improve enabled) | @@ -44,3 +43,5 @@ Auto-start only applies to `e2e/example.yaml`. | `--assertion-source ` | `snapshot-native` | | `--assertion-policy ` | `balanced` | | `--apply` / `--no-apply` | prompt (interactive) | +| `--in-place` | off (write improved copy) | +| `--output ` | `.improved.yaml` | diff --git a/docs/getting-started.md b/docs/getting-started.md index 7dbb9a3..e5db352 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -132,19 +132,19 @@ On dynamic content pages (for example news homepages), recorder/improve use stab - Improve can convert dynamic `internal`/`playwrightSelector` targets into `locatorExpression` targets via Playwright runtime APIs when uniquely matched. - Navigation-like dynamic link clicks avoid deterministic post-click `assertVisible` insertion; URL/title/snapshot-native assertions are preferred. - You can disable runtime selector regeneration/conversion for debugging with `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_REGEN=1`. -- You can keep public runtime regeneration but disable private resolver fallback with `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK=1`. ## Improve Tests `improve` upgrades selectors, generates assertion candidates, and classifies runtime-failing interactions (aggressively removes transient dismissal/control `click`/`press` failures, retains non-transient and safeguarded content/business interactions as required steps). ```bash -ui-test improve e2e/login.yaml -ui-test improve e2e/login.yaml --apply +ui-test improve e2e/login.yaml # prompts to write e2e/login.improved.yaml +ui-test improve e2e/login.yaml --apply # writes e2e/login.improved.yaml (no prompt) +ui-test improve e2e/login.yaml --apply --in-place ui-test improve e2e/login.yaml --no-apply ``` -By default, `improve` prompts you to confirm before applying changes. Use `--apply` to skip the prompt (CI-friendly), or `--no-apply` for a report-only run without prompting. +By default, `improve` prompts you to confirm before writing an improved copy (`.improved.yaml`). Use `--apply` to skip the prompt (CI-friendly), `--in-place` to overwrite the input file, `--output ` for a custom destination, or `--no-apply` for a report-only run without prompting. Apply-mode runs can mark candidates as `skipped_policy` when policy caps/filters are enforced. Report-only runs (`--no-apply`) keep candidate status as `not_requested`. diff --git a/docs/maintainers.md b/docs/maintainers.md index 136c7f0..bd30222 100644 --- a/docs/maintainers.md +++ b/docs/maintainers.md @@ -12,6 +12,7 @@ npm run typecheck:prod npm test npm run quality:ci npm run test:parity:headed +npm run test:flake:soak npm run test:coverage npm run build npm run test:smoke @@ -49,10 +50,14 @@ Workflows run on GitHub-hosted `ubuntu-latest` runners. Primary CI workflow (`.github/workflows/ci.yml`) has these jobs: - `quality-ci`: runs as a Node `20`/`22` matrix, installs Chromium, runs `npm run quality:ci`, then `npm run test:coverage`. -- `headed-parity`: installs Chromium, runs `xvfb-run -a npm run test:parity:headed`. +- `headed-parity`: installs Chromium, runs `xvfb-run -a npm run test:parity:headed`; suite resolution fails fast if configured parity suites resolve to zero files. - `build`: runs as a Node `20`/`22` matrix and executes `npm run build`, `npm run typecheck:prod`, and packaging/install dry-run checks. - `consumer-smoke`: runs `npm run test:smoke` after `quality-ci`, `headed-parity`, and `build` succeed. +Optional soak workflow: + +- `.github/workflows/flake-soak.yml`: scheduled/manual multi-iteration integration soak; uploads JSON failure-rate report artifact. + ## Recorder Stability Override Recorder default path is JSONL with fallback to playwright-test parsing. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 81bc353..a14d307 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -82,6 +82,8 @@ Fix the underlying issue, then run the improve step manually: ui-test improve --apply ``` +This writes `.improved.yaml` by default; use `--in-place` to overwrite the input file. + ## Headed vs Headless Differences on Dynamic Sites Dynamic news pages can behave differently between headed and headless runs when overlays or fast-changing headlines are present. @@ -114,15 +116,11 @@ If your test still flakes: 3. Prefer stable semantic targets (roles/test ids/nav labels) over long, exact headline text. 4. If needed, disable runtime regeneration temporarily to isolate behavior: - `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_REGEN=1 ui-test improve --apply` -5. If needed, keep public runtime conversion but disable private resolver fallback: - - `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK=1 ui-test improve --apply` Runtime regeneration diagnostic meanings: 1. `selector_repair_playwright_runtime_unavailable`: runtime resolver or uniqueness check could not run in this environment. 2. `selector_repair_playwright_runtime_non_unique`: runtime match count was not unique, so no repair was generated. 3. `selector_repair_playwright_runtime_disabled`: runtime regeneration was explicitly disabled by env var. -4. `selector_repair_playwright_runtime_private_fallback_disabled`: private resolver fallback was explicitly disabled by env var. -5. `selector_repair_playwright_runtime_private_fallback_used`: fallback path was used after public conversion was unavailable. ## Improve Apply Mode Fails diff --git a/docs/workflows/improve.md b/docs/workflows/improve.md index ddccac9..b3a87b8 100644 --- a/docs/workflows/improve.md +++ b/docs/workflows/improve.md @@ -13,10 +13,10 @@ ui-test improve e2e/login.yaml This prompts you to confirm before applying improvements: ``` -? Apply improvements to login.yaml? (Y/n) +? Write improved copy to login.improved.yaml? (Y/n) ``` -Accept (default) to apply improved selectors and assertion candidates to the YAML file, or decline for a report-only run. +Accept (default) to write improvements to `e2e/login.improved.yaml` and keep `e2e/login.yaml` unchanged. Decline for a report-only run. ### Apply Without Prompting (CI) @@ -24,7 +24,19 @@ Accept (default) to apply improved selectors and assertion candidates to the YAM ui-test improve e2e/login.yaml --apply ``` -`--apply` writes both improved selectors and high-confidence assertion candidates to the YAML file without prompting. +`--apply` writes both improved selectors and high-confidence assertion candidates to `e2e/login.improved.yaml` without prompting. + +To overwrite the input file instead: + +```bash +ui-test improve e2e/login.yaml --apply --in-place +``` + +To write to a custom destination: + +```bash +ui-test improve e2e/login.yaml --apply --output e2e/login.latest.yaml +``` ### Report Only (CI) @@ -34,6 +46,27 @@ ui-test improve e2e/login.yaml --no-apply `--no-apply` writes a JSON report and does not modify YAML. Useful in CI pipelines where interactive prompts are not available. +### Generate A Review Plan + +```bash +ui-test improve e2e/login.yaml --plan +``` + +`--plan` computes full apply-mode recommendations (selectors + assertions + runtime failure handling) but does not write YAML. +It writes: + +- improve report (`*.improve-report.json`) +- improve plan (`*.improve-plan.json`) + +Apply a reviewed plan explicitly: + +```bash +ui-test improve e2e/login.yaml --apply-plan e2e/login.improve-plan.json +``` + +By default, `--apply-plan` writes `e2e/login.improved.yaml` and preserves `e2e/login.yaml`. +Use `--in-place` to overwrite the input file or `--output ` to choose a custom destination. + In report-only runs (`--no-apply`), assertion candidates keep `applyStatus: not_requested`, including candidates that would be policy-capped or dynamic-filtered in apply mode. Before/after example — a CSS selector upgraded to a semantic locator: @@ -116,7 +149,7 @@ These rules govern how assertions are inserted: 7. In `snapshot-native` mode, improve performs gap-only runtime locator inventory harvesting from post-step aria snapshots and adds inventory fallback candidates only for uncovered interaction steps. 8. Existing adjacent assertions are preserved (no automatic cleanup). 9. Applied assertions are inserted as required steps (no `optional` field). -10. In apply mode, runtime-failing interaction steps are classified: transient dismissal/control interactions are removed aggressively, while likely content/business-intent interactions are retained as required steps. +10. In apply mode, runtime-failing interaction steps are classified with confidence/safety metadata: only high-confidence safe transient dismissal/control interactions are auto-removed; low-confidence or unsafe removals are retained as required steps. ### Auto-Improve After Recording @@ -175,10 +208,9 @@ For dynamic-flagged/brittle targets (for example long exact headline link names) - Requires a unique runtime match (`matchCount === 1`) before generating a repair candidate. - Runs as a dedicated runtime-repair stage after baseline and heuristic locator-repair candidate generation. -- Uses public Playwright locator conversion first (`page.locator(...).toString()`), with a guarded private `_resolveSelector()` fallback when needed. -- Falls back safely to existing repair heuristics when internals are unavailable or conversion fails. +- Converts supported `internal` / selector-engine targets into locator expressions using deterministic public selector parsing. +- Falls back safely to existing repair heuristics when conversion is unavailable or the selector shape is unsupported. - Set `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_REGEN=1` to disable runtime regeneration/conversion and use heuristic repairs only. -- Set `UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK=1` to disable only the private `_resolveSelector()` fallback while keeping public runtime conversion enabled. Runtime regeneration diagnostics: @@ -187,21 +219,25 @@ Runtime regeneration diagnostics: - `selector_repair_playwright_runtime_non_unique` - `selector_repair_playwright_runtime_conversion_failed` - `selector_repair_playwright_runtime_disabled` -- `selector_repair_playwright_runtime_private_fallback_disabled` -- `selector_repair_playwright_runtime_private_fallback_used` ### Report Contents The report includes step-level old/recommended targets, confidence scores, assertion candidates, and diagnostics. +Diagnostics may include decision metadata: + +- `decisionConfidence` +- `mutationType` +- `mutationSafety` +- `evidenceRefs` +- `appliedBy` + The summary includes: - `selectorRepairCandidates` - `selectorRepairsApplied` - `selectorRepairsGeneratedByPlaywrightRuntime` - `selectorRepairsAppliedFromPlaywrightRuntime` -- `selectorRepairsGeneratedByPrivateFallback` -- `selectorRepairsAppliedFromPrivateFallback` - `runtimeFailingStepsRetained` - `runtimeFailingStepsRemoved` - `assertionCandidatesFilteredDynamic` @@ -243,4 +279,4 @@ ui-test improve e2e/login.yaml --report ./reports/login.improve.json - Runtime analysis may replay actions; use a safe test environment. - `improve` requires Chromium availability in CLI runs. - If Chromium is missing, provision it with `ui-test setup` or `npx playwright install chromium`. -- Validation timing mirrors `play` post-step waiting (network idle with Playwright default timeout behavior). +- Validation timing mirrors `play` post-step readiness checks: navigation waits happen automatically, and `networkidle` is opt-in. diff --git a/docs/workflows/record.md b/docs/workflows/record.md index 72820f0..93035de 100644 --- a/docs/workflows/record.md +++ b/docs/workflows/record.md @@ -22,12 +22,12 @@ You will be prompted for: | `-u, --url ` | Starting URL (skips prompt) | prompted | | `-d, --description ` | Test description (skips prompt) | prompted | | `-o, --output-dir ` | Output directory | `e2e` | -| `--selector-policy ` | `reliable` or `raw` | `reliable` | | `--browser ` | `chromium`, `firefox`, or `webkit` | `chromium` | | `--device ` | Playwright device name | none | | `--test-id-attribute ` | Custom test-id attribute | none | | `--load-storage ` | Preload browser storage state | none | | `--save-storage ` | Save browser storage state | none | +| `--improve-mode ` | Auto-improve mode: `report` or `apply` | `report` | | `--no-improve` | Skip automatic improvement after recording | enabled | Skip all prompts by providing name and URL: @@ -38,23 +38,23 @@ ui-test record --name "Login flow" --url http://localhost:3000/login ## After Recording -After the browser is closed, `ui-test` automatically runs `improve` on the new test file. This: +After the browser is closed, `ui-test` automatically runs `improve` on the new test file. + +Default mode is review-first (`--improve-mode report`), which writes only the improve report and leaves the recorded YAML unchanged. +Use `--improve-mode apply` to write an improved copy (`.improved.yaml`) while preserving the original recording. + +Auto-improve can: - Upgrades selectors to more reliable alternatives - Generates assertion candidates (e.g. `assertVisible`, `assertText`) - Classifies runtime-failing interactions (aggressively removes transient dismissal/control `click`/`press` failures, retains non-transient and safeguarded content/business interactions as required steps) -The CLI prints a summary of changes. If auto-improve fails, the recording is still saved and you can run `ui-test improve --apply` manually. +The CLI prints a summary of recommendations/changes. If auto-improve fails, the recording is still saved and you can run `ui-test improve --apply` manually (writes `.improved.yaml` by default). -Auto-improve runs in apply mode. Policy-capped or policy-filtered assertion candidates are reported as `skipped_policy`; report-only improve runs (`--no-apply`) keep candidates as `not_requested`. +Policy-capped or policy-filtered assertion candidates are reported as `skipped_policy`; report-only improve runs keep candidates as `not_requested`. Use `--no-improve` to skip auto-improvement entirely. -## Selector Policy - -- `reliable` (default): prefers normalized locator expressions. -- `raw`: preserves raw selectors when available. - ## Output Quality Summary After recording, the CLI prints a summary of selector quality. diff --git a/package.json b/package.json index 6016346..5db3e7c 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "test:unit": "vitest run --exclude '**/*.integration.test.ts'", "test:integration": "vitest run src/**/*.integration.test.ts", "test:parity:headed": "node ./scripts/run-headed-parity.mjs", + "test:flake:soak": "node ./scripts/run-flake-soak.mjs", "test:coverage": "vitest run --coverage" }, "keywords": [ diff --git a/scripts/run-flake-soak.mjs b/scripts/run-flake-soak.mjs new file mode 100644 index 0000000..bfd9a53 --- /dev/null +++ b/scripts/run-flake-soak.mjs @@ -0,0 +1,165 @@ +/* global console, process */ +import { spawnSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { mkdir, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const scriptDir = path.dirname(fileURLToPath(import.meta.url)); +const repoRoot = path.resolve(scriptDir, ".."); + +export const DEFAULT_FLAKE_TEST_FILES = [ + "src/core/player.integration.test.ts", + "src/core/improve/improve.dynamic.integration.test.ts", +]; + +export function resolveFlakeIterations(env = process.env) { + const raw = env.UI_TEST_FLAKE_ITERATIONS; + if (!raw) return 5; + const parsed = Number(raw); + if (!Number.isInteger(parsed) || parsed <= 0) { + throw new Error(`Invalid UI_TEST_FLAKE_ITERATIONS: ${raw}`); + } + return parsed; +} + +export function resolveFlakeTestFiles(env = process.env) { + const raw = env.UI_TEST_FLAKE_TEST_FILES; + if (!raw) return [...DEFAULT_FLAKE_TEST_FILES]; + + const files = raw + .split(",") + .map((value) => value.trim()) + .filter((value) => value.length > 0); + + if (files.length === 0) { + throw new Error("UI_TEST_FLAKE_TEST_FILES resolved to an empty test file list."); + } + + return [...new Set(files)]; +} + +function defaultReportPath(now = new Date()) { + const stamp = now.toISOString().replace(/[:.]/g, "-"); + return path.join(repoRoot, ".ui-test-artifacts", "flake-soak", `flake-soak-${stamp}.json`); +} + +export function resolveFlakeReportPath(env = process.env, now = new Date()) { + const raw = env.UI_TEST_FLAKE_REPORT_PATH; + if (!raw || raw.trim().length === 0) return defaultReportPath(now); + return path.resolve(repoRoot, raw.trim()); +} + +export function runSingleIteration({ vitestEntry, testFile, iteration, iterations }) { + console.log(`[flake-soak] Iteration ${iteration}/${iterations} :: ${testFile}`); + const startedAt = Date.now(); + const result = spawnSync(process.execPath, [vitestEntry, "run", testFile], { + cwd: repoRoot, + stdio: "inherit", + env: { + ...process.env, + UI_TEST_REQUIRE_HEADED_PARITY: "1", + }, + }); + const durationMs = Date.now() - startedAt; + + if (result.error) { + return { + iteration, + testFile, + status: 1, + durationMs, + error: result.error.message, + }; + } + + return { + iteration, + testFile, + status: result.status ?? 1, + durationMs, + }; +} + +export async function runFlakeSoak(config = {}) { + const iterations = config.iterations ?? resolveFlakeIterations(); + const testFiles = config.testFiles ?? resolveFlakeTestFiles(); + const reportPath = config.reportPath ?? resolveFlakeReportPath(); + + const vitestEntry = path.resolve(repoRoot, "node_modules", "vitest", "vitest.mjs"); + if (!existsSync(vitestEntry)) { + throw new Error(`[flake-soak] vitest entry not found at ${vitestEntry}. Run npm ci first.`); + } + + console.log(`[flake-soak] Running ${iterations} iterations across ${testFiles.length} files.`); + for (const file of testFiles) { + console.log(` - ${file}`); + } + + const runs = []; + for (let iteration = 1; iteration <= iterations; iteration += 1) { + for (const testFile of testFiles) { + runs.push(runSingleIteration({ vitestEntry, testFile, iteration, iterations })); + } + } + + const failures = runs.filter((run) => run.status !== 0).length; + const passes = runs.length - failures; + const perFile = {}; + for (const file of testFiles) { + const fileRuns = runs.filter((run) => run.testFile === file); + const fileFailures = fileRuns.filter((run) => run.status !== 0).length; + perFile[file] = { + passes: fileRuns.length - fileFailures, + failures: fileFailures, + failureRate: + fileRuns.length === 0 ? 0 : Number((fileFailures / fileRuns.length).toFixed(4)), + }; + } + const report = { + generatedAt: new Date().toISOString(), + iterations, + testFiles, + runs, + quarantinePolicy: { + failRateThreshold: 0.05, + minFailures: 2, + action: "quarantine_candidate", + }, + totals: { + passes, + failures, + failureRate: runs.length === 0 ? 0 : Number((failures / runs.length).toFixed(4)), + perFile, + }, + }; + + await mkdir(path.dirname(reportPath), { recursive: true }); + await writeFile(reportPath, JSON.stringify(report, null, 2), "utf-8"); + + console.log(`[flake-soak] Report written to ${reportPath}`); + console.log( + `[flake-soak] Summary: passes=${passes}, failures=${failures}, failureRate=${report.totals.failureRate}` + ); + + return { + exitCode: failures > 0 ? 1 : 0, + reportPath, + report, + }; +} + +async function main() { + try { + const result = await runFlakeSoak(); + process.exitCode = result.exitCode; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(message); + process.exitCode = 1; + } +} + +if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) { + await main(); +} diff --git a/scripts/run-flake-soak.test.mjs b/scripts/run-flake-soak.test.mjs new file mode 100644 index 0000000..74db5d0 --- /dev/null +++ b/scripts/run-flake-soak.test.mjs @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_FLAKE_TEST_FILES, + resolveFlakeIterations, + resolveFlakeReportPath, + resolveFlakeTestFiles, +} from "./run-flake-soak.mjs"; + +describe("run-flake-soak config", () => { + it("uses sane defaults", () => { + expect(resolveFlakeIterations({})).toBe(5); + expect(resolveFlakeTestFiles({})).toEqual(DEFAULT_FLAKE_TEST_FILES); + }); + + it("parses custom iterations and rejects invalid values", () => { + expect(resolveFlakeIterations({ UI_TEST_FLAKE_ITERATIONS: "9" })).toBe(9); + expect(() => resolveFlakeIterations({ UI_TEST_FLAKE_ITERATIONS: "0" })).toThrow( + /Invalid UI_TEST_FLAKE_ITERATIONS/ + ); + }); + + it("parses custom test file list and removes duplicates", () => { + expect( + resolveFlakeTestFiles({ + UI_TEST_FLAKE_TEST_FILES: "a.test.ts, b.test.ts, a.test.ts", + }) + ).toEqual(["a.test.ts", "b.test.ts"]); + }); + + it("resolves default and custom report paths", () => { + const now = new Date("2026-02-22T10:00:00.000Z"); + const defaultPath = resolveFlakeReportPath({}, now); + expect(defaultPath).toContain(".ui-test-artifacts/flake-soak/flake-soak-2026-02-22T10-00-00-000Z.json"); + + const customPath = resolveFlakeReportPath({ UI_TEST_FLAKE_REPORT_PATH: "tmp/flake.json" }, now); + expect(customPath).toMatch(/tmp\/flake\.json$/); + }); +}); diff --git a/scripts/run-headed-parity.mjs b/scripts/run-headed-parity.mjs index dcbc532..b1edefd 100644 --- a/scripts/run-headed-parity.mjs +++ b/scripts/run-headed-parity.mjs @@ -1,48 +1,95 @@ +/* global console, process */ import { spawnSync } from "node:child_process"; import { existsSync } from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; +import { globby } from "globby"; const scriptDir = path.dirname(fileURLToPath(import.meta.url)); const repoRoot = path.resolve(scriptDir, ".."); -const PARITY_TEST_FILES = [ - "src/core/player.integration.test.ts", - "src/core/improve/improve.volatile.integration.test.ts", +export const PARITY_SUITE_PATTERNS = [ + { + id: "player-core", + patterns: ["src/core/player.integration.test.ts"], + }, + { + id: "improve-runtime", + patterns: ["src/core/improve/*.integration.test.ts"], + }, ]; -function main() { +export async function resolveParityTestFiles( + suites = PARITY_SUITE_PATTERNS, + cwd = repoRoot +) { + const resolved = []; + + for (const suite of suites) { + const files = await globby(suite.patterns, { + cwd, + onlyFiles: true, + unique: true, + expandDirectories: false, + }); + + if (files.length === 0) { + throw new Error( + `Parity suite '${suite.id}' resolved to zero test files. Patterns: ${suite.patterns.join(", ")}` + ); + } + + resolved.push(...files); + } + + return [...new Set(resolved)].sort(); +} + +export async function runHeadedParity(testFiles) { const vitestEntry = path.resolve(repoRoot, "node_modules", "vitest", "vitest.mjs"); if (!existsSync(vitestEntry)) { console.error(`[headed-parity] vitest entry not found at ${vitestEntry}. Run npm ci first.`); - process.exitCode = 1; - return; + return 1; } - const result = spawnSync( - process.execPath, - [vitestEntry, "run", ...PARITY_TEST_FILES], - { - cwd: repoRoot, - stdio: "inherit", - env: { - ...process.env, - UI_TEST_REQUIRE_HEADED_PARITY: "1", - }, - } - ); + const files = testFiles ?? (await resolveParityTestFiles()); + if (files.length === 0) { + console.error("[headed-parity] No parity tests resolved."); + return 1; + } + + console.log(`[headed-parity] Running parity suites (${files.length} files):`); + for (const file of files) { + console.log(` - ${file}`); + } + + const result = spawnSync(process.execPath, [vitestEntry, "run", ...files], { + cwd: repoRoot, + stdio: "inherit", + env: { + ...process.env, + UI_TEST_REQUIRE_HEADED_PARITY: "1", + }, + }); if (result.error) { console.error(`[headed-parity] Failed to run parity tests: ${result.error.message}`); - process.exitCode = 1; - return; + return 1; } - if (result.status !== 0) { - process.exitCode = result.status ?? 1; + return result.status ?? 1; +} + +async function main() { + try { + process.exitCode = await runHeadedParity(); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(`[headed-parity] ${message}`); + process.exitCode = 1; } } if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) { - main(); + await main(); } diff --git a/scripts/run-headed-parity.test.mjs b/scripts/run-headed-parity.test.mjs new file mode 100644 index 0000000..9876fb7 --- /dev/null +++ b/scripts/run-headed-parity.test.mjs @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { tmpdir } from "node:os"; +import { + PARITY_SUITE_PATTERNS, + resolveParityTestFiles, +} from "./run-headed-parity.mjs"; + +describe("run-headed-parity resolver", () => { + it("resolves all configured parity suites", async () => { + const files = await resolveParityTestFiles(PARITY_SUITE_PATTERNS, process.cwd()); + + expect(files).toContain("src/core/player.integration.test.ts"); + expect(files).toContain("src/core/improve/improve.dynamic.integration.test.ts"); + expect(files.length).toBeGreaterThanOrEqual(2); + }); + + it("fails when a configured suite resolves to zero tests", async () => { + await expect( + resolveParityTestFiles( + [{ id: "missing-suite", patterns: ["src/**/does-not-exist.integration.test.ts"] }], + process.cwd() + ) + ).rejects.toThrow(/resolved to zero test files/); + }); + + it("deduplicates and sorts matched files", async () => { + const dir = await mkdtemp(path.join(tmpdir(), "ui-test-parity-resolve-")); + + try { + const a = path.join(dir, "b.integration.test.ts"); + const b = path.join(dir, "a.integration.test.ts"); + await writeFile(a, "", "utf-8"); + await writeFile(b, "", "utf-8"); + + const files = await resolveParityTestFiles( + [ + { id: "suite-a", patterns: ["*.integration.test.ts"] }, + { id: "suite-b", patterns: ["a.integration.test.ts"] }, + ], + dir + ); + + expect(files).toEqual(["a.integration.test.ts", "b.integration.test.ts"]); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/src/commands/improve.test.ts b/src/commands/improve.test.ts index 0c30b21..bbd349f 100644 --- a/src/commands/improve.test.ts +++ b/src/commands/improve.test.ts @@ -11,12 +11,18 @@ describe("improve command options", () => { command?.parseOptions([ "--apply", + "--output", + "out.yaml", + "--in-place", "--assertions", "none", "--assertion-source", "snapshot-native", "--assertion-policy", "balanced", + "--plan", + "--apply-plan", + "sample.improve-plan.json", "--report", "report.json", "e2e/sample.yaml", @@ -24,9 +30,13 @@ describe("improve command options", () => { const opts = command?.opts() as Record; expect(opts.apply).toBe(true); + expect(opts.output).toBe("out.yaml"); + expect(opts.inPlace).toBe(true); expect(opts.assertions).toBe("none"); expect(opts.assertionSource).toBe("snapshot-native"); expect(opts.assertionPolicy).toBe("balanced"); + expect(opts.plan).toBe(true); + expect(opts.applyPlan).toBe("sample.improve-plan.json"); expect(opts.report).toBe("report.json"); }); diff --git a/src/commands/improve.ts b/src/commands/improve.ts index 6b519de..2df8fa2 100644 --- a/src/commands/improve.ts +++ b/src/commands/improve.ts @@ -14,6 +14,11 @@ export function registerImprove(program: Command) { .argument("", "Path to the YAML test file to analyze") .option("--apply", "Apply all improvements (selectors and assertions)") .option("--no-apply", "Report only — do not modify the test file") + .option( + "--output ", + "Write improved YAML to a custom path (default: .improved.yaml)" + ) + .option("--in-place", "Overwrite the input YAML test file when applying") .option("--assertions ", "Assertion mode: none or candidates") .option( "--assertion-source ", @@ -23,6 +28,8 @@ export function registerImprove(program: Command) { "--assertion-policy ", "Assertion policy: reliable, balanced, or aggressive" ) + .option("--plan", "Generate a reviewable improve plan without writing YAML") + .option("--apply-plan ", "Apply a previously generated improve plan JSON") .option("--report ", "Write JSON report to a custom path") .action(async (testFile: unknown, opts: unknown) => { try { @@ -40,15 +47,23 @@ function parseImproveCliOptions(value: unknown): ImproveCliOptions { if (!isRawImproveCliOptions(value)) return {}; const out: ImproveCliOptions = {}; const apply = asOptionalBoolean(value.apply); + const output = asOptionalString(value.output); + const inPlace = asOptionalBoolean(value.inPlace); const assertions = asOptionalString(value.assertions); const assertionSource = asOptionalString(value.assertionSource); const assertionPolicy = asOptionalString(value.assertionPolicy); + const plan = asOptionalBoolean(value.plan); + const applyPlan = asOptionalString(value.applyPlan); const report = asOptionalString(value.report); if (apply !== undefined) out.apply = apply; + if (output !== undefined) out.output = output; + if (inPlace !== undefined) out.inPlace = inPlace; if (assertions !== undefined) out.assertions = assertions; if (assertionSource !== undefined) out.assertionSource = assertionSource; if (assertionPolicy !== undefined) out.assertionPolicy = assertionPolicy; + if (plan !== undefined) out.plan = plan; + if (applyPlan !== undefined) out.applyPlan = applyPlan; if (report !== undefined) out.report = report; return out; @@ -56,9 +71,13 @@ function parseImproveCliOptions(value: unknown): ImproveCliOptions { interface RawImproveCliOptions { apply?: unknown; + output?: unknown; + inPlace?: unknown; assertions?: unknown; assertionSource?: unknown; assertionPolicy?: unknown; + plan?: unknown; + applyPlan?: unknown; report?: unknown; } diff --git a/src/core/improve/assertion-apply-validation.ts b/src/core/improve/assertion-apply-validation.ts index 2c28375..e65bfef 100644 --- a/src/core/improve/assertion-apply-validation.ts +++ b/src/core/improve/assertion-apply-validation.ts @@ -2,7 +2,7 @@ import type { Page } from "playwright"; import { executeRuntimeStep } from "../runtime/step-executor.js"; import { DEFAULT_WAIT_FOR_NETWORK_IDLE, - waitForPostStepNetworkIdle, + waitForPostStepReadiness, } from "../runtime/network-idle.js"; import type { Step } from "../yaml-schema.js"; import type { AssertionApplyOutcome } from "./assertion-apply-types.js"; @@ -67,6 +67,12 @@ export async function validateCandidatesAgainstRuntime( if (!step) continue; try { + let beforeUrl: string | undefined; + try { + beforeUrl = page.url(); + } catch { + beforeUrl = undefined; + } await executeRuntimeStep( page, step, @@ -74,20 +80,13 @@ export async function validateCandidatesAgainstRuntime( ? { timeout: options.timeout, mode: "analysis" } : { timeout: options.timeout, baseUrl: options.baseUrl, mode: "analysis" } ); - const networkIdleTimedOut = await waitForPostStepNetworkIdle( + await waitForPostStepReadiness({ page, - waitForNetworkIdle - ); - if (networkIdleTimedOut) { - for (const candidateRef of candidatesByStepIndex.get(index) ?? []) { - outcomes.push({ - candidateIndex: candidateRef.candidateIndex, - applyStatus: "skipped_runtime_failure", - applyMessage: "Post-step network idle wait timed out; assertion skipped.", - }); - } - continue; - } + step, + waitForNetworkIdle, + timeoutMs: options.timeout, + beforeUrl, + }); } catch (err) { const message = err instanceof Error ? err.message : "Unknown runtime replay failure."; diff --git a/src/core/improve/assertion-apply.test.ts b/src/core/improve/assertion-apply.test.ts index 299d734..2ca71a4 100644 --- a/src/core/improve/assertion-apply.test.ts +++ b/src/core/improve/assertion-apply.test.ts @@ -16,8 +16,13 @@ const { executeRuntimeStepMock } = vi.hoisted(() => ({ })); const { waitForPostStepNetworkIdleMock } = vi.hoisted(() => ({ waitForPostStepNetworkIdleMock: vi.fn< - typeof import("../runtime/network-idle.js").waitForPostStepNetworkIdle - >(async () => false), + typeof import("../runtime/network-idle.js").waitForPostStepReadiness + >(async () => ({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: true, + })), })); vi.mock("../runtime/step-executor.js", () => ({ @@ -25,8 +30,8 @@ vi.mock("../runtime/step-executor.js", () => ({ })); vi.mock("../runtime/network-idle.js", () => ({ - DEFAULT_WAIT_FOR_NETWORK_IDLE: true, - waitForPostStepNetworkIdle: waitForPostStepNetworkIdleMock, + DEFAULT_WAIT_FOR_NETWORK_IDLE: false, + waitForPostStepReadiness: waitForPostStepNetworkIdleMock, })); function getExecutedStepAt(callIndex: number): Step { @@ -39,7 +44,12 @@ describe("assertion apply helpers", () => { beforeEach(() => { executeRuntimeStepMock.mockClear(); waitForPostStepNetworkIdleMock.mockClear(); - waitForPostStepNetworkIdleMock.mockResolvedValue(false); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: true, + }); }); it("selects high-confidence candidates and skips low-confidence entries", () => { @@ -372,7 +382,14 @@ describe("assertion apply helpers", () => { expect(outcomes).toHaveLength(2); expect(outcomes.find((item) => item.candidateIndex === 0)?.applyStatus).toBe("skipped_runtime_failure"); expect(outcomes.find((item) => item.candidateIndex === 1)?.applyStatus).toBe("applied"); - expect(waitForPostStepNetworkIdleMock).toHaveBeenCalledWith(expect.anything(), true); + expect(waitForPostStepNetworkIdleMock).toHaveBeenCalledWith( + expect.objectContaining({ + page: expect.anything(), + step: expect.objectContaining({ action: "click" }), + waitForNetworkIdle: false, + timeoutMs: 1000, + }) + ); }); it("prefers higher-priority assertion action when confidence is tied", async () => { @@ -898,9 +915,14 @@ describe("assertion apply helpers", () => { expect(outcomes.find((item) => item.candidateIndex === 1)?.applyStatus).toBe("applied"); }); - it("skips validation for a step when post-step network idle times out", async () => { + it("continues validation for a step when post-step network idle times out", async () => { executeRuntimeStepMock.mockResolvedValue(undefined); - waitForPostStepNetworkIdleMock.mockResolvedValueOnce(true); + waitForPostStepNetworkIdleMock.mockResolvedValueOnce({ + navigationTimedOut: false, + networkIdleTimedOut: true, + usedNavigationWait: false, + usedNetworkIdleWait: true, + }); const outcomes = await validateCandidatesAgainstRuntime( {} as Page, @@ -924,9 +946,8 @@ describe("assertion apply helpers", () => { ); expect(outcomes).toHaveLength(1); - expect(outcomes[0]?.applyStatus).toBe("skipped_runtime_failure"); - expect(outcomes[0]?.applyMessage).toContain("Post-step network idle wait timed out"); - expect(executeRuntimeStepMock).toHaveBeenCalledTimes(1); + expect(outcomes[0]?.applyStatus).toBe("applied"); + expect(executeRuntimeStepMock).toHaveBeenCalledTimes(2); }); it("keeps runtime-failing assertions as skipped_runtime_failure", async () => { diff --git a/src/core/improve/assertion-candidates-inventory.test.ts b/src/core/improve/assertion-candidates-inventory.test.ts index 03076d6..c2ff192 100644 --- a/src/core/improve/assertion-candidates-inventory.test.ts +++ b/src/core/improve/assertion-candidates-inventory.test.ts @@ -16,14 +16,17 @@ describe("buildSnapshotInventoryAssertionCandidates", () => { action: "click", target: { value: "#submit", kind: "css", source: "manual" }, }, - preSnapshot: snapshot, + preSnapshot: "- generic [ref=e1]:\n", postSnapshot: snapshot, + scope: "landmark", }, ]); - expect(out).toHaveLength(2); - expect(out[0]?.candidate.action).toBe("assertText"); - expect(out[1]?.candidate.action).toBe("assertVisible"); + expect(out).toHaveLength(1); + expect(out[0]?.candidate).toMatchObject({ + action: "assertText", + text: "Welcome", + }); }); it("excludes noisy and acted-target-like nodes", () => { @@ -45,6 +48,7 @@ describe("buildSnapshotInventoryAssertionCandidates", () => { ' - heading "12345" [level=2] [ref=e3]', ' - navigation "Main menu" [ref=e4]', ].join("\n") + "\n", + scope: "landmark", }, ]); @@ -64,7 +68,7 @@ describe("buildSnapshotInventoryAssertionCandidates", () => { ).toBe(false); expect( out.some((candidate) => candidate.candidate.action === "assertVisible") - ).toBe(true); + ).toBe(false); }); it("caps inventory output to two candidates per step", () => { @@ -83,6 +87,7 @@ describe("buildSnapshotInventoryAssertionCandidates", () => { ' - navigation "Main menu" [ref=e4]', ' - dialog "Confirmation" [ref=e5]', ].join("\n") + "\n", + scope: "landmark", }, ]); @@ -104,6 +109,7 @@ describe("buildSnapshotInventoryAssertionCandidates", () => { "- generic [ref=e1]:", ' - heading "Results" [level=1] [ref=e2]', ].join("\n") + "\n", + scope: "landmark", }, ]); diff --git a/src/core/improve/assertion-candidates-inventory.ts b/src/core/improve/assertion-candidates-inventory.ts index d2da2ac..3ca7402 100644 --- a/src/core/improve/assertion-candidates-inventory.ts +++ b/src/core/improve/assertion-candidates-inventory.ts @@ -15,10 +15,7 @@ const INVENTORY_TEXT_ROLES = new Set([ ]); const INVENTORY_VISIBLE_ROLES = new Set([ - "navigation", - "banner", "main", - "contentinfo", "dialog", "status", "alert", @@ -37,6 +34,7 @@ export function buildSnapshotInventoryAssertionCandidates( const candidates: AssertionCandidate[] = []; for (const snapshot of snapshots) { + if (snapshot.scope === "body") continue; const postNodes = parseSnapshotNodes(snapshot.postSnapshot); if (postNodes.length === 0) continue; @@ -132,7 +130,7 @@ function buildInventoryTextCandidates( }, confidence: INVENTORY_TEXT_CONFIDENCE, rationale: - "Coverage fallback (inventory): full post-step aria inventory yielded high-signal text.", + "Coverage fallback (inventory): scoped post-step aria snapshot yielded high-signal text.", candidateSource: "snapshot_native", coverageFallback: true, }); @@ -176,7 +174,7 @@ function buildInventoryVisibleCandidates( }, confidence: INVENTORY_VISIBLE_CONFIDENCE, rationale: - "Coverage fallback (inventory): full post-step aria inventory found stable landmark visibility.", + "Coverage fallback (inventory): scoped post-step aria snapshot found stable landmark visibility.", candidateSource: "snapshot_native", coverageFallback: true, }); diff --git a/src/core/improve/assertion-candidates-snapshot.ts b/src/core/improve/assertion-candidates-snapshot.ts index c081d61..14a626d 100644 --- a/src/core/improve/assertion-candidates-snapshot.ts +++ b/src/core/improve/assertion-candidates-snapshot.ts @@ -24,12 +24,14 @@ import { } from "./assertion-candidates-snapshot-shared.js"; import { parseSnapshotNodes } from "./assertion-candidates-snapshot-parser.js"; import { classifyNavigationLikeInteraction } from "./navigation-like-interaction.js"; +import type { StepSnapshotScope } from "./step-snapshot-scope.js"; export interface StepSnapshot { index: number; step: Step; preSnapshot: string; postSnapshot: string; + scope?: StepSnapshotScope; preUrl?: string; postUrl?: string; preTitle?: string; diff --git a/src/core/improve/improve-assertion-pass.test.ts b/src/core/improve/improve-assertion-pass.test.ts index cc50b7e..a7369a4 100644 --- a/src/core/improve/improve-assertion-pass.test.ts +++ b/src/core/improve/improve-assertion-pass.test.ts @@ -5,7 +5,10 @@ import { runImproveAssertionPass } from "./improve-assertion-pass.js"; const { buildAssertionCandidatesMock } = vi.hoisted(() => ({ buildAssertionCandidatesMock: vi.fn< typeof import("./assertion-candidates.js").buildAssertionCandidates - >(() => []), + >(() => ({ + candidates: [], + skippedNavigationLikeClicks: [], + })), })); const { buildSnapshotInventoryAssertionCandidatesMock } = vi.hoisted(() => ({ buildSnapshotInventoryAssertionCandidatesMock: vi.fn< @@ -19,8 +22,13 @@ const { executeRuntimeStepMock } = vi.hoisted(() => ({ })); const { waitForPostStepNetworkIdleMock } = vi.hoisted(() => ({ waitForPostStepNetworkIdleMock: vi.fn< - typeof import("../runtime/network-idle.js").waitForPostStepNetworkIdle - >(async () => false), + typeof import("../runtime/network-idle.js").waitForPostStepReadiness + >(async () => ({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + })), })); vi.mock("./assertion-candidates.js", () => ({ @@ -37,8 +45,8 @@ vi.mock("../runtime/step-executor.js", () => ({ })); vi.mock("../runtime/network-idle.js", () => ({ - DEFAULT_WAIT_FOR_NETWORK_IDLE: true, - waitForPostStepNetworkIdle: waitForPostStepNetworkIdleMock, + DEFAULT_WAIT_FOR_NETWORK_IDLE: false, + waitForPostStepReadiness: waitForPostStepNetworkIdleMock, })); function baseCandidates(): AssertionCandidate[] { @@ -71,16 +79,33 @@ function baseCandidates(): AssertionCandidate[] { ]; } +function deterministicCandidateResult( + candidates: AssertionCandidate[], + skippedNavigationLikeClicks: Array<{ index: number; reason: string }> = [] +) { + return { + candidates, + skippedNavigationLikeClicks, + }; +} + describe("runImproveAssertionPass coverage fallback behavior", () => { beforeEach(() => { vi.resetAllMocks(); executeRuntimeStepMock.mockResolvedValue(undefined); - waitForPostStepNetworkIdleMock.mockResolvedValue(false); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + }); buildSnapshotInventoryAssertionCandidatesMock.mockReturnValue([]); }); it("keeps fallback as backup-only when a stronger candidate exists for the same step", async () => { - buildAssertionCandidatesMock.mockReturnValue(baseCandidates()); + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult(baseCandidates()) + ); const diagnostics: import("./report-schema.js").ImproveDiagnostic[] = []; const result = await runImproveAssertionPass({ @@ -118,21 +143,23 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }); it("keeps fallback eligible when it is the only candidate on a step", async () => { - buildAssertionCandidatesMock.mockReturnValue([ - { - index: 1, - afterAction: "click", - candidate: { - action: "assertVisible", - target: { value: "#submit", kind: "css", source: "manual" }, + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#submit", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: + "Coverage fallback: verify interacted element remains visible after action.", + candidateSource: "deterministic", + coverageFallback: true, }, - confidence: 0.76, - rationale: - "Coverage fallback: verify interacted element remains visible after action.", - candidateSource: "deterministic", - coverageFallback: true, - }, - ]); + ]) + ); const result = await runImproveAssertionPass({ assertions: "candidates", @@ -164,21 +191,23 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }); it("adds inventory candidates for uncovered snapshot-native steps", async () => { - buildAssertionCandidatesMock.mockReturnValue([ - { - index: 1, - afterAction: "click", - candidate: { - action: "assertVisible", - target: { value: "#submit", kind: "css", source: "manual" }, + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertVisible", + target: { value: "#submit", kind: "css", source: "manual" }, + }, + confidence: 0.76, + rationale: + "Coverage fallback: verify interacted element remains visible after action.", + candidateSource: "deterministic", + coverageFallback: true, }, - confidence: 0.76, - rationale: - "Coverage fallback: verify interacted element remains visible after action.", - candidateSource: "deterministic", - coverageFallback: true, - }, - ]); + ]) + ); buildSnapshotInventoryAssertionCandidatesMock.mockReturnValue([ { index: 1, @@ -190,7 +219,7 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }, confidence: 0.79, rationale: - "Coverage fallback (inventory): full post-step aria inventory yielded high-signal text.", + "Coverage fallback (inventory): scoped post-step aria snapshot yielded high-signal text.", candidateSource: "snapshot_native", coverageFallback: true, }, @@ -233,20 +262,22 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }); it("does not run inventory harvesting when non-fallback candidates already cover a step", async () => { - buildAssertionCandidatesMock.mockReturnValue([ - { - index: 1, - afterAction: "click", - candidate: { - action: "assertText", - target: { value: "#status", kind: "css", source: "manual" }, - text: "Saved", + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ + { + index: 1, + afterAction: "click", + candidate: { + action: "assertText", + target: { value: "#status", kind: "css", source: "manual" }, + text: "Saved", + }, + confidence: 0.9, + rationale: "high-signal text", + candidateSource: "deterministic", }, - confidence: 0.9, - rationale: "high-signal text", - candidateSource: "deterministic", - }, - ]); + ]) + ); const result = await runImproveAssertionPass({ assertions: "candidates", @@ -278,7 +309,9 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }); it("does not run inventory harvesting for deterministic assertion source", async () => { - buildAssertionCandidatesMock.mockReturnValue(baseCandidates()); + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult(baseCandidates()) + ); const result = await runImproveAssertionPass({ assertions: "candidates", @@ -326,7 +359,7 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { }, ], skippedNavigationLikeClicks: [], - } as any); + }); const result = await runImproveAssertionPass({ assertions: "candidates", @@ -383,7 +416,7 @@ describe("runImproveAssertionPass coverage fallback behavior", () => { reason: "navigation-like dynamic click target", }, ], - } as any); + }); const diagnostics: import("./report-schema.js").ImproveDiagnostic[] = []; const result = await runImproveAssertionPass({ diff --git a/src/core/improve/improve-runner.ts b/src/core/improve/improve-runner.ts index 948442b..7214bac 100644 --- a/src/core/improve/improve-runner.ts +++ b/src/core/improve/improve-runner.ts @@ -17,8 +17,10 @@ import { classifyRuntimeFailingStep } from "./runtime-failure-classifier.js"; import { type ImproveAssertionPolicy, type ImproveAssertionSource, + type ImproveAppliedBy, type ImproveOptions, type ImproveResult, + RUNTIME_STEP_REMOVE_MIN_CONFIDENCE, } from "./improve-types.js"; import { DEFAULT_IMPROVE_ASSERTION_POLICY } from "./assertion-policy.js"; import { @@ -51,6 +53,14 @@ export async function improveTestFile(options: ImproveOptions): Promise= RUNTIME_STEP_REMOVE_MIN_CONFIDENCE; + + if (safeToAutoRemove) { failedIndexesToRemove.add(index); diagnostics.push({ code: "runtime_failing_step_removed", level: "info", message: `Step ${originalIndex + 1}: removed because it failed at runtime (${classification.reason}).`, + decisionConfidence: classification.decisionConfidence, + mutationType: "runtime_step_removal", + mutationSafety: classification.mutationSafety, + evidenceRefs: classification.evidenceRefs, + appliedBy, }); continue; } failedIndexesToRetain.add(index); + const safetySuffix = + classification.disposition === "remove" + ? " Auto-removal blocked by safety guard." + : ""; diagnostics.push({ code: "runtime_failing_step_retained", level: "info", message: - `Step ${originalIndex + 1}: retained as required step after runtime failure (${classification.reason}).`, + `Step ${originalIndex + 1}: retained as required step after runtime failure (${classification.reason}).${safetySuffix}`, + decisionConfidence: classification.decisionConfidence, + mutationType: "runtime_step_retention", + mutationSafety: classification.mutationSafety, + evidenceRefs: classification.evidenceRefs, + appliedBy, }); } } @@ -209,11 +250,63 @@ export async function improveTestFile(options: ImproveOptions): Promise { + const issuePath = issue.path.length > 0 ? issue.path.join(".") : "(root)"; + return `${issuePath}: ${issue.message}`; + }) + ); + diagnostics.push({ + code: "apply_write_blocked_invalid_output", + level: "error", + message: + "Apply-mode output failed schema validation; YAML write was blocked to prevent an invalid test file.", + mutationType: "none", + mutationSafety: "safe", + evidenceRefs: ["output_validation_failed"], + appliedBy, + }); + } + } + const reportPath = effectiveOptions.reportPath + ? path.resolve(effectiveOptions.reportPath) + : defaultReportPath(absoluteTestPath); + + diagnostics.push({ + code: "reproducibility_hint", + level: "info", + message: `Reproduce runtime behavior with: ui-test play ${absoluteTestPath}`, + mutationType: "none", + mutationSafety: "safe", + evidenceRefs: ["repro:play"], + appliedBy, + }); + diagnostics.push({ + code: "reproducibility_hint", + level: "info", + message: + `Re-run improve report with: ui-test improve ${absoluteTestPath} --no-apply --report ${reportPath}`, + mutationType: "none", + mutationSafety: "safe", + evidenceRefs: ["repro:improve_report"], + appliedBy, + }); const report: ImproveReport = { testFile: absoluteTestPath, generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy, summary: { unchanged: postRemovalFindings.filter((item) => !item.changed).length, improved: postRemovalFindings.filter((item) => item.changed).length, @@ -231,10 +324,6 @@ export async function improveTestFile(options: ImproveOptions): Promise 0) { + throw new ValidationError( + `Improve apply aborted: generated output is invalid and was not written (${absoluteTestPath}).`, + outputValidationIssues + ); + } + let outputPath: string | undefined; - if (wantsWrite) { + if (wantsWrite && !dryRunWrite) { + const absoluteOutputPath = effectiveOptions.outputPath + ? path.resolve(effectiveOptions.outputPath) + : absoluteTestPath; const yamlOptions: { description?: string; baseUrl?: string } = {}; if (test.description !== undefined) { yamlOptions.description = test.description; @@ -287,8 +383,9 @@ export async function improveTestFile(options: ImproveOptions): Promise(async () => ({ dismissed: false })), waitForPostStepNetworkIdleMock: vi.fn< - typeof import("../runtime/network-idle.js").waitForPostStepNetworkIdle - >(async () => false), + typeof import("../runtime/network-idle.js").waitForPostStepReadiness + >(async () => ({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + })), generateRuntimeRepairCandidatesMock: vi.fn< typeof import("./selector-runtime-repair.js").generateRuntimeRepairCandidates >(async () => ({ @@ -54,8 +59,8 @@ vi.mock("../runtime/cookie-banner.js", () => ({ })); vi.mock("../runtime/network-idle.js", () => ({ - DEFAULT_WAIT_FOR_NETWORK_IDLE: true, - waitForPostStepNetworkIdle: waitForPostStepNetworkIdleMock, + DEFAULT_WAIT_FOR_NETWORK_IDLE: false, + waitForPostStepReadiness: waitForPostStepNetworkIdleMock, })); vi.mock("./selector-runtime-repair.js", () => ({ @@ -79,7 +84,12 @@ describe("runImproveSelectorPass", () => { executeRuntimeStepMock.mockImplementation(async () => {}); dismissCookieBannerWithDetailsMock.mockResolvedValue({ dismissed: false }); - waitForPostStepNetworkIdleMock.mockResolvedValue(false); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + }); shouldAdoptCandidateMock.mockReturnValue(false); generateRuntimeRepairCandidatesMock.mockResolvedValue({ candidates: [], diff --git a/src/core/improve/improve-selector-pass.ts b/src/core/improve/improve-selector-pass.ts index 215af69..ece6db0 100644 --- a/src/core/improve/improve-selector-pass.ts +++ b/src/core/improve/improve-selector-pass.ts @@ -2,7 +2,7 @@ import type { Page } from "playwright"; import { executeRuntimeStep } from "../runtime/step-executor.js"; import { DEFAULT_WAIT_FOR_NETWORK_IDLE, - waitForPostStepNetworkIdle, + waitForPostStepReadiness, } from "../runtime/network-idle.js"; import { dismissCookieBannerWithDetails } from "../runtime/cookie-banner.js"; import type { Step, Target } from "../yaml-schema.js"; @@ -16,6 +16,7 @@ import type { ImproveDiagnostic, StepFinding } from "./report-schema.js"; import { applySelectionAndRecordFinding } from "./selector-pass/apply-selection.js"; import { collectCandidatesForStep } from "./selector-pass/collect-candidates.js"; import { selectBestCandidateForStep } from "./selector-pass/select-candidate.js"; +import { prepareScopedStepSnapshot } from "./step-snapshot-scope.js"; export interface SelectorPassResult { outputSteps: Step[]; @@ -27,8 +28,6 @@ export interface SelectorPassResult { selectorRepairsAdoptedOnTie: number; selectorRepairsGeneratedByPlaywrightRuntime: number; selectorRepairsAppliedFromPlaywrightRuntime: number; - selectorRepairsGeneratedByPrivateFallback: number; - selectorRepairsAppliedFromPrivateFallback: number; } type StepWithTarget = Step & { target: Target }; @@ -52,8 +51,6 @@ export async function runImproveSelectorPass(input: { let selectorRepairsAdoptedOnTie = 0; let selectorRepairsGeneratedByPlaywrightRuntime = 0; let selectorRepairsAppliedFromPlaywrightRuntime = 0; - let selectorRepairsGeneratedByPrivateFallback = 0; - let selectorRepairsAppliedFromPrivateFallback = 0; for (let index = 0; index < outputSteps.length; index += 1) { const step = outputSteps[index]; @@ -73,8 +70,6 @@ export async function runImproveSelectorPass(input: { selectorRepairCandidates += candidateCollection.selectorRepairCandidatesAdded; selectorRepairsGeneratedByPlaywrightRuntime += candidateCollection.selectorRepairsGeneratedByPlaywrightRuntime; - selectorRepairsGeneratedByPrivateFallback += - candidateCollection.selectorRepairsGeneratedByPrivateFallback; const scored = await scoreTargetCandidates( input.page, @@ -117,15 +112,11 @@ export async function runImproveSelectorPass(input: { scored, diagnostics: input.diagnostics, runtimeRepairCandidateKeys: candidateCollection.runtimeRepairCandidateKeys, - privateFallbackRuntimeRepairCandidateKeys: - candidateCollection.privateFallbackRuntimeRepairCandidateKeys, }); selectorRepairsApplied += applyMetrics.selectorRepairsApplied; selectorRepairsAdoptedOnTie += applyMetrics.selectorRepairsAdoptedOnTie; selectorRepairsAppliedFromPlaywrightRuntime += applyMetrics.selectorRepairsAppliedFromPlaywrightRuntime; - selectorRepairsAppliedFromPrivateFallback += - applyMetrics.selectorRepairsAppliedFromPrivateFallback; } } @@ -133,14 +124,21 @@ export async function runImproveSelectorPass(input: { continue; } - let preSnapshot: string | undefined; + let scopedSnapshot: + | { + preSnapshot: string; + capturePostSnapshot: () => Promise; + scope: NonNullable; + } + | undefined; let preUrl: string | undefined; let preTitle: string | undefined; if (input.wantsNativeSnapshots) { - preSnapshot = await input.page - .locator("body") - .ariaSnapshot({ timeout: DEFAULT_SCORING_TIMEOUT_MS }) - .catch(() => undefined); + scopedSnapshot = await prepareScopedStepSnapshot( + input.page, + step, + DEFAULT_SCORING_TIMEOUT_MS + ); try { preUrl = input.page.url(); preTitle = await input.page.title(); @@ -151,6 +149,14 @@ export async function runImproveSelectorPass(input: { } try { + let beforeUrl = preUrl; + if (beforeUrl === undefined) { + try { + beforeUrl = input.page.url(); + } catch { + beforeUrl = undefined; + } + } const dismissResult = await dismissCookieBannerWithDetails( input.page, Math.min(DEFAULT_RUNTIME_TIMEOUT_MS, 1200) @@ -174,27 +180,25 @@ export async function runImproveSelectorPass(input: { timeout: DEFAULT_RUNTIME_TIMEOUT_MS, baseUrl: input.testBaseUrl, mode: "analysis", - } + } ); - } catch (err) { - failedStepIndexes.push(index); - input.diagnostics.push({ - code: "runtime_step_execution_failed", - level: "warn", - message: - err instanceof Error - ? `Runtime execution failed at step ${originalIndex + 1}; continuing with best-effort analysis. ${err.message}` - : `Runtime execution failed at step ${originalIndex + 1}; continuing with best-effort analysis.`, - }); - } - if (input.wantsNativeSnapshots) { try { - const networkIdleTimedOut = await waitForPostStepNetworkIdle( - input.page, - DEFAULT_WAIT_FOR_NETWORK_IDLE - ); - if (networkIdleTimedOut) { + const readiness = await waitForPostStepReadiness({ + page: input.page, + step: runtimeStep, + waitForNetworkIdle: DEFAULT_WAIT_FOR_NETWORK_IDLE, + timeoutMs: DEFAULT_RUNTIME_TIMEOUT_MS, + beforeUrl, + }); + if (readiness.navigationTimedOut) { + input.diagnostics.push({ + code: "runtime_navigation_readiness_wait_timed_out", + level: "warn", + message: `Runtime navigation readiness wait timed out at step ${originalIndex + 1}; capturing best-effort snapshot state.`, + }); + } + if (readiness.networkIdleTimedOut) { input.diagnostics.push({ code: "runtime_network_idle_wait_timed_out", level: "warn", @@ -203,21 +207,28 @@ export async function runImproveSelectorPass(input: { } } catch (err) { input.diagnostics.push({ - code: "runtime_network_idle_wait_failed", + code: "runtime_post_step_readiness_failed", level: "warn", message: err instanceof Error - ? `Runtime network idle wait failed at step ${originalIndex + 1}; continuing with best-effort analysis. ${err.message}` - : `Runtime network idle wait failed at step ${originalIndex + 1}; continuing with best-effort analysis.`, + ? `Runtime readiness wait failed at step ${originalIndex + 1}; continuing with best-effort analysis. ${err.message}` + : `Runtime readiness wait failed at step ${originalIndex + 1}; continuing with best-effort analysis.`, }); } + } catch (err) { + failedStepIndexes.push(index); + input.diagnostics.push({ + code: "runtime_step_execution_failed", + level: "warn", + message: + err instanceof Error + ? `Runtime execution failed at step ${originalIndex + 1}; continuing with best-effort analysis. ${err.message}` + : `Runtime execution failed at step ${originalIndex + 1}; continuing with best-effort analysis.`, + }); } - if (input.wantsNativeSnapshots && preSnapshot !== undefined) { - const postSnapshot = await input.page - .locator("body") - .ariaSnapshot({ timeout: DEFAULT_SCORING_TIMEOUT_MS }) - .catch(() => undefined); + if (input.wantsNativeSnapshots && scopedSnapshot) { + const postSnapshot = await scopedSnapshot.capturePostSnapshot(); if (postSnapshot) { let postUrl = ""; let postTitle = ""; @@ -231,8 +242,9 @@ export async function runImproveSelectorPass(input: { const stepSnapshot: StepSnapshot = { index, step, - preSnapshot, + preSnapshot: scopedSnapshot.preSnapshot, postSnapshot, + scope: scopedSnapshot.scope, }; if (preUrl !== undefined) stepSnapshot.preUrl = preUrl; if (postUrl !== undefined) stepSnapshot.postUrl = postUrl; @@ -253,8 +265,6 @@ export async function runImproveSelectorPass(input: { selectorRepairsAdoptedOnTie, selectorRepairsGeneratedByPlaywrightRuntime, selectorRepairsAppliedFromPlaywrightRuntime, - selectorRepairsGeneratedByPrivateFallback, - selectorRepairsAppliedFromPrivateFallback, }; } diff --git a/src/core/improve/improve-types.ts b/src/core/improve/improve-types.ts index fc8ab97..e9fc184 100644 --- a/src/core/improve/improve-types.ts +++ b/src/core/improve/improve-types.ts @@ -1,25 +1,49 @@ import type { ImproveReport } from "./report-schema.js"; +import type { Step } from "../yaml-schema.js"; export type ImproveAssertionsMode = "none" | "candidates"; export type ImproveAssertionSource = "deterministic" | "snapshot-native"; export type ImproveAssertionPolicy = "reliable" | "balanced" | "aggressive"; +export type ImproveAppliedBy = + | "auto_apply" + | "manual_apply" + | "plan_apply" + | "plan_preview" + | "report_only"; export interface ImproveOptions { testFile: string; + /** + * Optional output path for apply-mode writes. When omitted, apply-mode writes + * update the input test file in place. + */ + outputPath?: string; applySelectors: boolean; applyAssertions: boolean; assertions: ImproveAssertionsMode; assertionSource?: ImproveAssertionSource; assertionPolicy?: ImproveAssertionPolicy; reportPath?: string; + dryRunWrite?: boolean; + includeProposedTest?: boolean; + appliedBy?: ImproveAppliedBy; +} + +export interface ImproveProposedTest { + name: string; + description?: string; + baseUrl?: string; + steps: Step[]; } export interface ImproveResult { report: ImproveReport; reportPath: string; outputPath?: string; + proposedTest?: ImproveProposedTest; } export const DEFAULT_RUNTIME_TIMEOUT_MS = 3_000; export const DEFAULT_SCORING_TIMEOUT_MS = 1_200; export const ASSERTION_APPLY_MIN_CONFIDENCE = 0.75; +export const RUNTIME_STEP_REMOVE_MIN_CONFIDENCE = 0.85; diff --git a/src/core/improve/improve.apply-runtime.test.ts b/src/core/improve/improve.apply-runtime.test.ts index 3fe59be..e3a5834 100644 --- a/src/core/improve/improve.apply-runtime.test.ts +++ b/src/core/improve/improve.apply-runtime.test.ts @@ -3,6 +3,7 @@ import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { Step } from "../yaml-schema.js"; +import type { AssertionCandidate } from "./report-schema.js"; const { executeRuntimeStepMock } = vi.hoisted(() => ({ executeRuntimeStepMock: vi.fn< @@ -12,12 +13,20 @@ const { executeRuntimeStepMock } = vi.hoisted(() => ({ const { buildAssertionCandidatesMock } = vi.hoisted(() => ({ buildAssertionCandidatesMock: vi.fn< typeof import("./assertion-candidates.js").buildAssertionCandidates - >(() => []), + >(() => ({ + candidates: [], + skippedNavigationLikeClicks: [], + })), })); const { waitForPostStepNetworkIdleMock } = vi.hoisted(() => ({ waitForPostStepNetworkIdleMock: vi.fn< - typeof import("../runtime/network-idle.js").waitForPostStepNetworkIdle - >(async () => false), + typeof import("../runtime/network-idle.js").waitForPostStepReadiness + >(async () => ({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + })), })); const { generateRuntimeRepairCandidatesMock } = vi.hoisted(() => ({ generateRuntimeRepairCandidatesMock: vi.fn< @@ -56,8 +65,8 @@ vi.mock("../runtime/step-executor.js", () => ({ })); vi.mock("../runtime/network-idle.js", () => ({ - waitForPostStepNetworkIdle: waitForPostStepNetworkIdleMock, - DEFAULT_WAIT_FOR_NETWORK_IDLE: true, + waitForPostStepReadiness: waitForPostStepNetworkIdleMock, + DEFAULT_WAIT_FOR_NETWORK_IDLE: false, })); vi.mock("./candidate-generator.js", () => ({ @@ -136,6 +145,16 @@ function getExecutedStepAt(callIndex: number): Step { return call![1]; } +function deterministicCandidateResult( + candidates: AssertionCandidate[], + skippedNavigationLikeClicks: Array<{ index: number; reason: string }> = [] +) { + return { + candidates, + skippedNavigationLikeClicks, + }; +} + describe("improve apply runtime replay", () => { const tempDirs: string[] = []; @@ -143,9 +162,16 @@ describe("improve apply runtime replay", () => { executeRuntimeStepMock.mockClear(); executeRuntimeStepMock.mockImplementation(async () => {}); buildAssertionCandidatesMock.mockClear(); - buildAssertionCandidatesMock.mockReturnValue([]); + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([]) + ); waitForPostStepNetworkIdleMock.mockClear(); - waitForPostStepNetworkIdleMock.mockResolvedValue(false); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: false, + }); generateRuntimeRepairCandidatesMock.mockClear(); generateRuntimeRepairCandidatesMock.mockResolvedValue({ candidates: [], @@ -316,7 +342,7 @@ describe("improve apply runtime replay", () => { sourceMarkers: [ { candidateId: "repair-playwright-runtime-1", - source: "resolved_selector_fallback", + source: "public_conversion", }, ], }); @@ -422,7 +448,7 @@ describe("improve apply runtime replay", () => { sourceMarkers: [ { candidateId: "repair-playwright-runtime", - source: "resolved_selector_fallback", + source: "public_conversion", }, ], }); @@ -494,7 +520,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "fill", @@ -506,7 +533,8 @@ describe("improve apply runtime replay", () => { confidence: 0.9, rationale: "stable input value", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -546,7 +574,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -558,7 +587,8 @@ describe("improve apply runtime replay", () => { confidence: 0.9, rationale: "stable state check", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -598,7 +628,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -609,7 +640,8 @@ describe("improve apply runtime replay", () => { confidence: 0.6, rationale: "insufficient confidence", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -781,7 +813,8 @@ describe("improve apply runtime replay", () => { "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -807,7 +840,8 @@ describe("improve apply runtime replay", () => { rationale: "stronger text candidate", candidateSource: "deterministic", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -859,7 +893,8 @@ describe("improve apply runtime replay", () => { } }); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -885,7 +920,8 @@ describe("improve apply runtime replay", () => { rationale: "stronger text candidate", candidateSource: "deterministic", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -927,7 +963,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -940,7 +977,8 @@ describe("improve apply runtime replay", () => { confidence: 0.98, rationale: "snapshot text", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -981,7 +1019,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1018,7 +1057,8 @@ describe("improve apply runtime replay", () => { confidence: 0.93, rationale: "snapshot text", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -1058,7 +1098,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1095,7 +1136,8 @@ describe("improve apply runtime replay", () => { confidence: 0.94, rationale: "snapshot text", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -1141,7 +1183,8 @@ describe("improve apply runtime replay", () => { throw new Error("Expected element to be visible"); } }); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1152,7 +1195,8 @@ describe("improve apply runtime replay", () => { confidence: 0.85, rationale: "runtime validation candidate", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -1193,7 +1237,8 @@ describe("improve apply runtime replay", () => { ].join("\n"), "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1217,7 +1262,8 @@ describe("improve apply runtime replay", () => { rationale: "secondary assertion", candidateSource: "snapshot_native", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, @@ -1328,13 +1374,18 @@ describe("improve apply runtime replay", () => { expect( result.report.diagnostics.some( - (diagnostic) => diagnostic.code === "runtime_network_idle_wait_failed" + (diagnostic) => diagnostic.code === "runtime_post_step_readiness_failed" ) ).toBe(true); }); it("reports timeout warning when native snapshot network-idle wait times out", async () => { - waitForPostStepNetworkIdleMock.mockResolvedValueOnce(true); + waitForPostStepNetworkIdleMock.mockResolvedValueOnce({ + navigationTimedOut: false, + networkIdleTimedOut: true, + usedNavigationWait: false, + usedNetworkIdleWait: true, + }); const dir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-snapshot-native-timeout-")); tempDirs.push(dir); @@ -1570,7 +1621,8 @@ describe("improve apply runtime replay", () => { " source: manual", ].join("\n"); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1598,7 +1650,8 @@ describe("improve apply runtime replay", () => { rationale: "snapshot visible candidate", candidateSource: "snapshot_native", }, - ]); + ]) + ); const reliableDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-policy-reliable-")); const balancedDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-policy-balanced-")); @@ -1650,7 +1703,8 @@ describe("improve apply runtime replay", () => { await fs.writeFile(reliableYaml, yamlBase, "utf-8"); await fs.writeFile(balancedYaml, yamlBase, "utf-8"); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1675,7 +1729,8 @@ describe("improve apply runtime replay", () => { rationale: "deterministic text", candidateSource: "deterministic", }, - ]); + ]) + ); const reliableResult = await improveTestFile({ testFile: reliableYaml, @@ -1715,7 +1770,8 @@ describe("improve apply runtime replay", () => { " source: manual", ].join("\n"); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1727,7 +1783,8 @@ describe("improve apply runtime replay", () => { rationale: "snapshot visible candidate", candidateSource: "snapshot_native", }, - ]); + ]) + ); const reliableDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-visible-reliable-")); const balancedDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-visible-balanced-")); @@ -1769,7 +1826,8 @@ describe("improve apply runtime replay", () => { " source: manual", ].join("\n"); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1805,7 +1863,8 @@ describe("improve apply runtime replay", () => { rationale: "visible", candidateSource: "deterministic", }, - ]); + ]) + ); const reliableDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-cap-reliable-")); const balancedDir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-cap-balanced-")); @@ -1866,7 +1925,8 @@ describe("improve apply runtime replay", () => { "utf-8" ); - buildAssertionCandidatesMock.mockReturnValue([ + buildAssertionCandidatesMock.mockReturnValue( + deterministicCandidateResult([ { index: 1, afterAction: "click", @@ -1878,7 +1938,8 @@ describe("improve apply runtime replay", () => { rationale: "snapshot visible candidate", candidateSource: "snapshot_native", }, - ]); + ]) + ); const result = await improveTestFile({ testFile: yamlPath, diff --git a/src/core/improve/improve.test.ts b/src/core/improve/improve.test.ts index 889faa2..04773e0 100644 --- a/src/core/improve/improve.test.ts +++ b/src/core/improve/improve.test.ts @@ -130,6 +130,27 @@ describe("improveTestFile runner", () => { return yamlPath; } + async function writeYamlWithOnlyTransientStep(): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-")); + tempDirs.push(dir); + + const yamlPath = path.join(dir, "transient-only.yaml"); + await fs.writeFile( + yamlPath, + [ + "name: transient-only", + "steps:", + " - action: click", + " target:", + ' value: "#cookie-accept"', + " kind: css", + " source: manual", + ].join("\n") + "\n", + "utf-8" + ); + return yamlPath; + } + it("fails in review mode when chromium is unavailable", async () => { chromiumLaunchMock.mockRejectedValueOnce(new Error("Executable doesn't exist")); const yamlPath = await writeSampleYaml(); @@ -230,6 +251,71 @@ describe("improveTestFile runner", () => { expect(written).toContain("submit"); }); + it("writes apply output to outputPath when provided and preserves the original", async () => { + const yamlPath = await writeYamlWithTransientStep(); + const improvedPath = path.join(path.dirname(yamlPath), "transient.improved.yaml"); + + runImproveSelectorPassMock.mockImplementation(async (input) => ({ + outputSteps: input.steps, + findings: [], + nativeStepSnapshots: [], + failedStepIndexes: [1], + })); + + const result = await improveTestFile({ + testFile: yamlPath, + outputPath: improvedPath, + applySelectors: true, + applyAssertions: false, + assertions: "none", + }); + + expect(result.outputPath).toBe(path.resolve(improvedPath)); + + const original = await fs.readFile(yamlPath, "utf-8"); + expect(original).toContain("cookie-accept"); + + const improved = await fs.readFile(improvedPath, "utf-8"); + expect(improved).not.toContain("cookie-accept"); + expect(improved).toContain("submit"); + }); + + it("blocks YAML write when apply output would fail schema validation", async () => { + const yamlPath = await writeYamlWithOnlyTransientStep(); + + runImproveSelectorPassMock.mockImplementation(async (input) => ({ + outputSteps: input.steps, + findings: [], + nativeStepSnapshots: [], + failedStepIndexes: [0], + })); + + const run = improveTestFile({ + testFile: yamlPath, + applySelectors: true, + applyAssertions: false, + assertions: "none", + }); + + await expect(run).rejects.toBeInstanceOf(ValidationError); + await expect(run).rejects.toMatchObject({ + issues: expect.arrayContaining([ + expect.stringContaining("steps: Test must have at least one step"), + ]), + }); + + const unchangedYaml = await fs.readFile(yamlPath, "utf-8"); + expect(unchangedYaml).toContain("cookie-accept"); + + const reportPath = path.join(path.dirname(yamlPath), "transient-only.improve-report.json"); + const report = JSON.parse(await fs.readFile(reportPath, "utf-8")) as { + diagnostics: Array<{ code: string }>; + }; + expect( + report.diagnostics.some((diagnostic) => diagnostic.code === "apply_write_blocked_invalid_output") + ).toBe(true); + }); + it("does not remove navigate steps even if they fail", async () => { const yamlPath = await writeYamlWithTransientStep(); @@ -409,4 +495,54 @@ describe("improveTestFile runner", () => { expect(written).toContain("navigate"); expect(result.report.summary.runtimeFailingStepsRetained).toBe(1); }); + + it("retains low-confidence transient removals behind safety guard", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "ui-test-improve-")); + tempDirs.push(dir); + + const yamlPath = path.join(dir, "transient-soft.yaml"); + await fs.writeFile( + yamlPath, + [ + "name: transient-soft", + "baseUrl: https://example.com", + "steps:", + " - action: navigate", + ' url: "/"', + " - action: click", + " target:", + ' value: "getByRole(\'button\', { name: \'Close privacy notice\' })"', + " kind: locatorExpression", + " source: manual", + ].join("\n") + "\n", + "utf-8" + ); + + runImproveSelectorPassMock.mockImplementation(async (input) => ({ + outputSteps: input.steps, + findings: [], + nativeStepSnapshots: [], + failedStepIndexes: [1], + selectorRepairCandidates: 0, + selectorRepairsApplied: 0, + })); + + const result = await improveTestFile({ + testFile: yamlPath, + applySelectors: true, + applyAssertions: false, + assertions: "none", + }); + + const written = await fs.readFile(yamlPath, "utf-8"); + expect(written).toContain("Close privacy notice"); + expect(result.report.summary.runtimeFailingStepsRetained).toBe(1); + expect( + result.report.diagnostics.some( + (diagnostic) => + diagnostic.code === "runtime_failing_step_retained" && + diagnostic.mutationSafety === "unsafe_to_auto_apply" + ) + ).toBe(true); + }); }); diff --git a/src/core/improve/improve.ts b/src/core/improve/improve.ts index 5495505..4781d26 100644 --- a/src/core/improve/improve.ts +++ b/src/core/improve/improve.ts @@ -4,6 +4,8 @@ export type { ImproveAssertionSource, ImproveAssertionsMode, ImproveAssertionPolicy, + ImproveAppliedBy, ImproveOptions, + ImproveProposedTest, ImproveResult, } from "./improve-types.js"; diff --git a/src/core/improve/playwright-runtime-selector-adapter.ts b/src/core/improve/playwright-runtime-selector-adapter.ts index 4eb2019..e2201d9 100644 --- a/src/core/improve/playwright-runtime-selector-adapter.ts +++ b/src/core/improve/playwright-runtime-selector-adapter.ts @@ -1,24 +1,10 @@ -import type { Locator, Page } from "playwright"; -import { looksLikeLocatorExpression } from "../locator-expression.js"; import type { Target } from "../yaml-schema.js"; -interface SelectorResolutionPayload { - resolvedSelector: string; -} - -interface LocatorWithPrivateResolveSelector extends Locator { - _resolveSelector?: () => Promise; -} - export interface RuntimeSelectorAdapterDependencies { - toLocatorExpressionFromSelectorFn?: ( - page: Page, - selector: string - ) => string | undefined; + convertSelectorFn?: (selector: string) => string | undefined; } export function convertRuntimeTargetToLocatorExpression( - page: Page, target: Target, dependencies: RuntimeSelectorAdapterDependencies = {} ): string | undefined { @@ -26,37 +12,36 @@ export function convertRuntimeTargetToLocatorExpression( return undefined; } - const converter = dependencies.toLocatorExpressionFromSelectorFn ?? toLocatorExpressionFromSelector; - return converter(page, target.value); -} - -export function getPrivateResolveSelector( - locator: Locator -): (() => Promise) | undefined { - const candidate = locator as LocatorWithPrivateResolveSelector; - const maybeFn = candidate._resolveSelector; - if (typeof maybeFn !== "function") return undefined; - return maybeFn; -} - -export function readResolvedSelectorValue(value: unknown): string | undefined { - if (!value || typeof value !== "object") return undefined; - const maybeSelector = (value as Record)["resolvedSelector"]; - if (typeof maybeSelector !== "string") return undefined; - const trimmed = maybeSelector.trim(); - return trimmed.length > 0 ? trimmed : undefined; + const converter = dependencies.convertSelectorFn ?? toLocatorExpressionFromSelector; + return converter(target.value); } export function toLocatorExpressionFromSelector( - page: Page, selector: string ): string | undefined { - try { - const locator = page.locator(selector); - const expression = locator.toString(); - return normalizeLocatorExpression(expression); - } catch { - return undefined; + const terminalSelector = extractTerminalSelectorSegment(selector); + if (!terminalSelector) return undefined; + + const internalRole = parseInternalRoleSelector(terminalSelector); + if (internalRole?.name) { + return `getByRole(${quote(internalRole.role)}, { name: ${quote(internalRole.name)} })`; + } + + const engineSelector = parseEngineSelector(terminalSelector); + if (!engineSelector) return undefined; + + switch (engineSelector.engine) { + case "data-testid": + case "data-test-id": + return `getByTestId(${quote(engineSelector.body)})`; + case "text": + return `getByText(${quote(engineSelector.body)})`; + case "css": + return `locator(${quote(engineSelector.body)})`; + case "xpath": + return `locator(${quote(engineSelector.body.startsWith("xpath=") ? engineSelector.body : `xpath=${engineSelector.body}`)})`; + default: + return undefined; } } @@ -72,18 +57,66 @@ export function shouldRetainFramePath( return true; } -function normalizeLocatorExpression(value: string): string | undefined { - const trimmed = value.trim(); - if (!trimmed) return undefined; +function extractTerminalSelectorSegment(selector: string): string | undefined { + const parts = selector + .split(/\s*>>\s*/u) + .map((part) => part.trim()) + .filter((part) => part.length > 0); + for (let index = parts.length - 1; index >= 0; index -= 1) { + const candidate = parts[index]; + if (!candidate || candidate.startsWith("internal:control=enter-frame")) { + continue; + } + return candidate; + } + return undefined; +} + +function parseEngineSelector( + selector: string +): { engine: string; body: string } | undefined { + const index = selector.indexOf("="); + if (index <= 0) return undefined; + const engine = selector.slice(0, index).trim(); + const rawBody = selector.slice(index + 1).trim(); + if (!engine || !rawBody) return undefined; + const body = unquote(rawBody) ?? rawBody; + return body ? { engine, body } : undefined; +} + +function parseInternalRoleSelector( + selector: string +): { role: string; name?: string } | undefined { + const match = /^internal:role=([^[\s]+)(?:\[(.+)\])?$/u.exec(selector); + if (!match?.[1]) return undefined; + const role = match[1].trim(); + const attrs = match[2]?.trim(); + if (!role) return undefined; + if (!attrs) return { role }; + + const nameMatch = /name=(?:"([^"]*)"|'([^']*)'|([^\]\s]+))(?:[is])?/u.exec(attrs); + const rawName = nameMatch?.[1] ?? nameMatch?.[2] ?? nameMatch?.[3]; + const name = rawName ? unescapeSelectorValue(rawName) : undefined; + return name ? { role, name } : { role }; +} + +function quote(value: string): string { + return `'${value.replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/\n/g, "\\n")}'`; +} + +function unquote(value: string): string | undefined { + if (value.length < 2) return undefined; + const quoteChar = value[0]; + if ((quoteChar !== "'" && quoteChar !== '"') || value[value.length - 1] !== quoteChar) { + return undefined; + } + return unescapeSelectorValue(value.slice(1, -1)); +} - const withoutAwait = trimmed.replace(/^await\s+/u, ""); - const withoutSemicolon = withoutAwait.endsWith(";") - ? withoutAwait.slice(0, -1).trim() - : withoutAwait; - const withoutPagePrefix = withoutSemicolon.startsWith("page.") - ? withoutSemicolon.slice("page.".length) - : withoutSemicolon; - const normalized = withoutPagePrefix.trim(); - if (!looksLikeLocatorExpression(normalized)) return undefined; - return normalized; +function unescapeSelectorValue(value: string): string { + return value + .replace(/\\'/g, "'") + .replace(/\\"/g, '"') + .replace(/\\n/g, "\n") + .replace(/\\\\/g, "\\"); } diff --git a/src/core/improve/report-schema.test.ts b/src/core/improve/report-schema.test.ts index e39196c..ad235f8 100644 --- a/src/core/improve/report-schema.test.ts +++ b/src/core/improve/report-schema.test.ts @@ -7,6 +7,7 @@ describe("improveReportSchema", () => { testFile: "/tmp/sample.yaml", generatedAt: new Date().toISOString(), providerUsed: "playwright", + appliedBy: "report_only", summary: { unchanged: 1, improved: 1, @@ -20,8 +21,6 @@ describe("improveReportSchema", () => { selectorRepairsAdoptedOnTie: 1, selectorRepairsGeneratedByPlaywrightRuntime: 1, selectorRepairsAppliedFromPlaywrightRuntime: 1, - selectorRepairsGeneratedByPrivateFallback: 1, - selectorRepairsAppliedFromPrivateFallback: 1, deterministicAssertionsSkippedNavigationLikeClick: 2, runtimeFailingStepsRetained: 2, runtimeFailingStepsRemoved: 1, @@ -80,7 +79,18 @@ describe("improveReportSchema", () => { applyMessage: "Skipped by policy", }, ], - diagnostics: [], + diagnostics: [ + { + code: "runtime_failing_step_retained", + level: "info", + message: "Step 2 retained after runtime failure.", + decisionConfidence: 0.74, + mutationType: "runtime_step_retention", + mutationSafety: "unsafe_to_auto_apply", + evidenceRefs: ["context:soft_transient"], + appliedBy: "report_only", + }, + ], }); expect(parsed.summary.appliedAssertions).toBe(1); @@ -89,8 +99,6 @@ describe("improveReportSchema", () => { expect(parsed.summary.selectorRepairsAdoptedOnTie).toBe(1); expect(parsed.summary.selectorRepairsGeneratedByPlaywrightRuntime).toBe(1); expect(parsed.summary.selectorRepairsAppliedFromPlaywrightRuntime).toBe(1); - expect(parsed.summary.selectorRepairsGeneratedByPrivateFallback).toBe(1); - expect(parsed.summary.selectorRepairsAppliedFromPrivateFallback).toBe(1); expect(parsed.summary.deterministicAssertionsSkippedNavigationLikeClick).toBe(2); expect(parsed.summary.runtimeFailingStepsRetained).toBe(2); expect(parsed.summary.assertionCandidatesFilteredDynamic).toBe(1); @@ -107,6 +115,8 @@ describe("improveReportSchema", () => { expect(parsed.summary.assertionInventoryGapStepsFilled).toBe(2); expect(parsed.summary.assertionApplyStatusCounts?.applied).toBe(1); expect(parsed.summary.assertionCandidateSourceCounts?.snapshot_native).toBe(1); + expect(parsed.appliedBy).toBe("report_only"); + expect(parsed.diagnostics[0]?.decisionConfidence).toBe(0.74); expect(parsed.assertionCandidates[0]?.candidateSource).toBe("deterministic"); expect(parsed.assertionCandidates[0]?.coverageFallback).toBe(true); expect(parsed.assertionCandidates[0]?.applyStatus).toBe("applied"); diff --git a/src/core/improve/report-schema.ts b/src/core/improve/report-schema.ts index e0e316f..cea5c31 100644 --- a/src/core/improve/report-schema.ts +++ b/src/core/improve/report-schema.ts @@ -3,10 +3,38 @@ import { stepSchema, targetSchema } from "../yaml-schema.js"; export const improveProviderSchema = z.enum(["playwright"]); +export const improveMutationTypeSchema = z.enum([ + "selector_update", + "assertion_insert", + "runtime_step_removal", + "runtime_step_retention", + "stale_assertion_removal", + "none", +]); + +export const improveMutationSafetySchema = z.enum([ + "safe", + "review_required", + "unsafe_to_auto_apply", +]); + +export const improveAppliedBySchema = z.enum([ + "auto_apply", + "manual_apply", + "plan_apply", + "plan_preview", + "report_only", +]); + export const improveDiagnosticSchema = z.object({ code: z.string().min(1), level: z.enum(["info", "warn", "error"]), message: z.string().min(1), + decisionConfidence: z.number().min(0).max(1).optional(), + mutationType: improveMutationTypeSchema.optional(), + mutationSafety: improveMutationSafetySchema.optional(), + evidenceRefs: z.array(z.string()).optional(), + appliedBy: improveAppliedBySchema.optional(), }); export const stepFindingSchema = z.object({ @@ -69,8 +97,6 @@ export const improveSummarySchema = z.object({ selectorRepairsAdoptedOnTie: z.number().int().nonnegative().optional(), selectorRepairsGeneratedByPlaywrightRuntime: z.number().int().nonnegative().optional(), selectorRepairsAppliedFromPlaywrightRuntime: z.number().int().nonnegative().optional(), - selectorRepairsGeneratedByPrivateFallback: z.number().int().nonnegative().optional(), - selectorRepairsAppliedFromPrivateFallback: z.number().int().nonnegative().optional(), deterministicAssertionsSkippedNavigationLikeClick: z .number() .int() @@ -107,6 +133,7 @@ export const improveReportSchema = z.object({ testFile: z.string().min(1), generatedAt: z.string().datetime(), providerUsed: improveProviderSchema, + appliedBy: improveAppliedBySchema, summary: improveSummarySchema, stepFindings: z.array(stepFindingSchema), assertionCandidates: z.array(assertionCandidateSchema), @@ -114,6 +141,9 @@ export const improveReportSchema = z.object({ }); export type ImproveProviderUsed = z.infer; +export type ImproveMutationType = z.infer; +export type ImproveMutationSafety = z.infer; +export type ImproveAppliedBy = z.infer; export type ImproveDiagnostic = z.infer; export type StepFinding = z.infer; export type AssertionApplyStatus = z.infer; diff --git a/src/core/improve/runtime-failure-classifier.test.ts b/src/core/improve/runtime-failure-classifier.test.ts index 9ac77eb..8dd3506 100644 --- a/src/core/improve/runtime-failure-classifier.test.ts +++ b/src/core/improve/runtime-failure-classifier.test.ts @@ -15,6 +15,8 @@ describe("classifyRuntimeFailingStep", () => { ); expect(out.disposition).toBe("remove"); + expect(out.mutationSafety).toBe("safe"); + expect(out.decisionConfidence).toBeGreaterThan(0.9); }); it("classifies content interactions as retained non-transient failures", () => { @@ -218,6 +220,21 @@ describe("classifyRuntimeFailingStep", () => { expect(out.reason).toContain("multilingual pattern match"); }); + it("marks low-confidence soft transient dismissals as unsafe to auto-apply", () => { + const out = classifyRuntimeFailingStep({ + action: "click", + target: { + value: "getByRole('button', { name: 'Close privacy notice' })", + kind: "locatorExpression", + source: "manual", + }, + }); + + expect(out.disposition).toBe("remove"); + expect(out.mutationSafety).toBe("unsafe_to_auto_apply"); + expect(out.decisionConfidence).toBeLessThan(0.85); + }); + it("does not match CMP selectors against locator expression names", () => { const out = classifyRuntimeFailingStep({ action: "click", diff --git a/src/core/improve/runtime-failure-classifier.ts b/src/core/improve/runtime-failure-classifier.ts index b901b9e..29299fc 100644 --- a/src/core/improve/runtime-failure-classifier.ts +++ b/src/core/improve/runtime-failure-classifier.ts @@ -75,28 +75,41 @@ function matchesCmpSelector(targetValue: string, targetKind: string): boolean { } export type RuntimeFailureDisposition = "remove" | "retain"; +export type RuntimeFailureMutationSafety = + | "safe" + | "review_required" + | "unsafe_to_auto_apply"; export interface RuntimeFailureClassification { disposition: RuntimeFailureDisposition; reason: string; + decisionConfidence: number; + mutationSafety: RuntimeFailureMutationSafety; + evidenceRefs: string[]; } export function classifyRuntimeFailingStep( step: Step ): RuntimeFailureClassification { if (step.action === "navigate") { - return { - disposition: "retain", - reason: "navigation steps are never auto-removed", - }; + return makeClassification("retain", "navigation steps are never auto-removed", { + decisionConfidence: 1, + mutationSafety: "safe", + evidenceRefs: ["action:navigate"], + }); } const isInteraction = step.action === "click" || step.action === "press"; if (!isInteraction) { - return { - disposition: "retain", - reason: "non-interaction steps are never auto-removed by transient policy", - }; + return makeClassification( + "retain", + "non-interaction steps are never auto-removed by transient policy", + { + decisionConfidence: 1, + mutationSafety: "safe", + evidenceRefs: [`action:${step.action}`], + } + ); } // --- Early cookie-consent detection via shared patterns --- @@ -104,18 +117,24 @@ export function classifyRuntimeFailingStep( const accessibleName = extractAccessibleName(targetValue); if (accessibleName && isCookieConsentDismissText(accessibleName)) { - return { - disposition: "remove", - reason: "classified as cookie-consent dismiss interaction (multilingual pattern match)", - }; + return makeClassification( + "remove", + "classified as cookie-consent dismiss interaction (multilingual pattern match)", + { + decisionConfidence: 0.98, + mutationSafety: "safe", + evidenceRefs: ["pattern:cookie_consent_text", `accessible_name:${accessibleName}`], + } + ); } const targetKind = "target" in step ? step.target.kind : "unknown"; if (matchesCmpSelector(targetValue, targetKind)) { - return { - disposition: "remove", - reason: "classified as cookie-consent CMP selector interaction", - }; + return makeClassification("remove", "classified as cookie-consent CMP selector interaction", { + decisionConfidence: 0.97, + mutationSafety: "safe", + evidenceRefs: ["pattern:cmp_selector", `target_kind:${targetKind}`], + }); } // --- Existing transient-context classification --- @@ -130,10 +149,11 @@ export function classifyRuntimeFailingStep( const hasAnyTransientContext = hasStrongTransientContext || hasSoftTransientContext; if (!hasAnyTransientContext) { - return { - disposition: "retain", - reason: "classified as non-transient interaction", - }; + return makeClassification("retain", "classified as non-transient interaction", { + decisionConfidence: 0.93, + mutationSafety: "safe", + evidenceRefs: ["context:non_transient"], + }); } const hasDismissIntent = DISMISS_INTENT_PATTERN.test(stepText); @@ -153,42 +173,82 @@ export function classifyRuntimeFailingStep( ); if (looksLikeContentLink) { - return { - disposition: "retain", - reason: "transient-context safeguard: likely content link interaction", - }; + return makeClassification( + "retain", + "transient-context safeguard: likely content link interaction", + { + decisionConfidence: 0.9, + mutationSafety: "review_required", + evidenceRefs: ["safeguard:content_link", "role:link"], + } + ); } if (hasBusinessIntent && !hasStrongTransientContext) { - return { - disposition: "retain", - reason: "transient-context safeguard: likely business-intent interaction", - }; + return makeClassification( + "retain", + "transient-context safeguard: likely business-intent interaction", + { + decisionConfidence: 0.88, + mutationSafety: "review_required", + evidenceRefs: ["safeguard:business_intent"], + } + ); } if (hasStrongTransientContext && (hasDismissIntent || hasControlCue)) { - return { - disposition: "remove", - reason: "classified as transient dismissal/control interaction", - }; + return makeClassification("remove", "classified as transient dismissal/control interaction", { + decisionConfidence: 0.92, + mutationSafety: "safe", + evidenceRefs: [ + "context:strong_transient", + hasDismissIntent ? "signal:dismiss_intent" : "signal:no_dismiss_intent", + hasControlCue ? "signal:control_cue" : "signal:no_control_cue", + ], + }); } if (hasStrongTransientContext) { - return { - disposition: "remove", - reason: "classified as strong transient-context interaction", - }; + return makeClassification("remove", "classified as strong transient-context interaction", { + decisionConfidence: 0.78, + mutationSafety: "review_required", + evidenceRefs: ["context:strong_transient"], + }); } if (hasDismissIntent && hasControlCue) { - return { - disposition: "remove", - reason: "classified as soft transient dismissal/control interaction", - }; + return makeClassification( + "remove", + "classified as soft transient dismissal/control interaction", + { + decisionConfidence: 0.72, + mutationSafety: "unsafe_to_auto_apply", + evidenceRefs: ["context:soft_transient", "signal:dismiss_intent", "signal:control_cue"], + } + ); } + return makeClassification("retain", "transient context without dismissal/control confidence", { + decisionConfidence: 0.74, + mutationSafety: "review_required", + evidenceRefs: ["context:soft_transient", "signal:insufficient_confidence"], + }); +} + +function makeClassification( + disposition: RuntimeFailureDisposition, + reason: string, + extras: { + decisionConfidence: number; + mutationSafety: RuntimeFailureMutationSafety; + evidenceRefs: string[]; + } +): RuntimeFailureClassification { return { - disposition: "retain", - reason: "transient context without dismissal/control confidence", + disposition, + reason, + decisionConfidence: extras.decisionConfidence, + mutationSafety: extras.mutationSafety, + evidenceRefs: extras.evidenceRefs, }; } diff --git a/src/core/improve/selector-pass/apply-selection.ts b/src/core/improve/selector-pass/apply-selection.ts index 3e8214d..234b1e6 100644 --- a/src/core/improve/selector-pass/apply-selection.ts +++ b/src/core/improve/selector-pass/apply-selection.ts @@ -15,17 +15,14 @@ export function applySelectionAndRecordFinding(input: { scored: TargetCandidateScore[]; diagnostics: ImproveDiagnostic[]; runtimeRepairCandidateKeys: Set; - privateFallbackRuntimeRepairCandidateKeys: Set; }): { selectorRepairsApplied: number; selectorRepairsAdoptedOnTie: number; selectorRepairsAppliedFromPlaywrightRuntime: number; - selectorRepairsAppliedFromPrivateFallback: number; } { let selectorRepairsApplied = 0; let selectorRepairsAdoptedOnTie = 0; let selectorRepairsAppliedFromPlaywrightRuntime = 0; - let selectorRepairsAppliedFromPrivateFallback = 0; if (!input.selection.adopt && input.selection.improveOpportunity) { input.diagnostics.push({ @@ -37,7 +34,6 @@ export function applySelectionAndRecordFinding(input: { selectorRepairsApplied, selectorRepairsAdoptedOnTie, selectorRepairsAppliedFromPlaywrightRuntime, - selectorRepairsAppliedFromPrivateFallback, }; } @@ -46,7 +42,6 @@ export function applySelectionAndRecordFinding(input: { selectorRepairsApplied, selectorRepairsAdoptedOnTie, selectorRepairsAppliedFromPlaywrightRuntime, - selectorRepairsAppliedFromPrivateFallback, }; } @@ -75,9 +70,6 @@ export function applySelectionAndRecordFinding(input: { if (selectedByRuntime) { selectorRepairsAppliedFromPlaywrightRuntime += 1; - if (input.privateFallbackRuntimeRepairCandidateKeys.has(selectedKey)) { - selectorRepairsAppliedFromPrivateFallback += 1; - } } input.diagnostics.push({ @@ -114,6 +106,5 @@ export function applySelectionAndRecordFinding(input: { selectorRepairsApplied, selectorRepairsAdoptedOnTie, selectorRepairsAppliedFromPlaywrightRuntime, - selectorRepairsAppliedFromPrivateFallback, }; } diff --git a/src/core/improve/selector-pass/collect-candidates.ts b/src/core/improve/selector-pass/collect-candidates.ts index ba9befb..a057be0 100644 --- a/src/core/improve/selector-pass/collect-candidates.ts +++ b/src/core/improve/selector-pass/collect-candidates.ts @@ -15,9 +15,7 @@ export interface CollectedCandidatesResult { candidates: TargetCandidate[]; selectorRepairCandidatesAdded: number; selectorRepairsGeneratedByPlaywrightRuntime: number; - selectorRepairsGeneratedByPrivateFallback: number; runtimeRepairCandidateKeys: Set; - privateFallbackRuntimeRepairCandidateKeys: Set; } export async function collectCandidatesForStep(input: { @@ -32,7 +30,6 @@ export async function collectCandidatesForStep(input: { candidates.map((candidate) => selectorTargetKey(candidate.target)) ); const runtimeRepairCandidateKeys = new Set(); - const privateFallbackRuntimeRepairCandidateKeys = new Set(); const dynamicAssessment = assessTargetDynamics(input.step.target); let dynamicSignals = [...dynamicAssessment.dynamicSignals]; @@ -61,7 +58,6 @@ export async function collectCandidatesForStep(input: { } let selectorRepairsGeneratedByPlaywrightRuntime = 0; - let selectorRepairsGeneratedByPrivateFallback = 0; if (input.page && dynamicSignals.length > 0) { if (input.runtimeRegenerationDisabled) { input.diagnostics.push({ @@ -87,13 +83,11 @@ export async function collectCandidatesForStep(input: { if (existingCandidateKeys.has(key)) continue; existingCandidateKeys.add(key); runtimeRepairCandidateKeys.add(key); - if (markerByCandidateId.get(candidate.id) === "resolved_selector_fallback") { - privateFallbackRuntimeRepairCandidateKeys.add(key); - selectorRepairsGeneratedByPrivateFallback += 1; - } candidates.push(candidate); selectorRepairCandidatesAdded += 1; - selectorRepairsGeneratedByPlaywrightRuntime += 1; + if (markerByCandidateId.get(candidate.id) === "public_conversion") { + selectorRepairsGeneratedByPlaywrightRuntime += 1; + } } } } @@ -119,8 +113,6 @@ export async function collectCandidatesForStep(input: { candidates, selectorRepairCandidatesAdded, selectorRepairsGeneratedByPlaywrightRuntime, - selectorRepairsGeneratedByPrivateFallback, runtimeRepairCandidateKeys, - privateFallbackRuntimeRepairCandidateKeys, }; } diff --git a/src/core/improve/selector-runtime-repair.test.ts b/src/core/improve/selector-runtime-repair.test.ts index 9f760e5..950f6ec 100644 --- a/src/core/improve/selector-runtime-repair.test.ts +++ b/src/core/improve/selector-runtime-repair.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, it } from "vitest"; +import { describe, expect, it } from "vitest"; import type { Locator, Page } from "playwright"; import { generateRuntimeRepairCandidates } from "./selector-runtime-repair.js"; @@ -6,21 +6,13 @@ function pageStub(): Page { return {} as Page; } -function locatorStub(input: { - count: number; - resolveSelector?: () => Promise<{ resolvedSelector: string }>; -}): Locator { +function locatorStub(input: { count: number }): Locator { return { count: async () => input.count, - _resolveSelector: input.resolveSelector, } as unknown as Locator; } describe("generateRuntimeRepairCandidates", () => { - afterEach(() => { - delete process.env["UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK"]; - }); - it("generates runtime repair via public conversion for dynamic internal selectors", async () => { const result = await generateRuntimeRepairCandidates( { @@ -35,21 +27,15 @@ describe("generateRuntimeRepairCandidates", () => { }, { resolveLocatorFn: () => locatorStub({ count: 1 }), - toLocatorExpressionFromSelectorFn: (_page, selector) => { - if (selector.includes("internal:role")) { - return "getByRole('link', { name: /winterweer\\s+update/i })"; - } - return undefined; - }, } ); expect(result.runtimeUnique).toBe(true); expect(result.candidates).toHaveLength(1); + expect(result.candidates[0]?.target.value).toBe( + "getByRole('link', { name: 'Winterweer update Schiphol 12:30' })" + ); expect(result.candidates[0]?.reasonCodes).toContain("locator_repair_playwright_runtime"); - expect(result.candidates[0]?.dynamicSignals).toEqual([ - "contains_weather_or_news_fragment", - ]); expect(result.sourceMarkers).toEqual([ { candidateId: "repair-playwright-runtime-1", @@ -84,79 +70,27 @@ describe("generateRuntimeRepairCandidates", () => { ).toBe(true); }); - it("falls back to private resolved selector path when public conversion is unavailable", async () => { + it("reports conversion_failed when selector shape cannot be converted", async () => { const result = await generateRuntimeRepairCandidates( { page: pageStub(), target: { - value: "getByRole('link', { name: 'Winterweer update', exact: true })", - kind: "locatorExpression", - source: "manual", - framePath: ["iframe[name='news']"], - }, - stepNumber: 5, - dynamicSignals: ["exact_true", "contains_weather_or_news_fragment"], - }, - { - resolveLocatorFn: () => - locatorStub({ - count: 1, - resolveSelector: async () => ({ resolvedSelector: "css=a.news-link" }), - }), - toLocatorExpressionFromSelectorFn: (_page, selector) => { - if (selector === "css=a.news-link") { - return "getByRole('link', { name: /winterweer/i })"; - } - return undefined; - }, - } - ); - - expect(result.candidates).toHaveLength(1); - expect(result.sourceMarkers).toEqual([ - { - candidateId: "repair-playwright-runtime-1", - source: "resolved_selector_fallback", - }, - ]); - expect( - result.diagnostics.some( - (diagnostic) => - diagnostic.code === "selector_repair_playwright_runtime_private_fallback_used" - ) - ).toBe(true); - expect(result.candidates[0]?.target.framePath).toEqual(["iframe[name='news']"]); - }); - - it("skips private fallback when it is explicitly disabled", async () => { - process.env["UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK"] = "1"; - - const result = await generateRuntimeRepairCandidates( - { - page: pageStub(), - target: { - value: "getByRole('link', { name: 'Winterweer update', exact: true })", - kind: "locatorExpression", + value: "internal:has-text=/dynamic/", + kind: "internal", source: "manual", }, - stepNumber: 5, + stepNumber: 7, }, { - resolveLocatorFn: () => - locatorStub({ - count: 1, - resolveSelector: async () => ({ resolvedSelector: "css=a.news-link" }), - }), - toLocatorExpressionFromSelectorFn: () => undefined, + resolveLocatorFn: () => locatorStub({ count: 1 }), } ); - expect(result.candidates).toEqual([]); + expect(result.candidates).toHaveLength(0); expect( result.diagnostics.some( (diagnostic) => - diagnostic.code === - "selector_repair_playwright_runtime_private_fallback_disabled" + diagnostic.code === "selector_repair_playwright_runtime_conversion_failed" ) ).toBe(true); }); @@ -187,36 +121,6 @@ describe("generateRuntimeRepairCandidates", () => { ).toBe(true); }); - it("reports conversion_failed when private resolved selector cannot be converted", async () => { - const result = await generateRuntimeRepairCandidates( - { - page: pageStub(), - target: { - value: "getByRole('link', { name: 'Winterweer update', exact: true })", - kind: "locatorExpression", - source: "manual", - }, - stepNumber: 7, - }, - { - resolveLocatorFn: () => - locatorStub({ - count: 1, - resolveSelector: async () => ({ resolvedSelector: "css=a.news-link" }), - }), - toLocatorExpressionFromSelectorFn: () => undefined, - } - ); - - expect(result.candidates).toHaveLength(0); - expect( - result.diagnostics.some( - (diagnostic) => - diagnostic.code === "selector_repair_playwright_runtime_conversion_failed" - ) - ).toBe(true); - }); - it("does not retain framePath when locator expression is already frame-aware", async () => { const result = await generateRuntimeRepairCandidates( { diff --git a/src/core/improve/selector-runtime-repair.ts b/src/core/improve/selector-runtime-repair.ts index efb66a9..6a2bb4f 100644 --- a/src/core/improve/selector-runtime-repair.ts +++ b/src/core/improve/selector-runtime-repair.ts @@ -9,13 +9,11 @@ import { import type { ImproveDiagnostic } from "./report-schema.js"; import { convertRuntimeTargetToLocatorExpression, - getPrivateResolveSelector, - readResolvedSelectorValue, shouldRetainFramePath, toLocatorExpressionFromSelector, } from "./playwright-runtime-selector-adapter.js"; -export type RuntimeRepairSource = "public_conversion" | "resolved_selector_fallback"; +export type RuntimeRepairSource = "public_conversion"; export interface RuntimeRepairSourceMarker { candidateId: string; @@ -24,10 +22,7 @@ export interface RuntimeRepairSourceMarker { export interface RuntimeRepairDependencies { resolveLocatorFn?: typeof resolveLocator; - toLocatorExpressionFromSelectorFn?: ( - page: Page, - selector: string - ) => string | undefined; + toLocatorExpressionFromSelectorFn?: (selector: string) => string | undefined; } export interface RuntimeRepairInput { @@ -147,7 +142,6 @@ export async function generateRuntimeRepairCandidates( }; const publicCandidate = tryPublicConversion( - input.page, input.target, toLocatorExpressionFromSelectorFn ); @@ -155,104 +149,6 @@ export async function generateRuntimeRepairCandidates( pushCandidate(publicCandidate, "public_conversion"); } - if (candidateByKey.size === 0) { - if (isPlaywrightRuntimePrivateFallbackDisabled()) { - diagnostics.push({ - code: "selector_repair_playwright_runtime_private_fallback_disabled", - level: "info", - message: - `Step ${input.stepNumber}: skipped Playwright private selector fallback because UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK=1.`, - }); - return { - candidates: [], - diagnostics, - dynamicSignals, - runtimeUnique: true, - sourceMarkers, - }; - } - - const privateResolveSelector = readPrivateResolveSelector(locator); - if (!privateResolveSelector) { - diagnostics.push({ - code: "selector_repair_playwright_runtime_unavailable", - level: "warn", - message: - `Step ${input.stepNumber}: Playwright private selector resolver (_resolveSelector) was unavailable.`, - }); - return { - candidates: [], - diagnostics, - dynamicSignals, - runtimeUnique: true, - sourceMarkers, - }; - } - - let resolvedSelector: string | undefined; - try { - resolvedSelector = readResolvedSelectorValue(await privateResolveSelector()); - } catch { - diagnostics.push({ - code: "selector_repair_playwright_runtime_unavailable", - level: "warn", - message: - `Step ${input.stepNumber}: Playwright private selector resolution failed during regeneration.`, - }); - return { - candidates: [], - diagnostics, - dynamicSignals, - runtimeUnique: true, - sourceMarkers, - }; - } - - if (!resolvedSelector) { - diagnostics.push({ - code: "selector_repair_playwright_runtime_conversion_failed", - level: "warn", - message: - `Step ${input.stepNumber}: Playwright runtime selector did not return a valid resolved selector.`, - }); - return { - candidates: [], - diagnostics, - dynamicSignals, - runtimeUnique: true, - sourceMarkers, - }; - } - - const fallbackExpression = toLocatorExpressionFromSelectorFn( - input.page, - resolvedSelector - ); - if (!fallbackExpression) { - diagnostics.push({ - code: "selector_repair_playwright_runtime_conversion_failed", - level: "warn", - message: - `Step ${input.stepNumber}: could not convert resolved selector to a locator expression.`, - }); - return { - candidates: [], - diagnostics, - dynamicSignals, - runtimeUnique: true, - sourceMarkers, - }; - } - - pushCandidate(fallbackExpression, "resolved_selector_fallback"); - diagnostics.push({ - code: "selector_repair_playwright_runtime_private_fallback_used", - level: "info", - message: - `Step ${input.stepNumber}: selector repair used Playwright private resolver fallback.`, - }); - } - if (candidateByKey.size === 0) { diagnostics.push({ code: "selector_repair_playwright_runtime_conversion_failed", @@ -272,21 +168,14 @@ export async function generateRuntimeRepairCandidates( } function tryPublicConversion( - page: Page, target: Target, toLocatorExpressionFromSelectorFn: RuntimeRepairDependencies["toLocatorExpressionFromSelectorFn"] ): string | undefined { const adapterDependencies = toLocatorExpressionFromSelectorFn === undefined ? {} - : { toLocatorExpressionFromSelectorFn }; - return convertRuntimeTargetToLocatorExpression(page, target, adapterDependencies); -} - -function readPrivateResolveSelector( - locator: Locator -): (() => Promise<{ resolvedSelector: string }>) | undefined { - return getPrivateResolveSelector(locator); + : { convertSelectorFn: toLocatorExpressionFromSelectorFn }; + return convertRuntimeTargetToLocatorExpression(target, adapterDependencies); } function targetKey(target: Target): string { @@ -296,7 +185,3 @@ function targetKey(target: Target): string { framePath: target.framePath ?? [], }); } - -function isPlaywrightRuntimePrivateFallbackDisabled(): boolean { - return process.env["UI_TEST_DISABLE_PLAYWRIGHT_RUNTIME_PRIVATE_FALLBACK"] === "1"; -} diff --git a/src/core/improve/step-snapshot-scope.ts b/src/core/improve/step-snapshot-scope.ts new file mode 100644 index 0000000..e714895 --- /dev/null +++ b/src/core/improve/step-snapshot-scope.ts @@ -0,0 +1,70 @@ +import type { Locator, Page } from "playwright"; +import { resolveLocator } from "../runtime/locator-runtime.js"; +import type { Step } from "../yaml-schema.js"; + +export type StepSnapshotScope = "target" | "landmark" | "body"; + +export interface PreparedStepSnapshot { + scope: StepSnapshotScope; + preSnapshot: string; + capturePostSnapshot: () => Promise; +} + +export async function prepareScopedStepSnapshot( + page: Page, + step: Step, + timeoutMs: number +): Promise { + for (const candidate of buildSnapshotScopeCandidates(page, step)) { + const preSnapshot = await candidate.locator + .ariaSnapshot({ timeout: timeoutMs }) + .catch(() => undefined); + if (!preSnapshot) continue; + return { + scope: candidate.scope, + preSnapshot, + capturePostSnapshot: () => + candidate.locator.ariaSnapshot({ timeout: timeoutMs }).catch(() => undefined), + }; + } + return undefined; +} + +function buildSnapshotScopeCandidates( + page: Page, + step: Step +): Array<{ scope: StepSnapshotScope; locator: Locator }> { + const candidates: Array<{ scope: StepSnapshotScope; locator: Locator }> = []; + + if ( + step.action !== "navigate" && + step.action !== "assertUrl" && + step.action !== "assertTitle" && + "target" in step && + step.target + ) { + try { + candidates.push({ + scope: "target", + locator: narrowLocator(resolveLocator(page, step.target)), + }); + } catch { + // Ignore invalid target resolution and continue to broader scopes. + } + } + + candidates.push({ + scope: "landmark", + locator: narrowLocator(page.locator("dialog, [role='dialog'], main, [role='main'], form")), + }); + candidates.push({ + scope: "body", + locator: page.locator("body"), + }); + + return candidates; +} + +function narrowLocator(locator: Locator): Locator { + return typeof locator.first === "function" ? locator.first() : locator; +} diff --git a/src/core/play/step-loop.test.ts b/src/core/play/step-loop.test.ts index 39fef95..0034b4b 100644 --- a/src/core/play/step-loop.test.ts +++ b/src/core/play/step-loop.test.ts @@ -10,8 +10,13 @@ const { executeRuntimeStepMock } = vi.hoisted(() => ({ })); const { waitForPostStepNetworkIdleMock } = vi.hoisted(() => ({ waitForPostStepNetworkIdleMock: vi.fn< - typeof import("../runtime/network-idle.js").waitForPostStepNetworkIdle - >(async () => false), + typeof import("../runtime/network-idle.js").waitForPostStepReadiness + >(async () => ({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: true, + })), })); const { dismissCookieBannerWithDetailsMock, @@ -35,7 +40,7 @@ vi.mock("../runtime/step-executor.js", () => ({ })); vi.mock("../runtime/network-idle.js", () => ({ - waitForPostStepNetworkIdle: waitForPostStepNetworkIdleMock, + waitForPostStepReadiness: waitForPostStepNetworkIdleMock, })); vi.mock("../runtime/cookie-banner.js", () => ({ @@ -124,7 +129,12 @@ describe("runPlayStepLoop warning behavior", () => { }); it("suppresses repeated network idle wait warnings after the limit", async () => { - waitForPostStepNetworkIdleMock.mockResolvedValue(true); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: true, + usedNavigationWait: false, + usedNetworkIdleWait: true, + }); const steps = Array.from({ length: 6 }, (_, index) => makeClickStep(index + 1)); const result = await runPlayStepLoop({ @@ -146,12 +156,17 @@ describe("runPlayStepLoop warning behavior", () => { expect(warnMock).toHaveBeenCalledTimes(4); expect(warnMock.mock.calls[0]?.[0]).toContain("network idle wait timed out; continuing."); expect(warnMock.mock.calls[3]?.[0]).toBe( - "Additional network idle wait warnings will be suppressed for this test file." + "Additional post-step readiness warnings will be suppressed for this test file." ); }); it("does not warn when post-step network idle waits do not time out", async () => { - waitForPostStepNetworkIdleMock.mockResolvedValue(false); + waitForPostStepNetworkIdleMock.mockResolvedValue({ + navigationTimedOut: false, + networkIdleTimedOut: false, + usedNavigationWait: false, + usedNetworkIdleWait: true, + }); const steps = [makeClickStep(1), makeClickStep(2)]; const result = await runPlayStepLoop({ diff --git a/src/core/play/step-loop.ts b/src/core/play/step-loop.ts index 706d335..a3fb6b2 100644 --- a/src/core/play/step-loop.ts +++ b/src/core/play/step-loop.ts @@ -6,14 +6,14 @@ import { dismissCookieBannerWithDetails, isLikelyOverlayInterceptionError, } from "../runtime/cookie-banner.js"; -import { waitForPostStepNetworkIdle } from "../runtime/network-idle.js"; +import { waitForPostStepReadiness } from "../runtime/network-idle.js"; import type { Step } from "../yaml-schema.js"; import type { PlayFailureArtifactPaths } from "../play-failure-report.js"; import { captureFailureArtifacts, type TraceCaptureState } from "./artifact-writer.js"; import { stepDescription } from "./step-description.js"; import type { PlayFailureArtifacts, StepResult } from "./play-types.js"; -const NETWORK_IDLE_WARNING_LIMIT = 3; +const READINESS_WARNING_LIMIT = 3; export interface StepLoopResult { stepResults: StepResult[]; @@ -36,7 +36,7 @@ export async function runPlayStepLoop(input: { artifactPaths?: PlayFailureArtifactPaths; }): Promise { const stepResults: StepResult[] = []; - let networkIdleWarnings = 0; + let readinessWarnings = 0; let failureArtifacts: PlayFailureArtifacts | undefined; for (const [i, step] of input.steps.entries()) { @@ -47,6 +47,13 @@ export async function runPlayStepLoop(input: { let overlayRetryUsed = false; while (true) { + let beforeUrl: string | undefined; + try { + beforeUrl = input.page.url(); + } catch { + beforeUrl = undefined; + } + const dismissResult = await dismissCookieBannerWithDetails( input.page, dismissTimeout @@ -74,20 +81,36 @@ export async function runPlayStepLoop(input: { ...stepExecutionOptions, }); - const networkIdleTimedOut = await waitForPostStepNetworkIdle( - input.page, - input.waitForNetworkIdle - ); + const readiness = await waitForPostStepReadiness({ + page: input.page, + step, + waitForNetworkIdle: input.waitForNetworkIdle, + timeoutMs: input.timeout, + beforeUrl, + }); + + if (readiness.navigationTimedOut) { + readinessWarnings += 1; + if (readinessWarnings <= READINESS_WARNING_LIMIT) { + ui.warn( + `Step ${i + 1} (${step.action}): navigation readiness wait timed out; continuing.` + ); + } else if (readinessWarnings === READINESS_WARNING_LIMIT + 1) { + ui.warn( + "Additional post-step readiness warnings will be suppressed for this test file." + ); + } + } - if (networkIdleTimedOut) { - networkIdleWarnings += 1; - if (networkIdleWarnings <= NETWORK_IDLE_WARNING_LIMIT) { + if (readiness.networkIdleTimedOut) { + readinessWarnings += 1; + if (readinessWarnings <= READINESS_WARNING_LIMIT) { ui.warn( `Step ${i + 1} (${step.action}): network idle wait timed out; continuing.` ); - } else if (networkIdleWarnings === NETWORK_IDLE_WARNING_LIMIT + 1) { + } else if (readinessWarnings === READINESS_WARNING_LIMIT + 1) { ui.warn( - "Additional network idle wait warnings will be suppressed for this test file." + "Additional post-step readiness warnings will be suppressed for this test file." ); } } diff --git a/src/core/player.integration.test.ts b/src/core/player.integration.test.ts index ad8eb19..f86bd0b 100644 --- a/src/core/player.integration.test.ts +++ b/src/core/player.integration.test.ts @@ -432,6 +432,7 @@ describe("player integration - step execution", () => { const result = await play(testFile, { headed: false, timeout: 2_000, + waitForNetworkIdle: true, }); expect(result.passed).toBe(false); @@ -474,6 +475,35 @@ describe("player integration - step execution", () => { } }, 30000); + it("does not warn on post-step network idle timeout when readiness wait is not enabled", async () => { + const warnSpy = vi.spyOn(ui, "warn").mockImplementation(() => {}); + try { + const testFile = await writeInlineFixture("network-idle-opt-in.yaml", { + name: "Network Idle Opt-In", + baseUrl, + steps: [ + { action: "navigate", url: "/network-polling.html" }, + { + action: "assertVisible", + target: { value: "#status", kind: "css", source: "manual" }, + }, + ], + }); + + const result = await play(testFile, { + headed: false, + timeout: 2_000, + }); + + expect(result.passed).toBe(true); + expect(warnSpy).not.toHaveBeenCalledWith( + expect.stringContaining("network idle wait timed out; continuing") + ); + } finally { + warnSpy.mockRestore(); + } + }, 30000); + it("dismisses consent overlays before executing interaction steps", async () => { const testFile = await writeInlineFixture("consent-overlay.yaml", { name: "Consent Overlay Dismissal", diff --git a/src/core/player.test.ts b/src/core/player.test.ts index d1c3b56..2dca367 100644 --- a/src/core/player.test.ts +++ b/src/core/player.test.ts @@ -420,6 +420,15 @@ describe("waitForPostStepNetworkIdle", () => { expect(page.waitForLoadState).toHaveBeenCalledWith("networkidle"); }); + it("passes timeout to waitForLoadState when provided", async () => { + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + } as unknown as Page; + + await expect(waitForPostStepNetworkIdle(page, true, 1234)).resolves.toBe(false); + expect(page.waitForLoadState).toHaveBeenCalledWith("networkidle", { timeout: 1234 }); + }); + it("returns timed out marker on network idle timeout", async () => { const page = { waitForLoadState: vi.fn().mockRejectedValue(new playwrightErrors.TimeoutError("timed out")), diff --git a/src/core/recorder.ts b/src/core/recorder.ts index 135c84e..49664a1 100644 --- a/src/core/recorder.ts +++ b/src/core/recorder.ts @@ -16,6 +16,10 @@ import { type CodegenBrowser, type CodegenRunOptions, } from "./recorder-codegen.js"; +import { + canonicalEventsToSteps, + stepsToCanonicalEvents, +} from "./recording/canonical-events.js"; export type RecordBrowser = CodegenBrowser; @@ -104,11 +108,14 @@ export async function record( } const normalizedSteps = normalizeFirstNavigate(steps, options.url); - const outputPath = await saveRecordingYaml(options, normalizedSteps); + const canonicalizedSteps = canonicalEventsToSteps( + stepsToCanonicalEvents(normalizedSteps) + ); + const outputPath = await saveRecordingYaml(options, canonicalizedSteps); return { outputPath, - stepCount: normalizedSteps.length, + stepCount: canonicalizedSteps.length, recordingMode: "codegen", }; } diff --git a/src/core/recording/canonical-events.ts b/src/core/recording/canonical-events.ts new file mode 100644 index 0000000..788f7e1 --- /dev/null +++ b/src/core/recording/canonical-events.ts @@ -0,0 +1,243 @@ +import type { Step, Target } from "../yaml-schema.js"; + +export interface CanonicalEvent { + kind: Step["action"]; + description?: string; + timeout?: number; + target?: Target; + url?: string; + text?: string; + key?: string; + value?: string; + checked?: boolean; + title?: string; + enabled?: boolean; +} + +export function stepsToCanonicalEvents(steps: Step[]): CanonicalEvent[] { + return steps.map((step) => stepToCanonicalEvent(step)); +} + +export function canonicalEventsToSteps(events: CanonicalEvent[]): Step[] { + return events.map((event) => canonicalEventToStep(event)); +} + +function stepToCanonicalEvent(step: Step): CanonicalEvent { + const base = { + kind: step.action, + ...(step.description !== undefined ? { description: step.description } : {}), + ...(step.timeout !== undefined ? { timeout: step.timeout } : {}), + } as const; + + if (step.action === "navigate") { + return { + ...base, + url: step.url, + }; + } + + const withTarget = "target" in step ? { target: cloneTarget(step.target) } : {}; + switch (step.action) { + case "click": + case "dblclick": + case "hover": + case "check": + case "uncheck": + case "assertVisible": + return { + ...base, + ...withTarget, + }; + case "fill": + return { + ...base, + ...withTarget, + text: step.text, + }; + case "press": + return { + ...base, + ...withTarget, + key: step.key, + }; + case "select": + return { + ...base, + ...withTarget, + value: step.value, + }; + case "assertText": + return { + ...base, + ...withTarget, + text: step.text, + }; + case "assertValue": + return { + ...base, + ...withTarget, + value: step.value, + }; + case "assertChecked": + return { + ...base, + ...withTarget, + checked: step.checked, + }; + case "assertUrl": + return { + ...base, + url: step.url, + }; + case "assertTitle": + return { + ...base, + title: step.title, + }; + case "assertEnabled": + return { + ...base, + ...withTarget, + enabled: step.enabled, + }; + default: + return { + ...base, + }; + } +} + +function canonicalEventToStep(event: CanonicalEvent): Step { + const base = { + ...(event.description !== undefined ? { description: event.description } : {}), + ...(event.timeout !== undefined ? { timeout: event.timeout } : {}), + }; + + switch (event.kind) { + case "navigate": + return { + action: "navigate", + url: event.url ?? "/", + ...base, + }; + case "click": + return buildTargetStep("click", event, base); + case "dblclick": + return buildTargetStep("dblclick", event, base); + case "hover": + return buildTargetStep("hover", event, base); + case "check": + return buildTargetStep("check", event, base); + case "uncheck": + return buildTargetStep("uncheck", event, base); + case "assertVisible": + return buildTargetStep("assertVisible", event, base); + case "fill": + return { + ...buildTargetStep("fill", event, base), + text: event.text ?? "", + } as Step; + case "press": + return { + ...buildTargetStep("press", event, base), + key: event.key ?? "", + } as Step; + case "select": + return { + ...buildTargetStep("select", event, base), + value: event.value ?? "", + } as Step; + case "assertText": + return { + ...buildTargetStep("assertText", event, base), + text: event.text ?? "", + } as Step; + case "assertValue": + return { + ...buildTargetStep("assertValue", event, base), + value: event.value ?? "", + } as Step; + case "assertChecked": + return { + ...buildTargetStep("assertChecked", event, base), + checked: event.checked ?? true, + } as Step; + case "assertUrl": + return { + action: "assertUrl", + url: event.url ?? "", + ...base, + }; + case "assertTitle": + return { + action: "assertTitle", + title: event.title ?? "", + ...base, + }; + case "assertEnabled": + return { + ...buildTargetStep("assertEnabled", event, base), + enabled: event.enabled ?? true, + } as Step; + default: + return { + action: "navigate", + url: "/", + ...base, + }; + } +} + +function buildTargetStep( + action: + | "click" + | "dblclick" + | "hover" + | "check" + | "uncheck" + | "fill" + | "press" + | "select" + | "assertVisible" + | "assertText" + | "assertValue" + | "assertChecked" + | "assertEnabled", + event: CanonicalEvent, + base: { description?: string; timeout?: number } +): Step { + return { + action, + target: cloneTarget(event.target), + ...base, + } as Step; +} + +function cloneTarget(target?: Target): Target { + if (target) { + return { + value: target.value, + kind: target.kind, + source: target.source, + ...(target.framePath !== undefined ? { framePath: [...target.framePath] } : {}), + ...(target.raw !== undefined ? { raw: target.raw } : {}), + ...(target.confidence !== undefined ? { confidence: target.confidence } : {}), + ...(target.warning !== undefined ? { warning: target.warning } : {}), + ...(target.fallbacks !== undefined + ? { + fallbacks: target.fallbacks.map((fallback) => ({ + value: fallback.value, + kind: fallback.kind, + source: fallback.source, + })), + } + : {}), + }; + } + + return { + value: "*", + kind: "unknown", + source: "manual", + }; +} diff --git a/src/core/runtime/network-idle.ts b/src/core/runtime/network-idle.ts index fc91e57..414676a 100644 --- a/src/core/runtime/network-idle.ts +++ b/src/core/runtime/network-idle.ts @@ -1,15 +1,68 @@ import { errors as playwrightErrors, type Page } from "playwright"; +import type { Step } from "../yaml-schema.js"; -export const DEFAULT_WAIT_FOR_NETWORK_IDLE = true; +export const DEFAULT_WAIT_FOR_NETWORK_IDLE = false; + +export interface PostStepReadinessResult { + navigationTimedOut: boolean; + networkIdleTimedOut: boolean; + usedNavigationWait: boolean; + usedNetworkIdleWait: boolean; +} + +export async function waitForPostStepReadiness(input: { + page: Page; + step: Step; + waitForNetworkIdle: boolean; + timeoutMs?: number; + beforeUrl?: string; +}): Promise { + const afterUrl = readPageUrl(input.page); + const usedNavigationWait = shouldWaitForNavigation(input.step, input.beforeUrl, afterUrl); + let navigationTimedOut = false; + let networkIdleTimedOut = false; + + if (usedNavigationWait) { + try { + await waitForLoadState(input.page, "domcontentloaded", input.timeoutMs); + } catch (err) { + if (isPlaywrightTimeoutError(err)) { + navigationTimedOut = true; + } else { + throw err; + } + } + } + + if (input.waitForNetworkIdle) { + try { + await waitForLoadState(input.page, "networkidle", input.timeoutMs); + } catch (err) { + if (isPlaywrightTimeoutError(err)) { + networkIdleTimedOut = true; + } else { + throw err; + } + } + } + + return { + navigationTimedOut, + networkIdleTimedOut, + usedNavigationWait, + usedNetworkIdleWait: input.waitForNetworkIdle, + }; +} export async function waitForPostStepNetworkIdle( page: Page, - enabled: boolean + enabled: boolean, + timeoutMs?: number ): Promise { if (!enabled) return false; try { - await page.waitForLoadState("networkidle"); + await waitForLoadState(page, "networkidle", timeoutMs); return false; } catch (err) { if (isPlaywrightTimeoutError(err)) { @@ -24,3 +77,37 @@ export function isPlaywrightTimeoutError(err: unknown): boolean { if (err instanceof Error && err.name === "TimeoutError") return true; return false; } + +async function waitForLoadState( + page: Page, + state: "domcontentloaded" | "networkidle", + timeoutMs?: number +): Promise { + if (timeoutMs === undefined) { + await page.waitForLoadState(state); + } else { + await page.waitForLoadState(state, { timeout: timeoutMs }); + } +} + +function shouldWaitForNavigation( + step: Step, + beforeUrl: string | undefined, + afterUrl: string | undefined +): boolean { + if (step.action === "navigate") return true; + if (!beforeUrl || !afterUrl) return false; + return normalizeUrl(beforeUrl) !== normalizeUrl(afterUrl); +} + +function readPageUrl(page: Page): string | undefined { + try { + return page.url(); + } catch { + return undefined; + } +} + +function normalizeUrl(value: string): string { + return value.trim(); +} diff --git a/src/core/runtime/overlay-handler.test.ts b/src/core/runtime/overlay-handler.test.ts index fd22913..25ff27e 100644 --- a/src/core/runtime/overlay-handler.test.ts +++ b/src/core/runtime/overlay-handler.test.ts @@ -59,8 +59,8 @@ describe("installOverlayHandlers", () => { { dismissOverlayFn: vi.fn(async () => ({ dismissed: true, - category: "non_cookie_overlay", - strategy: "modal_close_control", + category: "non_cookie_overlay" as const, + strategy: "modal_close_control" as const, frameUrl: "https://example.test", })), } diff --git a/src/core/runtime/overlay-handler.ts b/src/core/runtime/overlay-handler.ts index bf65fbf..2d26d84 100644 --- a/src/core/runtime/overlay-handler.ts +++ b/src/core/runtime/overlay-handler.ts @@ -5,7 +5,7 @@ import { } from "./cookie-banner.js"; const CONSENT_HANDLER_TRIGGER_SELECTOR = - '[id*="consent"], [class*="consent"], [id*="cookie"], [class*="cookie"], [id*="gdpr"], [class*="gdpr"], [data-testid*="consent"], [data-testid*="cookie"], [data-cmp]'; + '[role="dialog"][id*="consent" i], [role="dialog"][class*="consent" i], [role="dialog"][id*="cookie" i], [role="dialog"][class*="cookie" i], [role="dialog"][id*="gdpr" i], [role="dialog"][class*="gdpr" i], [role="dialog"][data-testid*="consent" i], [role="dialog"][data-testid*="cookie" i], [role="dialog"][data-cmp], iframe[src*="onetrust" i], iframe[src*="cookiebot" i], iframe[src*="didomi" i], iframe[src*="trustarc" i]'; const NON_COOKIE_OVERLAY_TRIGGER_SELECTOR = '[role="dialog"][aria-modal="true"], .modal[aria-modal="true"], .modal--breaking-push, .breaking-push-modal'; const DEFAULT_HANDLER_TIMEOUT_MS = 1_200; From f57a114fd37d12973be7cbc03e83e64207cc5b05 Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 09:04:15 +0100 Subject: [PATCH 18/19] Restore selector normalization and complete mainline rebase Resolve rebase conflicts against main, restore selector normalization dependency for transform tests, and keep validation green after rebasing the record/improve stability stack. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/core/runtime/network-idle.ts | 2 +- src/core/transform/selector-normalize.ts | 280 +++++++++++++++++++++++ 2 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 src/core/transform/selector-normalize.ts diff --git a/src/core/runtime/network-idle.ts b/src/core/runtime/network-idle.ts index 414676a..4bd1edf 100644 --- a/src/core/runtime/network-idle.ts +++ b/src/core/runtime/network-idle.ts @@ -15,7 +15,7 @@ export async function waitForPostStepReadiness(input: { step: Step; waitForNetworkIdle: boolean; timeoutMs?: number; - beforeUrl?: string; + beforeUrl: string | undefined; }): Promise { const afterUrl = readPageUrl(input.page); const usedNavigationWait = shouldWaitForNavigation(input.step, input.beforeUrl, afterUrl); diff --git a/src/core/transform/selector-normalize.ts b/src/core/transform/selector-normalize.ts new file mode 100644 index 0000000..439c923 --- /dev/null +++ b/src/core/transform/selector-normalize.ts @@ -0,0 +1,280 @@ +import { detectDynamicSignals } from "../improve/dynamic-signal-detection.js"; + +export interface JsonlLocatorNode { + kind: string; + body?: unknown; + options?: Record; + next?: JsonlLocatorNode; +} + +export interface LocatorNormalizeOptions { + dropDynamicExact?: boolean; +} + +export function locatorNodeToExpression( + node: unknown, + depth = 0, + normalizeOptions: LocatorNormalizeOptions = {} +): string | undefined { + if (!isLocatorNode(node) || depth > 64) return undefined; + + const { kind, body, options = {}, next } = node; + let current: string; + + switch (kind) { + case "default": { + const hasText = options["hasText"]; + const hasNotText = options["hasNotText"]; + if (hasText !== undefined) { + current = `locator(${toLiteral(body)}, { hasText: ${toLiteral(hasText)} })`; + } else if (hasNotText !== undefined) { + current = `locator(${toLiteral(body)}, { hasNotText: ${toLiteral(hasNotText)} })`; + } else { + current = `locator(${toLiteral(body)})`; + } + break; + } + + case "frame-locator": + current = `frameLocator(${toLiteral(body)})`; + break; + + case "frame": + current = "contentFrame()"; + break; + + case "nth": { + const nthIndex = typeof body === "number" ? body : Number(body); + if (!Number.isFinite(nthIndex)) return undefined; + current = `nth(${nthIndex})`; + break; + } + + case "first": + current = "first()"; + break; + + case "last": + current = "last()"; + break; + + case "visible": + current = `filter({ visible: ${body === true || body === "true" ? "true" : "false"} })`; + break; + + case "role": { + const roleOptions: string[] = []; + let roleName = ""; + if (options["name"] !== undefined) { + roleOptions.push(`name: ${toLiteral(options["name"])}`); + roleName = typeof options["name"] === "string" ? options["name"] : ""; + } + const dropExact = shouldDropExactForDynamicText( + roleName, + normalizeOptions.dropDynamicExact === true + ); + if (options["exact"] === true && !dropExact) roleOptions.push("exact: true"); + const attrs = Array.isArray(options["attrs"]) + ? options["attrs"].filter( + (value): value is { name: unknown; value: unknown } => isPlainObject(value) + ) + : []; + for (const attr of attrs) { + if (typeof attr.name !== "string") continue; + roleOptions.push(`${safeObjectKey(attr.name)}: ${toLiteral(attr.value)}`); + } + current = + roleOptions.length > 0 + ? `getByRole(${toLiteral(body)}, { ${roleOptions.join(", ")} })` + : `getByRole(${toLiteral(body)})`; + break; + } + + case "has-text": + current = `filter({ hasText: ${toLiteral(body)} })`; + break; + + case "has-not-text": + current = `filter({ hasNotText: ${toLiteral(body)} })`; + break; + + case "has": { + const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); + if (!nested) return undefined; + current = `filter({ has: ${nested} })`; + break; + } + + case "hasNot": { + const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); + if (!nested) return undefined; + current = `filter({ hasNot: ${nested} })`; + break; + } + + case "and": { + const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); + if (!nested) return undefined; + current = `and(${nested})`; + break; + } + + case "or": { + const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); + if (!nested) return undefined; + current = `or(${nested})`; + break; + } + + case "chain": { + const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); + if (!nested) return undefined; + current = `locator(${nested})`; + break; + } + + case "test-id": + current = `getByTestId(${toLiteral(body)})`; + break; + + case "text": + current = toGetByTextMethod("getByText", body, options, normalizeOptions); + break; + + case "alt": + current = toGetByTextMethod("getByAltText", body, options, normalizeOptions); + break; + + case "placeholder": + current = toGetByTextMethod("getByPlaceholder", body, options, normalizeOptions); + break; + + case "label": + current = toGetByTextMethod("getByLabel", body, options, normalizeOptions); + break; + + case "title": + current = toGetByTextMethod("getByTitle", body, options, normalizeOptions); + break; + + default: + return undefined; + } + + if (!next) return current; + const nextExpression = locatorNodeToExpression(next, depth + 1, normalizeOptions); + if (!nextExpression) return current; + return `${current}.${nextExpression}`; +} + +function toGetByTextMethod( + methodName: "getByText" | "getByAltText" | "getByPlaceholder" | "getByLabel" | "getByTitle", + body: unknown, + options: Record, + normalizeOptions: LocatorNormalizeOptions +): string { + const bodyText = typeof body === "string" ? body : ""; + const dropExact = shouldDropExactForDynamicText( + bodyText, + normalizeOptions.dropDynamicExact === true + ); + if (options["exact"] === true && !dropExact) { + return `${methodName}(${toLiteral(body)}, { exact: true })`; + } + return `${methodName}(${toLiteral(body)})`; +} + +function toLiteral(value: unknown): string { + if (value === null) return "null"; + if (value === undefined) return "undefined"; + if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") { + return String(value); + } + if (typeof value === "string") return quote(value); + if (Array.isArray(value)) return `[${value.map((entry) => toLiteral(entry)).join(", ")}]`; + if (isRegexLike(value)) return `/${escapeRegexBody(value.source)}/${value.flags}`; + if (isPlainObject(value)) { + const entries = Object.entries(value).map( + ([key, entry]) => `${safeObjectKey(key)}: ${toLiteral(entry)}` + ); + return `{ ${entries.join(", ")} }`; + } + return quote(formatFallbackLiteral(value)); +} + +function quote(value: string): string { + return `'${value.replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/\n/g, "\\n")}'`; +} + +function safeObjectKey(key: string): string { + if (/^[A-Za-z_$][A-Za-z0-9_$]*$/u.test(key)) return key; + return quote(key); +} + +function isPlainObject(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function isLocatorNode(value: unknown): value is JsonlLocatorNode { + return isPlainObject(value) && typeof value["kind"] === "string"; +} + +function isRegexLike(value: unknown): value is { source: string; flags: string } { + return ( + isPlainObject(value) && + typeof value["source"] === "string" && + typeof value["flags"] === "string" + ); +} + +function escapeRegexBody(value: string): string { + return value.replace(/\//g, "\\/"); +} + +function formatFallbackLiteral(value: unknown): string { + if (typeof value === "symbol") { + return value.description ? `Symbol(${value.description})` : "Symbol()"; + } + return Object.prototype.toString.call(value); +} + +function shouldDropExactForDynamicText(text: string, enabled: boolean): boolean { + if (!enabled) return false; + + const normalized = text.trim().toLowerCase(); + if (!normalized) return false; + + if (normalized.length <= 24) { + return false; + } + const dynamicSignals = detectDynamicSignals(text); + const hasWeatherOrNewsSignal = dynamicSignals.includes( + "contains_weather_or_news_fragment" + ); + const hasDateOrTimeSignal = dynamicSignals.includes( + "contains_date_or_time_fragment" + ); + const hasNumericSignal = dynamicSignals.includes("contains_numeric_fragment"); + const hasHeadlineSignal = dynamicSignals.includes("contains_headline_like_text"); + const hasPipeSeparatorSignal = dynamicSignals.includes("contains_pipe_separator"); + + // Long and headline-like text tends to churn frequently on news pages. + if (hasHeadlineSignal && (normalized.length >= 48 || hasPipeSeparatorSignal)) { + return true; + } + + if ( + hasHeadlineSignal && + (hasWeatherOrNewsSignal || hasDateOrTimeSignal || hasNumericSignal) + ) { + return true; + } + + const hasDynamicNumericSignal = hasNumericSignal && hasHeadlineSignal; + const strongSignalCount = [ + hasDateOrTimeSignal, + hasWeatherOrNewsSignal, + hasDynamicNumericSignal, + ].filter(Boolean).length; + return strongSignalCount >= 2; +} From 4052cd04f9c6b1be25782516f48426e55a62531a Mon Sep 17 00:00:00 2001 From: Douwe de Vries Date: Mon, 9 Mar 2026 09:08:11 +0100 Subject: [PATCH 19/19] Remove obsolete selector-normalize module and tests Drop the temporary selector-normalize restoration from this branch and align with main by removing the now-unused transform helper and its test coverage. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- src/core/transform/selector-normalize.test.ts | 55 ---- src/core/transform/selector-normalize.ts | 280 ------------------ 2 files changed, 335 deletions(-) delete mode 100644 src/core/transform/selector-normalize.test.ts delete mode 100644 src/core/transform/selector-normalize.ts diff --git a/src/core/transform/selector-normalize.test.ts b/src/core/transform/selector-normalize.test.ts deleted file mode 100644 index 6666304..0000000 --- a/src/core/transform/selector-normalize.test.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { locatorNodeToExpression } from "./selector-normalize.js"; - -describe("locatorNodeToExpression dynamic exact normalization", () => { - it("drops exact for long headline-like role names when enabled", () => { - const expression = locatorNodeToExpression( - { - kind: "role", - body: "link", - options: { - name: "Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn", - exact: true, - }, - }, - 0, - { dropDynamicExact: true } - ); - - expect(expression).toBe( - "getByRole('link', { name: 'Nederlaag voor Trump: hooggerechtshof VS oordeelt dat heffingen onwettig zijn' })" - ); - expect(expression).not.toContain("exact: true"); - }); - - it("drops exact for headline-like text with time fragments", () => { - const expression = locatorNodeToExpression( - { - kind: "text", - body: "Winterweer update Schiphol 12:30, alle vluchten vertraagd", - options: { exact: true }, - }, - 0, - { dropDynamicExact: true } - ); - - expect(expression).toBe( - "getByText('Winterweer update Schiphol 12:30, alle vluchten vertraagd')" - ); - expect(expression).not.toContain("exact: true"); - }); - - it("keeps exact for short stable text", () => { - const expression = locatorNodeToExpression( - { - kind: "role", - body: "link", - options: { name: "Algemeen", exact: true }, - }, - 0, - { dropDynamicExact: true } - ); - - expect(expression).toBe("getByRole('link', { name: 'Algemeen', exact: true })"); - }); -}); diff --git a/src/core/transform/selector-normalize.ts b/src/core/transform/selector-normalize.ts deleted file mode 100644 index 439c923..0000000 --- a/src/core/transform/selector-normalize.ts +++ /dev/null @@ -1,280 +0,0 @@ -import { detectDynamicSignals } from "../improve/dynamic-signal-detection.js"; - -export interface JsonlLocatorNode { - kind: string; - body?: unknown; - options?: Record; - next?: JsonlLocatorNode; -} - -export interface LocatorNormalizeOptions { - dropDynamicExact?: boolean; -} - -export function locatorNodeToExpression( - node: unknown, - depth = 0, - normalizeOptions: LocatorNormalizeOptions = {} -): string | undefined { - if (!isLocatorNode(node) || depth > 64) return undefined; - - const { kind, body, options = {}, next } = node; - let current: string; - - switch (kind) { - case "default": { - const hasText = options["hasText"]; - const hasNotText = options["hasNotText"]; - if (hasText !== undefined) { - current = `locator(${toLiteral(body)}, { hasText: ${toLiteral(hasText)} })`; - } else if (hasNotText !== undefined) { - current = `locator(${toLiteral(body)}, { hasNotText: ${toLiteral(hasNotText)} })`; - } else { - current = `locator(${toLiteral(body)})`; - } - break; - } - - case "frame-locator": - current = `frameLocator(${toLiteral(body)})`; - break; - - case "frame": - current = "contentFrame()"; - break; - - case "nth": { - const nthIndex = typeof body === "number" ? body : Number(body); - if (!Number.isFinite(nthIndex)) return undefined; - current = `nth(${nthIndex})`; - break; - } - - case "first": - current = "first()"; - break; - - case "last": - current = "last()"; - break; - - case "visible": - current = `filter({ visible: ${body === true || body === "true" ? "true" : "false"} })`; - break; - - case "role": { - const roleOptions: string[] = []; - let roleName = ""; - if (options["name"] !== undefined) { - roleOptions.push(`name: ${toLiteral(options["name"])}`); - roleName = typeof options["name"] === "string" ? options["name"] : ""; - } - const dropExact = shouldDropExactForDynamicText( - roleName, - normalizeOptions.dropDynamicExact === true - ); - if (options["exact"] === true && !dropExact) roleOptions.push("exact: true"); - const attrs = Array.isArray(options["attrs"]) - ? options["attrs"].filter( - (value): value is { name: unknown; value: unknown } => isPlainObject(value) - ) - : []; - for (const attr of attrs) { - if (typeof attr.name !== "string") continue; - roleOptions.push(`${safeObjectKey(attr.name)}: ${toLiteral(attr.value)}`); - } - current = - roleOptions.length > 0 - ? `getByRole(${toLiteral(body)}, { ${roleOptions.join(", ")} })` - : `getByRole(${toLiteral(body)})`; - break; - } - - case "has-text": - current = `filter({ hasText: ${toLiteral(body)} })`; - break; - - case "has-not-text": - current = `filter({ hasNotText: ${toLiteral(body)} })`; - break; - - case "has": { - const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); - if (!nested) return undefined; - current = `filter({ has: ${nested} })`; - break; - } - - case "hasNot": { - const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); - if (!nested) return undefined; - current = `filter({ hasNot: ${nested} })`; - break; - } - - case "and": { - const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); - if (!nested) return undefined; - current = `and(${nested})`; - break; - } - - case "or": { - const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); - if (!nested) return undefined; - current = `or(${nested})`; - break; - } - - case "chain": { - const nested = locatorNodeToExpression(body, depth + 1, normalizeOptions); - if (!nested) return undefined; - current = `locator(${nested})`; - break; - } - - case "test-id": - current = `getByTestId(${toLiteral(body)})`; - break; - - case "text": - current = toGetByTextMethod("getByText", body, options, normalizeOptions); - break; - - case "alt": - current = toGetByTextMethod("getByAltText", body, options, normalizeOptions); - break; - - case "placeholder": - current = toGetByTextMethod("getByPlaceholder", body, options, normalizeOptions); - break; - - case "label": - current = toGetByTextMethod("getByLabel", body, options, normalizeOptions); - break; - - case "title": - current = toGetByTextMethod("getByTitle", body, options, normalizeOptions); - break; - - default: - return undefined; - } - - if (!next) return current; - const nextExpression = locatorNodeToExpression(next, depth + 1, normalizeOptions); - if (!nextExpression) return current; - return `${current}.${nextExpression}`; -} - -function toGetByTextMethod( - methodName: "getByText" | "getByAltText" | "getByPlaceholder" | "getByLabel" | "getByTitle", - body: unknown, - options: Record, - normalizeOptions: LocatorNormalizeOptions -): string { - const bodyText = typeof body === "string" ? body : ""; - const dropExact = shouldDropExactForDynamicText( - bodyText, - normalizeOptions.dropDynamicExact === true - ); - if (options["exact"] === true && !dropExact) { - return `${methodName}(${toLiteral(body)}, { exact: true })`; - } - return `${methodName}(${toLiteral(body)})`; -} - -function toLiteral(value: unknown): string { - if (value === null) return "null"; - if (value === undefined) return "undefined"; - if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") { - return String(value); - } - if (typeof value === "string") return quote(value); - if (Array.isArray(value)) return `[${value.map((entry) => toLiteral(entry)).join(", ")}]`; - if (isRegexLike(value)) return `/${escapeRegexBody(value.source)}/${value.flags}`; - if (isPlainObject(value)) { - const entries = Object.entries(value).map( - ([key, entry]) => `${safeObjectKey(key)}: ${toLiteral(entry)}` - ); - return `{ ${entries.join(", ")} }`; - } - return quote(formatFallbackLiteral(value)); -} - -function quote(value: string): string { - return `'${value.replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/\n/g, "\\n")}'`; -} - -function safeObjectKey(key: string): string { - if (/^[A-Za-z_$][A-Za-z0-9_$]*$/u.test(key)) return key; - return quote(key); -} - -function isPlainObject(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - -function isLocatorNode(value: unknown): value is JsonlLocatorNode { - return isPlainObject(value) && typeof value["kind"] === "string"; -} - -function isRegexLike(value: unknown): value is { source: string; flags: string } { - return ( - isPlainObject(value) && - typeof value["source"] === "string" && - typeof value["flags"] === "string" - ); -} - -function escapeRegexBody(value: string): string { - return value.replace(/\//g, "\\/"); -} - -function formatFallbackLiteral(value: unknown): string { - if (typeof value === "symbol") { - return value.description ? `Symbol(${value.description})` : "Symbol()"; - } - return Object.prototype.toString.call(value); -} - -function shouldDropExactForDynamicText(text: string, enabled: boolean): boolean { - if (!enabled) return false; - - const normalized = text.trim().toLowerCase(); - if (!normalized) return false; - - if (normalized.length <= 24) { - return false; - } - const dynamicSignals = detectDynamicSignals(text); - const hasWeatherOrNewsSignal = dynamicSignals.includes( - "contains_weather_or_news_fragment" - ); - const hasDateOrTimeSignal = dynamicSignals.includes( - "contains_date_or_time_fragment" - ); - const hasNumericSignal = dynamicSignals.includes("contains_numeric_fragment"); - const hasHeadlineSignal = dynamicSignals.includes("contains_headline_like_text"); - const hasPipeSeparatorSignal = dynamicSignals.includes("contains_pipe_separator"); - - // Long and headline-like text tends to churn frequently on news pages. - if (hasHeadlineSignal && (normalized.length >= 48 || hasPipeSeparatorSignal)) { - return true; - } - - if ( - hasHeadlineSignal && - (hasWeatherOrNewsSignal || hasDateOrTimeSignal || hasNumericSignal) - ) { - return true; - } - - const hasDynamicNumericSignal = hasNumericSignal && hasHeadlineSignal; - const strongSignalCount = [ - hasDateOrTimeSignal, - hasWeatherOrNewsSignal, - hasDynamicNumericSignal, - ].filter(Boolean).length; - return strongSignalCount >= 2; -}