diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d4dfb4..24e1cc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,11 +14,13 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht - Workflow builder controls for undo/redo and explicit edge disconnect. - Web recorder stop endpoint (`/api/recorders/web/stop`) and recorder navigation event capture. - Recorder draft review panel with reorder/edit/skip controls before inserting recorded steps. +- Autopilot plan diagnostics: overall confidence score, node-level insights, and fallback template options. ### Changed - CI now includes browser smoke validation (`Web E2E Smoke`). - Web editor keyboard shortcuts now include undo/redo and selection-aware delete behavior. - Web recorder now follows capture -> review -> insert flow instead of immediate node injection. +- Autopilot now requires explicit confirm-before-create flow and uses richer starter templates for vague prompts. ## [1.0.7] - 2026-02-13 diff --git a/README.md b/README.md index 2bda5de..00f056e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ It combines a drag-and-drop workflow studio, resilient execution, AI-assisted au - Core editor UX: undo/redo, duplicate, edge disconnect, auto-layout, and JSON import/export - Web automation (Playwright) and desktop automation (agent service) - Recorder flows for web and desktop action capture with review-before-insert draft editing -- Autopilot workflow generation from natural-language prompts +- Autopilot workflow generation from natural-language prompts with confidence scoring and confirm-before-create review - AI nodes: `transform_llm`, `document_understanding`, `clipboard_ai_transfer` - Integrations (`http_api`, `postgresql`, `mysql`, `mongodb`, `google_sheets`, `airtable`, `s3`) - Orchestrator queue with attended/unattended robots and dispatch lifecycle diff --git a/apps/server/src/lib/autopilot.test.ts b/apps/server/src/lib/autopilot.test.ts index 57847a1..a427436 100644 --- a/apps/server/src/lib/autopilot.test.ts +++ b/apps/server/src/lib/autopilot.test.ts @@ -10,16 +10,29 @@ test("buildAutopilotPlan generates web + api + ai chain from natural language pr assert.ok(nodeTypes.includes("transform_llm")); assert.ok(nodeTypes.includes("http_request")); assert.equal(plan.definition.edges.length, plan.definition.nodes.length - 1); + assert.ok(plan.confidence > 0.6); + assert.equal(plan.requiresConfirmation, true); + assert.ok(plan.nodeInsights.some((insight) => insight.nodeType === "http_request")); }); test("buildAutopilotPlan falls back to starter workflow for vague prompts", () => { const plan = buildAutopilotPlan("do stuff"); const nodeTypes = plan.definition.nodes.map((node) => String(node.data?.type || "")); - assert.deepEqual(nodeTypes, ["start", "set_variable"]); - assert.ok(plan.capabilities.includes("orchestration")); + assert.ok(nodeTypes.length >= 4); + assert.ok(plan.fallbackUsed); + assert.ok(Boolean(plan.fallbackTemplateId)); + assert.ok(plan.warnings.some((warning) => warning.toLowerCase().includes("starter template"))); + assert.ok(plan.fallbackOptions.length >= 3); }); test("buildAutopilotPlan returns warnings for planned feature areas", () => { const plan = buildAutopilotPlan("process SAP invoice PDF with clipboard ai"); assert.ok(plan.warnings.length >= 2); }); + +test("buildAutopilotPlan includes per-node warnings for placeholder heavy steps", () => { + const plan = buildAutopilotPlan("simple web workflow"); + const navigateInsight = plan.nodeInsights.find((insight) => insight.nodeType === "playwright_navigate"); + assert.ok(navigateInsight); + assert.ok(typeof navigateInsight?.confidence === "number"); +}); diff --git a/apps/server/src/lib/autopilot.ts b/apps/server/src/lib/autopilot.ts index e3bb4ac..6678f08 100644 --- a/apps/server/src/lib/autopilot.ts +++ b/apps/server/src/lib/autopilot.ts @@ -1,10 +1,32 @@ import type { WorkflowDefinition, WorkflowNode } from "./types.js"; +type AutopilotNodeInsight = { + nodeId: string; + nodeType: string; + label: string; + confidence: number; + reason: string; + warnings: string[]; +}; + +type AutopilotFallbackOption = { + id: string; + name: string; + description: string; + reason: string; +}; + type PlanResult = { name: string; description: string; capabilities: string[]; warnings: string[]; + confidence: number; + requiresConfirmation: boolean; + fallbackUsed: boolean; + fallbackTemplateId?: string; + fallbackOptions: AutopilotFallbackOption[]; + nodeInsights: AutopilotNodeInsight[]; definition: WorkflowDefinition; }; @@ -14,6 +36,27 @@ const EXECUTION_DEFAULTS = { defaultNodeTimeoutMs: 30000 }; +const FALLBACK_OPTIONS: AutopilotFallbackOption[] = [ + { + id: "web_intake_ai_review", + name: "Web Intake + AI Review", + description: "Navigate, extract page content, transform with AI, and route for approval.", + reason: "Best for broad web-based operational tasks." + }, + { + id: "data_cleanup_review", + name: "Data Cleanup + Validation", + description: "Import CSV, normalize values with AI, validate output, and add approval.", + reason: "Best for generic data wrangling prompts." + }, + { + id: "api_sync_starter", + name: "API Sync Starter", + description: "Set run context, send API request, and require human approval.", + reason: "Best for broad integration and handoff requests." + } +]; + function compactName(rawPrompt: string) { const cleaned = rawPrompt .replace(/[^a-zA-Z0-9\s-]/g, " ") @@ -37,11 +80,225 @@ function makeNode(id: string, x: number, data: Record): Workflo }; } +function round2(value: number) { + return Math.round(value * 100) / 100; +} + +function buildNodeInsights(nodes: WorkflowNode[], prompt: string, fallbackUsed: boolean): AutopilotNodeInsight[] { + return nodes.map((node) => { + const nodeType = String(node.data?.type || ""); + const label = String(node.data?.label || node.id); + if (nodeType === "start") { + return { + nodeId: node.id, + nodeType, + label, + confidence: 0.99, + reason: "Mandatory workflow entry node.", + warnings: [] + }; + } + + let confidence = 0.7; + let reason = "Generated from recognized prompt intent."; + const warnings: string[] = []; + + if (fallbackUsed) { + confidence -= 0.12; + reason = "Generated from fallback template because prompt intent was broad."; + } + + if (nodeType === "playwright_navigate") { + if (hasAny(prompt, ["web", "website", "browser", "url", "portal"])) confidence += 0.16; + else warnings.push("Target URL is placeholder and should be refined."); + reason = "Web navigation step inferred from prompt."; + } + + if (nodeType === "playwright_extract") { + if (hasAny(prompt, ["extract", "scrape", "table", "screen scrape", "data scrape"])) confidence += 0.16; + else warnings.push("Selector is generic and may need adjustment."); + reason = "Data extraction step inferred from prompt."; + } + + if (nodeType === "playwright_fill" || nodeType === "playwright_click") { + if (hasAny(prompt, ["form", "login", "sign in", "fill"])) confidence += 0.13; + else warnings.push("Recorded selectors/values should be reviewed."); + reason = "Form interaction inferred from prompt."; + } + + if (nodeType === "http_request") { + if (hasAny(prompt, ["api", "webhook", "http", "endpoint"])) confidence += 0.15; + else warnings.push("API URL/body are starter placeholders."); + reason = "Integration call inferred from prompt."; + } + + if (nodeType === "transform_llm") { + if (hasAny(prompt, ["ai", "classify", "summarize", "clean", "understand"])) confidence += 0.14; + reason = "AI transformation inferred from prompt."; + } + + if (nodeType === "data_import_csv") { + if (hasAny(prompt, ["csv", "excel", "spreadsheet"])) confidence += 0.14; + reason = "Structured file ingestion inferred from prompt."; + } + + if (nodeType === "manual_approval") { + if (hasAny(prompt, ["approval", "review", "human"])) confidence += 0.14; + reason = "Human checkpoint inferred from prompt/risk profile."; + } + + if (nodeType === "submit_guard") { + if (hasAny(prompt, ["validate", "guard", "check"])) confidence += 0.12; + reason = "Validation guard inferred from prompt."; + } + + if (nodeType === "set_variable") { + confidence = Math.min(confidence, 0.62); + reason = "Context bootstrap used to seed starter workflow."; + } + + return { + nodeId: node.id, + nodeType, + label, + confidence: round2(Math.max(0.25, Math.min(0.98, confidence))), + reason, + warnings + }; + }); +} + +function chooseFallbackTemplate(prompt: string) { + if (hasAny(prompt, ["csv", "excel", "spreadsheet", "report", "dataset", "clean", "normalize"])) { + return FALLBACK_OPTIONS[1]; + } + if (hasAny(prompt, ["api", "webhook", "endpoint", "integration", "sync"])) { + return FALLBACK_OPTIONS[2]; + } + return FALLBACK_OPTIONS[0]; +} + +function buildFallbackNodes(templateId: string): { nodes: WorkflowNode[]; capabilities: string[]; warnings: string[] } { + const start = makeNode("start", 80, { type: "start", label: "Start" }); + if (templateId === "data_cleanup_review") { + return { + nodes: [ + start, + makeNode("csv", 330, { type: "data_import_csv", label: "Import CSV", outputKey: "csvRows" }), + makeNode("llm", 580, { + type: "transform_llm", + label: "Normalize Data", + inputKey: "csvRows", + outputKey: "normalizedRows", + strictJson: true + }), + makeNode("validate", 830, { + type: "submit_guard", + label: "Validate Rows", + inputKey: "normalizedRows", + schema: { type: "array" } + }), + makeNode("approval", 1080, { + type: "manual_approval", + label: "Approve Output", + message: "Please review normalized rows before submit." + }) + ], + capabilities: ["data-import", "ai-transform", "validation", "human-in-the-loop"], + warnings: ["Prompt was broad; generated a data cleanup starter template."] + }; + } + if (templateId === "api_sync_starter") { + return { + nodes: [ + start, + makeNode("set-context", 330, { + type: "set_variable", + label: "Set Request Context", + key: "task", + value: "autopilot-api-sync" + }), + makeNode("api", 580, { + type: "http_request", + label: "Send API Request", + method: "POST", + url: "https://example.com/api", + body: { task: "{{task}}" } + }), + makeNode("approval", 830, { + type: "manual_approval", + label: "Review API Response", + message: "Confirm API output before downstream actions." + }) + ], + capabilities: ["api-integration", "orchestration", "human-in-the-loop"], + warnings: ["Prompt was broad; generated an API sync starter template."] + }; + } + return { + nodes: [ + start, + makeNode("navigate", 330, { + type: "playwright_navigate", + label: "Navigate", + url: "https://example.com" + }), + makeNode("extract", 580, { + type: "playwright_extract", + label: "Extract Data", + selector: "main, table, .content", + saveAs: "pageData" + }), + makeNode("llm", 830, { + type: "transform_llm", + label: "Summarize Data", + inputKey: "pageData", + outputKey: "summary", + strictJson: true + }), + makeNode("approval", 1080, { + type: "manual_approval", + label: "Approve Summary", + message: "Review summary before final handoff." + }) + ], + capabilities: ["web-automation", "scraping", "ai-transform", "human-in-the-loop"], + warnings: ["Prompt was broad; generated a web intake starter template."] + }; +} + +function computePlanConfidence( + insights: AutopilotNodeInsight[], + warnings: string[], + fallbackUsed: boolean, + matchedSignalCount: number +) { + const avgNodeConfidence = + insights.length > 0 ? insights.reduce((sum, insight) => sum + insight.confidence, 0) / insights.length : 0.4; + let confidence = avgNodeConfidence + Math.min(0.2, matchedSignalCount * 0.02) - Math.min(0.2, warnings.length * 0.03); + if (fallbackUsed) confidence -= 0.06; + return round2(Math.max(0.2, Math.min(0.97, confidence))); +} + export function buildAutopilotPlan(promptRaw: string, preferredName?: string): PlanResult { const prompt = promptRaw.toLowerCase(); const nodes: WorkflowNode[] = [makeNode("start", 80, { type: "start", label: "Start" })]; const capabilities = new Set(); const warnings: string[] = []; + let fallbackUsed = false; + let fallbackTemplateId: string | undefined; + + const coreSignals = [ + hasAny(prompt, ["web", "website", "browser", "url", "portal"]), + hasAny(prompt, ["form", "login", "sign in", "fill"]), + hasAny(prompt, ["extract", "scrape", "table", "screen scrape", "data scrape"]), + hasAny(prompt, ["api", "webhook", "http", "endpoint"]), + hasAny(prompt, ["csv", "excel", "spreadsheet"]), + hasAny(prompt, ["ai", "classify", "summarize", "clean", "understand"]), + hasAny(prompt, ["approval", "review", "human"]), + hasAny(prompt, ["validate", "guard", "check"]) + ]; + const matchedSignalCount = coreSignals.filter(Boolean).length; if (hasAny(prompt, ["web", "website", "browser", "url", "portal"])) { nodes.push( @@ -157,15 +414,13 @@ export function buildAutopilotPlan(promptRaw: string, preferredName?: string): P } if (nodes.length === 1) { - nodes.push( - makeNode("set-context", 330, { - type: "set_variable", - label: "Set Context", - key: "task", - value: promptRaw - }) - ); - capabilities.add("orchestration"); + const fallback = chooseFallbackTemplate(prompt); + const fallbackPlan = buildFallbackNodes(fallback.id); + nodes.splice(0, nodes.length, ...fallbackPlan.nodes); + fallbackPlan.capabilities.forEach((item) => capabilities.add(item)); + warnings.push(...fallbackPlan.warnings); + fallbackUsed = true; + fallbackTemplateId = fallback.id; } const edges = nodes.slice(1).map((node, index) => ({ @@ -174,11 +429,20 @@ export function buildAutopilotPlan(promptRaw: string, preferredName?: string): P target: node.id })); + const nodeInsights = buildNodeInsights(nodes, prompt, fallbackUsed); + const confidence = computePlanConfidence(nodeInsights, warnings, fallbackUsed, matchedSignalCount); + return { name: preferredName?.trim() || compactName(promptRaw), description: `Autopilot draft generated from prompt: "${promptRaw.trim()}"`, capabilities: Array.from(capabilities), warnings, + confidence, + requiresConfirmation: true, + fallbackUsed, + fallbackTemplateId, + fallbackOptions: FALLBACK_OPTIONS, + nodeInsights, definition: { nodes, edges, diff --git a/apps/web/src/App.tsx b/apps/web/src/App.tsx index cace68f..551854b 100644 --- a/apps/web/src/App.tsx +++ b/apps/web/src/App.tsx @@ -97,6 +97,7 @@ import { filterNodeOptions, NODE_OPTIONS } from "./lib/nodeCatalog"; import { buildPersistedDefinition, hashDefinition } from "./lib/workflowDraft"; import type { ActivityCatalog, + AutopilotPlan, MiningSummary, OrchestratorJob, OrchestratorOverview, @@ -375,6 +376,7 @@ export default function App() { const [templateSearch, setTemplateSearch] = useState(""); const [autopilotPrompt, setAutopilotPrompt] = useState(""); const [autopilotWorkflowName, setAutopilotWorkflowName] = useState(""); + const [autopilotPlanDraft, setAutopilotPlanDraft] = useState(null); const [activityCatalog, setActivityCatalog] = useState(null); const [nodeSearch, setNodeSearch] = useState(""); const [templateCategoryFilter, setTemplateCategoryFilter] = useState("all"); @@ -1250,7 +1252,7 @@ export default function App() { await refreshDashboard(); }; - const handleCreateFromAutopilot = async () => { + const handleGenerateAutopilotPlan = async () => { const prompt = autopilotPrompt.trim(); if (prompt.length < 6) { setFeedback("Describe the automation in at least 6 characters", "error"); @@ -1260,22 +1262,33 @@ export default function App() { prompt, name: autopilotWorkflowName.trim() || undefined }); + setAutopilotPlanDraft(plan as AutopilotPlan); + const confidencePct = Math.round(Math.max(0, Math.min(1, Number(plan.confidence || 0))) * 100); + setFeedback(`Autopilot plan generated (${confidencePct}% confidence). Review and confirm before creating.`, "success"); + if (plan.warnings.length) { + setFeedback(`Autopilot note: ${plan.warnings[0]}`, "info"); + } + }; + + const handleConfirmAutopilotPlan = async () => { + if (!autopilotPlanDraft) { + setFeedback("Generate an Autopilot plan first", "error"); + return; + } const created = await createWorkflow({ - name: plan.name, - definition: plan.definition + name: autopilotPlanDraft.name, + definition: autopilotPlanDraft.definition }); setWorkflowList((list) => [created, ...list]); await selectWorkflow(created); setAutopilotPrompt(""); setAutopilotWorkflowName(""); - const capabilityPreview = plan.capabilities.slice(0, 3).join(", "); + setAutopilotPlanDraft(null); + const capabilityPreview = autopilotPlanDraft.capabilities.slice(0, 3).join(", "); setFeedback( - `Autopilot draft created${capabilityPreview ? ` (${capabilityPreview}${plan.capabilities.length > 3 ? ", ..." : ""})` : ""}`, + `Autopilot draft created${capabilityPreview ? ` (${capabilityPreview}${autopilotPlanDraft.capabilities.length > 3 ? ", ..." : ""})` : ""}`, "success" ); - if (plan.warnings.length) { - setFeedback(`Autopilot note: ${plan.warnings[0]}`, "info"); - } await refreshDashboard(); }; @@ -1310,6 +1323,7 @@ export default function App() { setActivityCatalog(null); setAutopilotPrompt(""); setAutopilotWorkflowName(""); + setAutopilotPlanDraft(null); setSchedules([]); setScheduleDependsOnId(""); setMaintenanceEnabled(false); @@ -2768,22 +2782,85 @@ export default function App() { Describe desired automation in plain language. ForgeFlow will generate a draft workflow. setAutopilotWorkflowName(e.target.value)} + onChange={(e) => { + setAutopilotWorkflowName(e.target.value); + setAutopilotPlanDraft(null); + }} placeholder="Workflow name (optional)" />