43 changes: 40 additions & 3 deletions README.md
@@ -70,6 +70,9 @@ npm install @mobile-reality/mdma-prompt-pack

# Validation — static analysis for MDMA documents
npm install @mobile-reality/mdma-validator

# CLI — interactive prompt builder + document validation
npx @mobile-reality/mdma-cli
```

All packages are published under the [`@mobile-reality`](https://www.npmjs.com/org/mobile-reality) npm org.
@@ -185,7 +188,8 @@ function App({ ast, store }) {
| `@mobile-reality/mdma-renderer-react` | React rendering layer with components for all 9 MDMA types and hooks for state access. Provides `MdmaDocument` for full-document rendering and `useComponentState`/`useBinding` for fine-grained reactivity. |
| `@mobile-reality/mdma-prompt-pack` | System prompts that teach LLMs how to author valid MDMA documents. Exports `buildSystemPrompt()` to combine the full spec reference with optional custom instructions for domain-specific generation. |
| `@mobile-reality/mdma-validator` | Static analysis engine with 10 lint rules covering YAML correctness, schema conformance, ID uniqueness, binding resolution, and PII sensitivity. Powers programmatic validation in CI pipelines and custom tooling. |
| `@mobile-reality/mdma-evals` | LLM evaluation suite built on promptfoo with 3 test suites: base generation quality (25 tests), custom prompt compliance (10 tests), and multi-turn conversation handling (11 conversations, 25 turns). Validates that AI-generated MDMA documents are structurally correct and semantically appropriate. |
| `@mobile-reality/mdma-cli` | Interactive CLI tool for creating custom MDMA prompts. Opens a local web app where you visually select components, configure fields, and set domain rules and trigger conditions; an LLM then generates a tailored `customPrompt` for use with `buildSystemPrompt()`. Also includes a `validate` command for static document analysis. |
| `@mobile-reality/mdma-evals` | LLM evaluation suite built on promptfoo with 4 test suites: base generation quality (25 tests), custom prompt compliance (10 tests), multi-turn conversation handling (11 conversations, 25 turns), and prompt builder verification (25 tests). Validates that AI-generated MDMA documents are structurally correct and semantically appropriate. |

## Architecture

@@ -197,6 +201,7 @@ function App({ ast, store }) {
└── @mobile-reality/mdma-runtime State / events / policy engine
└── @mobile-reality/mdma-attachables-core Component handlers
└── @mobile-reality/mdma-renderer-react React components
@mobile-reality/mdma-cli CLI prompt builder + validation
@mobile-reality/mdma-evals LLM evaluation suite (promptfoo)
```

@@ -208,6 +213,35 @@ pnpm build
pnpm test
```

## CLI

Interactive prompt builder for creating custom MDMA prompts.

```bash
# Run the prompt builder — opens a web app in your browser
npx @mobile-reality/mdma-cli

# Validate MDMA documents
npx @mobile-reality/mdma-cli validate "docs/**/*.md"
npx @mobile-reality/mdma-cli validate "docs/**/*.md" --fix # auto-fix issues
npx @mobile-reality/mdma-cli validate "docs/**/*.md" --json # JSON output
```

The prompt builder walks you through:
1. **Pick components** — select from the 9 MDMA types (form, table, approval-gate, etc.)
2. **Configure** — define fields, options, roles, sensitive flags, and business rules
3. **Set triggers** — specify when the AI should generate MDMA components (keywords, contextual conditions)
4. **Generate** — an LLM creates a tailored `customPrompt` based on your configuration
5. **Export** — copy the result and use it in your app:

```typescript
import { buildSystemPrompt } from '@mobile-reality/mdma-prompt-pack';

const systemPrompt = buildSystemPrompt({
customPrompt: '<paste generated prompt here>',
});
```

## Evals

LLM evaluation suite using [promptfoo](https://www.promptfoo.dev/) to verify MDMA generation quality.
@@ -222,6 +256,9 @@ pnpm eval:custom
# Run multi-turn conversation tests (25 turns across 11 conversations)
pnpm eval:conversation

# Run prompt builder tests (25 tests)
pnpm eval:prompt-builder

# Run all eval suites
pnpm eval:all

@@ -241,15 +278,15 @@ pnpm eval:view

### v0.2 — Developer Experience
- [ ] More examples (10+ real-world use cases)
- [ ] CLI tool for prompt creation (MDMA flows)
- [x] CLI tool for prompt creation (MDMA flows)
- [ ] Improved error messages in parser and validator
- [ ] Webhook execution engine (real HTTP calls in production environments)
- [ ] File upload field type for forms

### v0.3 — AI & Generation
- [ ] Multi-model eval coverage (Claude, GPT-4o, Gemini, Llama)
- [ ] Prompt tuning toolkit — test and compare custom prompts
- [ ] Agent-friendly SDK — let AI agents fill forms and trigger actions programmatically
- [ ] Webhook execution engine (real HTTP calls in production environments)

### v1.0 — Production Ready
- [ ] Stable API with semantic versioning guarantees
34 changes: 34 additions & 0 deletions evals/assertions/mentions-fields.mjs
@@ -0,0 +1,34 @@
/**
* Asserts that the generated prompt mentions a minimum percentage of the
* configured field names.
*
* config.fields: string[] — field names to look for
* config.minRatio: number — minimum ratio of fields that must appear (default: 0.5)
*/
export default function (output, { config }) {
const fields = config?.fields || [];
const minRatio = config?.minRatio ?? 0.5;

if (fields.length === 0) {
return { pass: true, score: 1, reason: 'No fields to check' };
}

const lower = output.toLowerCase();
const found = fields.filter((f) => lower.includes(f.toLowerCase()));
const ratio = found.length / fields.length;

if (ratio >= minRatio) {
return {
pass: true,
score: ratio,
reason: `Found ${found.length}/${fields.length} field names (${(ratio * 100).toFixed(0)}%)`,
};
}

const missing = fields.filter((f) => !lower.includes(f.toLowerCase()));
return {
pass: false,
score: ratio,
reason: `Only found ${found.length}/${fields.length} field names (need ${(minRatio * 100).toFixed(0)}%). Missing: ${missing.join(', ')}`,
};
}
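The matching core of this assertion can be sanity-checked standalone. The sketch below inlines a hypothetical copy of the field-ratio logic rather than importing the file, and uses made-up field names:

```javascript
// Standalone copy of the field-matching core of mentions-fields.mjs (for illustration only).
function fieldRatio(output, fields) {
  const lower = output.toLowerCase();
  // Case-insensitive substring check, same as the assertion above.
  const found = fields.filter((f) => lower.includes(f.toLowerCase()));
  return found.length / fields.length;
}

const prompt = 'Ask for the Email and Phone fields before submitting.';
console.log(fieldRatio(prompt, ['email', 'phone', 'address'])); // 2 of 3 fields found
```

With the default `minRatio` of 0.5, a 2/3 hit rate passes; dropping to one match out of three would fail.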
67 changes: 67 additions & 0 deletions evals/assertions/mentions-trigger.mjs
@@ -0,0 +1,67 @@
/**
* Asserts that the generated customPrompt includes trigger/when-to-generate
* instructions matching the configured trigger mode.
*
* config.mode: 'keyword' | 'immediate' | 'contextual'
* config.keywords: string[] — for keyword mode, specific phrases to check
* config.contextHints: string[] — for contextual mode, hints to look for
*/
export default function (output, { config }) {
const mode = config?.mode;
const lower = output.toLowerCase();

if (mode === 'keyword') {
const keywords = config?.keywords || [];
if (keywords.length === 0) {
return { pass: true, score: 1, reason: 'No keywords to check' };
}
const found = keywords.filter((kw) => lower.includes(kw.toLowerCase()));
if (found.length > 0) {
return {
pass: true,
score: found.length / keywords.length,
reason: `Found ${found.length}/${keywords.length} trigger keywords: ${found.join(', ')}`,
};
}
return {
pass: false,
score: 0,
reason: `None of the trigger keywords found: ${keywords.join(', ')}`,
};
}

if (mode === 'immediate') {
const markers = /immediate|first message|always|conversation start|right away/;
if (markers.test(lower)) {
return { pass: true, score: 1, reason: 'Found immediate trigger instruction' };
}
return { pass: false, score: 0, reason: 'Missing immediate trigger instruction' };
}

if (mode === 'contextual') {
const hints = config?.contextHints || [];
if (hints.length === 0) {
// At least check for contextual-sounding language
const contextMarkers = /when.*user|after.*attempt|if.*express|condition|context/;
if (contextMarkers.test(lower)) {
return { pass: true, score: 1, reason: 'Found contextual trigger language' };
}
return { pass: false, score: 0, reason: 'Missing contextual trigger language' };
}
const found = hints.filter((h) => lower.includes(h.toLowerCase()));
if (found.length > 0) {
return {
pass: true,
score: found.length / hints.length,
reason: `Found ${found.length}/${hints.length} context hints`,
};
}
return {
pass: false,
score: 0,
reason: `None of the contextual hints found: ${hints.join(', ')}`,
};
}

return { pass: true, score: 1, reason: 'No trigger mode specified' };
}
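For keyword mode, the score reduces to a simple inclusion ratio. A standalone sketch of that branch (the helper and keywords are made up for illustration):

```javascript
// Standalone copy of the keyword-mode branch of mentions-trigger.mjs (illustrative only).
function keywordScore(output, keywords) {
  const lower = output.toLowerCase();
  const found = keywords.filter((kw) => lower.includes(kw.toLowerCase()));
  // Pass if at least one keyword appears; score is the fraction found.
  return { pass: found.length > 0, score: found.length / keywords.length };
}

const prompt = 'Generate the form when the user says "sign up" or "register".';
console.log(keywordScore(prompt, ['sign up', 'register', 'enroll']));
// → pass: true (2 of 3 keywords present)
```

Note the asymmetry with `mentions-fields.mjs`: any single keyword hit passes, while fields must clear a configurable ratio.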
35 changes: 35 additions & 0 deletions evals/assertions/no-spec-repetition.mjs
@@ -0,0 +1,35 @@
/**
* Asserts that the generated customPrompt does NOT repeat the full MDMA spec.
*
* A customPrompt should layer domain-specific instructions on top of the spec,
* not duplicate it. Checks for spec-level content that should not appear.
*/
export default function (output) {
const specMarkers = [
{ pattern: 'MDMA_AUTHOR_PROMPT', label: 'MDMA_AUTHOR_PROMPT reference' },
{ pattern: '## Self-Check Checklist', label: 'Self-check checklist' },
{ pattern: 'Component Reference Table', label: 'Component reference table' },
{ pattern: 'MUST be inside a fenced code block tagged', label: 'Base authoring rule' },
];

const found = [];
for (const marker of specMarkers) {
if (output.includes(marker.pattern)) {
found.push(marker.label);
}
}

if (found.length === 0) {
return {
pass: true,
score: 1,
reason: 'No MDMA spec content repeated',
};
}

return {
pass: false,
score: 0,
reason: `CustomPrompt repeats MDMA spec content: ${found.join(', ')}`,
};
}
43 changes: 43 additions & 0 deletions evals/assertions/prompt-has-sections.mjs
@@ -0,0 +1,43 @@
/**
* Asserts that the generated customPrompt contains the expected structural sections.
*
* A well-structured customPrompt should include most of these elements:
* - Domain/role context
* - When to generate / trigger rules
* - Component instructions
* - Workflow or constraints
*
* Pass required section keywords via config.sections (array of regex patterns).
* By default checks for broad structural markers.
*/
export default function (output, { config }) {
const sections = config?.sections || [
'domain|workflow|role|assist',
'form|component|field',
'sensitive|pii|personal',
];

const lower = output.toLowerCase();
const missing = [];

for (const pattern of sections) {
const regex = new RegExp(pattern, 'i');
if (!regex.test(lower)) {
missing.push(pattern);
}
}

if (missing.length === 0) {
return {
pass: true,
score: 1,
reason: `All ${sections.length} expected section markers found`,
};
}

return {
pass: false,
score: (sections.length - missing.length) / sections.length,
reason: `Missing section markers: ${missing.join(', ')}`,
};
}
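Because each default section marker is a regex alternation, one hit per group is enough. A standalone sketch with a made-up prompt that satisfies all three defaults:

```javascript
// Checks a made-up prompt against the default section markers used above.
const sections = [
  'domain|workflow|role|assist',
  'form|component|field',
  'sensitive|pii|personal',
];
const prompt =
  'You assist with the onboarding workflow. Render a form and mark PII fields as sensitive.';
const lower = prompt.toLowerCase();
// A section is "missing" only if none of its alternatives appear.
const missing = sections.filter((p) => !new RegExp(p).test(lower));
console.log(missing); // []
```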
33 changes: 33 additions & 0 deletions evals/assertions/prompt-length.mjs
@@ -0,0 +1,33 @@
/**
* Asserts that the generated customPrompt is within a reasonable length range.
*
* config.min: minimum chars (default 200)
* config.max: maximum chars (default 8000)
*/
export default function (output, { config }) {
const min = config?.min ?? 200;
const max = config?.max ?? 8000;
const len = output.length;

if (len < min) {
return {
pass: false,
score: len / min,
reason: `Output too short: ${len} chars (minimum ${min})`,
};
}

if (len > max) {
return {
pass: false,
score: max / len,
reason: `Output too long: ${len} chars (maximum ${max})`,
};
}

return {
pass: true,
score: 1,
reason: `Output length ${len} chars (within ${min}-${max})`,
};
}
60 changes: 60 additions & 0 deletions evals/assertions/validate-mdma-examples.mjs
@@ -0,0 +1,60 @@
import { validate } from '@mobile-reality/mdma-validator';

/**
* Extracts ```mdma blocks from a customPrompt and validates each one
* as a standalone MDMA document.
*
* Unlike validate-mdma.mjs (which validates the entire output as a document),
* this assertion handles the case where mdma blocks are embedded as examples
* inside instructional prose.
*/
export default function (output) {
const blockRegex = /```mdma\n([\s\S]*?)```/g;
const blocks = [...output.matchAll(blockRegex)];

if (blocks.length === 0) {
return {
pass: true,
score: 1,
reason: 'No mdma example blocks to validate (OK for customPrompt)',
};
}

const errors = [];
let validCount = 0;

for (let i = 0; i < blocks.length; i++) {
const blockContent = blocks[i][1].trim();
// Wrap each block back into a markdown document for the validator
const doc = `\`\`\`mdma\n${blockContent}\n\`\`\``;

const result = validate(doc, {
exclude: ['thinking-block'],
autoFix: false,
});

if (result.ok) {
validCount++;
} else {
const blockErrors = result.issues
.filter((issue) => issue.severity === 'error')
.map((issue) => `[${issue.ruleId}] ${issue.message}`)
.join('; ');
errors.push(`Block ${i + 1}: ${blockErrors}`);
}
}

if (errors.length === 0) {
return {
pass: true,
score: 1,
reason: `All ${validCount} mdma example block(s) are valid MDMA`,
};
}

return {
pass: false,
score: validCount / blocks.length,
reason: `${errors.length}/${blocks.length} mdma block(s) have validation errors:\n${errors.join('\n')}`,
};
}
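The block-extraction step can be exercised on its own. The sketch below runs the same regex against a made-up prompt string (backticks are embedded mid-line so the sample stays self-contained):

```javascript
// Demonstrates the mdma block-extraction regex used above on a made-up prompt string.
const sample = [
  'When the user asks to register, emit a form. Example:',
  '```mdma',
  'type: form',
  'id: signup',
  '```',
  'Keep prose outside the block.',
].join('\n');

const blocks = [...sample.matchAll(/```mdma\n([\s\S]*?)```/g)];
console.log(blocks.length);       // 1
console.log(blocks[0][1].trim()); // "type: form\nid: signup"
```

The lazy `[\s\S]*?` capture stops at the first closing fence, so multiple example blocks in one prompt are extracted independently.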