diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eefb80d..810ee81 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,8 +39,8 @@ jobs: for k in totals: totals[k]+=int(r.get(k,'0')) except Exception: pass - exp_tests=611 - exp_skipped=0 + exp_tests=639 + exp_skipped=5 if totals['tests']!=exp_tests or totals['skipped']!=exp_skipped: print(f"Unexpected test totals: {totals} != expected tests={exp_tests}, skipped={exp_skipped}") sys.exit(1) diff --git a/.github/workflows/jtd-esm-codegen-release.yml b/.github/workflows/jtd-esm-codegen-release.yml new file mode 100644 index 0000000..7ef2115 --- /dev/null +++ b/.github/workflows/jtd-esm-codegen-release.yml @@ -0,0 +1,138 @@ +name: JTD-ESM-Codegen Nightly + +on: + schedule: + - cron: '0 3 * * *' # 3 AM UTC daily + workflow_dispatch: + +permissions: + contents: write + +jobs: + build-uber-jar: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '21' + cache: 'maven' + - name: Build uber JAR + run: | + ./mvnw -pl jtd-esm-codegen -am package + cp jtd-esm-codegen/target/jtd-esm-codegen.jar jtd-esm-codegen.jar + - uses: actions/upload-artifact@v4 + with: + name: uber-jar + path: jtd-esm-codegen.jar + + native-linux: + needs: build-uber-jar + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + name: uber-jar + path: . 
+ - uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + distribution: 'graalvm' + github-token: ${{ secrets.GITHUB_TOKEN }} + components: 'native-image' # no Maven cache: this job has no checkout and never runs Maven + - name: Build native image + run: | + native-image --no-fallback -jar jtd-esm-codegen.jar jtd-esm-codegen-linux-amd64 + chmod +x jtd-esm-codegen-linux-amd64 + - uses: actions/upload-artifact@v4 + with: + name: native-linux-amd64 + path: jtd-esm-codegen-linux-amd64 + + native-windows: + needs: build-uber-jar + runs-on: windows-latest + steps: + - uses: actions/download-artifact@v4 + with: + name: uber-jar + path: . + - uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + distribution: 'graalvm' + github-token: ${{ secrets.GITHUB_TOKEN }} + components: 'native-image' # no Maven cache: this job has no checkout and never runs Maven + - name: Build native image + run: | + native-image --no-fallback -jar jtd-esm-codegen.jar jtd-esm-codegen-windows-amd64 + - uses: actions/upload-artifact@v4 + with: + name: native-windows-amd64 + path: jtd-esm-codegen-windows-amd64.exe + + native-macos-intel: + needs: build-uber-jar + runs-on: macos-13 # Intel + steps: + - uses: actions/download-artifact@v4 + with: + name: uber-jar + path: . + - uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + distribution: 'graalvm' + github-token: ${{ secrets.GITHUB_TOKEN }} + components: 'native-image' # no Maven cache: this job has no checkout and never runs Maven + - name: Build native image + run: | + native-image --no-fallback -jar jtd-esm-codegen.jar jtd-esm-codegen-macos-amd64 + chmod +x jtd-esm-codegen-macos-amd64 + - uses: actions/upload-artifact@v4 + with: + name: native-macos-amd64 + path: jtd-esm-codegen-macos-amd64 + + native-macos-arm: + needs: build-uber-jar + runs-on: macos-14 # Apple Silicon + steps: + - uses: actions/download-artifact@v4 + with: + name: uber-jar + path: . 
+ - uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + distribution: 'graalvm' + github-token: ${{ secrets.GITHUB_TOKEN }} + components: 'native-image' # no Maven cache: this job has no checkout and never runs Maven + - name: Build native image + run: | + native-image --no-fallback -jar jtd-esm-codegen.jar jtd-esm-codegen-macos-arm64 + chmod +x jtd-esm-codegen-macos-arm64 + - uses: actions/upload-artifact@v4 + with: + name: native-macos-arm64 + path: jtd-esm-codegen-macos-arm64 + + release: + needs: [native-linux, native-windows, native-macos-intel, native-macos-arm] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + - name: Create nightly release + uses: softprops/action-gh-release@v1 + with: + tag_name: nightly-${{ github.run_number }} + name: Nightly Build ${{ github.run_number }} + prerelease: true + files: | + native-linux-amd64/jtd-esm-codegen-linux-amd64 + native-windows-amd64/jtd-esm-codegen-windows-amd64.exe + native-macos-amd64/jtd-esm-codegen-macos-amd64 + native-macos-arm64/jtd-esm-codegen-macos-arm64 + uber-jar/jtd-esm-codegen.jar + diff --git a/README.md b/README.md index 04fdad8..95a7f31 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ In addition to the core backport, this repo includes implementations of more adv | --- | --- | --- | | `json-java21-jtd` | JSON Type Definition (JTD) validator implementing RFC 8927 | [JTD validator](#json-type-definition-jtd-validator) | | `json-java21-jsonpath` | JsonPath query engine over `java.util.json` values | [JsonPath](#jsonpath) | +| `jtd-esm-codegen` | Experimental JTD → ES2020 ESM validator code generator | [JTD → ESM codegen](#jtd-to-esm-validator-codegen-experimental) | We welcome contributions to these incubating modules. 
@@ -388,6 +389,38 @@ Features: - ✅ Discriminator tag exemption from additional properties - ✅ Stack-based validation preventing StackOverflowError +## JTD to ESM Validator Codegen (Experimental) + +This repo also contains an **experimental** CLI tool that reads a JTD schema (RFC 8927) and generates a **vanilla ES2020 module** exporting a `validate(instance)` function. The intended use case is validating JSON event payloads in the browser (for example, across tabs using `BroadcastChannel`) without a build step. + +### Supported JTD subset (flat schemas only) + +This tool deliberately supports only: +- `properties` (required properties) +- `optionalProperties` +- `type` primitives (`string`, `boolean`, `timestamp`, `int8`, `int16`, `int32`, `uint8`, `uint16`, `uint32`, `float32`, `float64`) +- `enum` +- `metadata.id` (used for the output filename prefix) + +It rejects other JTD features (`elements`, `values`, `discriminator`/`mapping`, `ref`/`definitions`) and also rejects **nested `properties`** (object schemas inside properties). + +When rejected, the error message is: + +`Unsupported JTD feature: <feature>. This experimental tool only supports flat schemas with properties, optionalProperties, type, and enum.` + +### Build and run + +```bash +./mvnw -pl jtd-esm-codegen -am package +java -jar ./jtd-esm-codegen/target/jtd-esm-codegen.jar schema.jtd.json +``` + +The output file is written to the current directory as: + +`<id>-<hash8>.js` + +Where `<id>` is the schema's `metadata.id` and `<hash8>` is the first 8 characters of the SHA-256 hash of the input schema file bytes. + ## Building Requires JDK 21 or later. Build with Maven: diff --git a/jtd-esm-codegen/JTD_CODEGEN_SPEC.md b/jtd-esm-codegen/JTD_CODEGEN_SPEC.md new file mode 100644 index 0000000..caf3fc9 --- /dev/null +++ b/jtd-esm-codegen/JTD_CODEGEN_SPEC.md @@ -0,0 +1,705 @@ +# JTD Code Generation Specification + +A language-independent specification for compiling RFC 8927 JSON Type Definition +schemas into target-language source code that validates JSON documents. 
The +generated code contains exactly the checks the schema requires -- no +interpreter, no AST, no runtime stack, no dead code. + +## 1. Terminology + +| Term | Meaning | +|---|---| +| **schema** | A JSON object conforming to RFC 8927. | +| **instance** | The JSON value being validated at runtime. | +| **form** | One of the 8 mutually-exclusive schema shapes defined in RFC 8927 plus the nullable modifier. | +| **AST node** | An immutable, tagged value representing one compiled schema form. Used during generation, discarded after. | +| **error** | A pair of JSON Pointers: `(instancePath, schemaPath)`. | +| **definitions** | A flat string-keyed map of named AST nodes, resolved at compile time. Each becomes a generated function. | + +## 2. Overview + +A JTD code generator operates in two phases: + +1. **Parse**: Read the JTD schema JSON and compile it into an intermediate + AST of immutable nodes (Section 3). +2. **Emit**: Walk the AST and emit target-language source code. Each AST + node maps to a specific code pattern. The AST is discarded after + emission (Section 5). + +The generated code is a standalone validation function. When executed against +a JSON instance, it produces the same `(instancePath, schemaPath)` error +pairs that RFC 8927 Section 3.3 specifies. + +## 3. Intermediate AST + +The AST is used only during generation. It is not present in the output. 
+ +### 3.1 Node Types + +``` +Node = + | Empty -- {} + | Ref { name: String } -- {"ref": "..."} + | Type { type: TypeKeyword } -- {"type": "..."} + | Enum { values: List<String> } -- {"enum": [...]} + | Elements { schema: Node } -- {"elements": ...} + | Properties { required: Map<String, Node>, -- {"properties": ...} + optional: Map<String, Node>, -- {"optionalProperties": ...} + additional: Boolean } -- {"additionalProperties": ...} + | Values { schema: Node } -- {"values": ...} + | Discrim { tag: String, mapping: Map<String, Node> } -- {"discriminator":...,"mapping":...} + | Nullable { inner: Node } -- any form + "nullable": true +``` + +`TypeKeyword` is one of the 11 strings defined in RFC 8927 Section 2.2.3: + +``` +TypeKeyword = boolean | string | timestamp + | int8 | uint8 | int16 | uint16 | int32 | uint32 + | float32 | float64 +``` + +### 3.2 Compilation Algorithm + +``` +compile(json, isRoot=true, definitions) -> Node: + + REQUIRE json is a JSON object + + IF isRoot: + IF json has key "definitions": + REQUIRE json["definitions"] is a JSON object + -- Pass 1: register all keys as placeholders for forward refs + FOR EACH key in json["definitions"]: + definitions[key] = PLACEHOLDER + -- Pass 2: compile each definition + FOR EACH key in json["definitions"]: + definitions[key] = compile(json["definitions"][key], isRoot=false, definitions) + ELSE: + REQUIRE json does NOT have key "definitions" + + -- Detect form + forms = [] + IF json has "ref": forms += "ref" + IF json has "type": forms += "type" + IF json has "enum": forms += "enum" + IF json has "elements": forms += "elements" + IF json has "values": forms += "values" + IF json has "discriminator": forms += "discriminator" + IF json has "properties" OR json has "optionalProperties": + forms += "properties" + + REQUIRE |forms| <= 1 + + -- Compile form + node = MATCH forms: + [] -> Empty + ["ref"] -> compileRef(json, definitions) + ["type"] -> compileType(json) + ["enum"] -> compileEnum(json) + ["elements"] -> compileElements(json, definitions) + ["properties"] 
-> compileProperties(json, definitions) + ["values"] -> compileValues(json, definitions) + ["discriminator"]-> compileDiscriminator(json, definitions) + + -- Nullable modifier wraps any form + IF json has "nullable" AND json["nullable"] == true: + node = Nullable { inner: node } + + RETURN node +``` + +### 3.3 Form-Specific Compilation + +**Ref**: +``` +compileRef(json, definitions): + name = json["ref"] -- must be a string + REQUIRE name IN definitions -- forward refs are valid (placeholder exists) + RETURN Ref { name } +``` + +**Type**: +``` +compileType(json): + t = json["type"] -- must be a string + REQUIRE t IN TypeKeyword + RETURN Type { type: t } +``` + +**Enum**: +``` +compileEnum(json): + values = json["enum"] -- must be a non-empty array of strings + REQUIRE no duplicates in values + RETURN Enum { values } +``` + +**Elements**: +``` +compileElements(json, definitions): + inner = compile(json["elements"], isRoot=false, definitions) + RETURN Elements { schema: inner } +``` + +**Properties**: +``` +compileProperties(json, definitions): + req = {} + opt = {} + IF json has "properties": + FOR EACH (key, schema) in json["properties"]: + req[key] = compile(schema, isRoot=false, definitions) + IF json has "optionalProperties": + FOR EACH (key, schema) in json["optionalProperties"]: + opt[key] = compile(schema, isRoot=false, definitions) + REQUIRE keys(req) INTERSECT keys(opt) == {} + additional = json.get("additionalProperties", false) + RETURN Properties { required: req, optional: opt, additional } +``` + +**Values**: +``` +compileValues(json, definitions): + inner = compile(json["values"], isRoot=false, definitions) + RETURN Values { schema: inner } +``` + +**Discriminator**: +``` +compileDiscriminator(json, definitions): + tag = json["discriminator"] -- must be a string + REQUIRE json has "mapping" + mapping = {} + FOR EACH (key, schema) in json["mapping"]: + node = compile(schema, isRoot=false, definitions) + REQUIRE node is Properties -- not Nullable, not 
any other form + REQUIRE tag NOT IN node.required + REQUIRE tag NOT IN node.optional + mapping[key] = node + RETURN Discrim { tag, mapping } +``` + +### 3.4 Compile-Time Invariants + +After compilation, the following are guaranteed: +- Every `Ref.name` resolves to an entry in `definitions`. +- Every `Discrim.mapping` value is a `Properties` node (not nullable). +- No `Properties` node has overlapping required/optional keys. +- The AST is immutable. No node is modified after construction. + +## 4. Type Checking Reference + +Exact semantics for each `TypeKeyword`. The code generator emits exactly +this check, inlined, for each type keyword it encounters. + +### 4.1 boolean + +``` +value is a JSON boolean (true or false) +``` + +Target-language expression examples: +- JavaScript: `typeof v === "boolean"` +- Java: `v instanceof JsonBoolean` +- Python: `isinstance(v, bool)` + +### 4.2 string + +``` +value is a JSON string +``` + +Target-language expression examples: +- JavaScript: `typeof v === "string"` +- Java: `v instanceof JsonString` +- Python: `isinstance(v, str)` + +### 4.3 timestamp + +``` +value is a JSON string +AND value matches the RFC 3339 date-time production + (regex: ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:(\d{2}|60)(\.\d+)?(Z|[+-]\d{2}:\d{2})$) +AND the date-time is parseable (accounting for leap seconds by + normalizing :60 to :59 before parsing) +``` + +Target-language expression examples: +- JavaScript: `typeof v === "string" && !Number.isNaN(Date.parse(v))` (simplified; + a full implementation needs the regex for leap-second support) +- Java: regex match + `OffsetDateTime.parse(normalized)` + +### 4.4 float32, float64 + +``` +value is a JSON number (any finite number; no range check) +``` + +RFC 8927 does not distinguish float32 from float64 at the validation level. +Both accept any JSON number. 
+ +Target-language expression examples: +- JavaScript: `typeof v === "number" && Number.isFinite(v)` +- Java: `v instanceof JsonNumber` + +### 4.5 Integer types + +All integer types share the same three-part check: + +``` +value is a JSON number +AND value has zero fractional part (floor(value) == value) +AND value is within the type's range (inclusive) +``` + +| Type | Min | Max | +|---|---|---| +| int8 | -128 | 127 | +| uint8 | 0 | 255 | +| int16 | -32768 | 32767 | +| uint16 | 0 | 65535 | +| int32 | -2147483648 | 2147483647 | +| uint32 | 0 | 4294967295 | + +Note: `3.0` is a valid int8. `3.5` is not. This is value-based, not +syntax-based. + +Target-language expression examples: +- JavaScript (uint8): `typeof v === "number" && Number.isInteger(v) && v >= 0 && v <= 255` +- Java (uint8): `v instanceof JsonNumber n && n.toDouble() == Math.floor(n.toDouble()) && n.toLong() >= 0 && n.toLong() <= 255` + +## 5. Emission Rules + +The code generator walks the AST and emits target-language source code. +Each AST node maps to a specific code pattern. The central rule: + +**Emit only what the schema requires. If the schema does not mention a +form, the generated code does not contain any logic for that form.** + +### 5.1 Generated Code Structure + +The generator emits: + +1. **One function per definition** -- named `validate_<name>`, taking + `(v, e, p, sp)` (value, error list, instance path, schema path). Only + emitted if the schema has definitions. + +2. **One exported `validate(instance)` function** -- the entry point. Creates + the error list, calls the root validation logic, returns the error list. + +3. **No helpers, no libraries, no imports.** Every check is inlined. If the + schema uses only `"type": "string"`, the generated code contains one + `typeof` check and nothing else. + +### 5.2 Node-to-Code Mapping + +#### Empty + +Emit nothing. No check. No code. 
+ +If an Empty node is a required property value, the generated code checks +that the key exists but does not validate the value: + +```javascript +// Schema: {"properties": {"data": {}}} +if (!("data" in obj)) e.push({instancePath: p, schemaPath: sp + "/properties/data"}); +// No else branch -- empty schema accepts any value +``` + +#### Nullable + +Emit a null guard before the inner check: + +```javascript +// Schema: {"type": "string", "nullable": true} +if (v !== null) { + if (typeof v !== "string") e.push({instancePath: p, schemaPath: sp + "/type"}); +} +``` + +If the inner node is Empty, the nullable wraps nothing -- emit only the +null guard (which passes everything, so emit nothing at all). + +#### Type + +Emit the type-specific check inlined. No helper function. + +```javascript +// "type": "string" +if (typeof v !== "string") e.push({instancePath: p, schemaPath: sp + "/type"}); + +// "type": "uint8" +if (typeof v !== "number" || !Number.isInteger(v) || v < 0 || v > 255) + e.push({instancePath: p, schemaPath: sp + "/type"}); + +// "type": "boolean" +if (typeof v !== "boolean") e.push({instancePath: p, schemaPath: sp + "/type"}); + +// "type": "float64" +if (typeof v !== "number" || !Number.isFinite(v)) + e.push({instancePath: p, schemaPath: sp + "/type"}); +``` + +#### Enum + +Emit a set-membership check. For small enums, inline the array. For large +enums, a code generator MAY hoist the array to module scope as a constant. + +```javascript +// "enum": ["a", "b", "c"] +if (typeof v !== "string" || !["a","b","c"].includes(v)) + e.push({instancePath: p, schemaPath: sp + "/enum"}); +``` + +Note: the string type guard is required because RFC 8927 specifies that +non-string values fail enum validation. + +#### Elements + +Emit an array type guard, then a loop. The loop body is the generated +check for the element schema. 
+ +```javascript +// "elements": {"type": "string"} +if (!Array.isArray(v)) { + e.push({instancePath: p, schemaPath: sp}); +} else { + for (let i = 0; i < v.length; i++) { + if (typeof v[i] !== "string") + e.push({instancePath: p + "/" + i, schemaPath: sp + "/elements/type"}); + } +} +``` + +If the element schema is a complex type (Properties, Discrim), emit a +function call in the loop body instead of inlining. + +For nested arrays (arrays of arrays), a code generator MAY inline nested +loops up to a configurable depth (e.g. 3 levels) for performance, falling +back to function calls beyond that depth. + +#### Properties + +Emit an object type guard, then: +1. One presence check per required key. +2. Inlined value checks for each required and optional property. +3. A key-rejection loop if `additional == false`. + +```javascript +// Schema: {"properties":{"name":{"type":"string"}}, "optionalProperties":{"age":{"type":"uint8"}}} +if (v === null || typeof v !== "object" || Array.isArray(v)) { + e.push({instancePath: p, schemaPath: sp}); +} else { + // Required properties + if (!("name" in v)) e.push({instancePath: p, schemaPath: sp + "/properties/name"}); + else if (typeof v["name"] !== "string") + e.push({instancePath: p + "/name", schemaPath: sp + "/properties/name/type"}); + + // Optional properties + if ("age" in v) { + const a = v["age"]; + if (typeof a !== "number" || !Number.isInteger(a) || a < 0 || a > 255) + e.push({instancePath: p + "/age", schemaPath: sp + "/optionalProperties/age/type"}); + } + + // Additional properties (only emitted when additional == false) + for (const k in v) { + if (k !== "name" && k !== "age") + e.push({instancePath: p + "/" + k, schemaPath: sp}); + } +} +``` + +If `additional` is `true`, the for-in loop is **not emitted at all**. + +If a property value's schema is a complex type (Properties, Elements, etc.), +emit a function call instead of inlining. If it is a leaf (Type, Enum, +Empty), inline it. 
+ +#### Values + +Emit an object type guard, then a for-in loop. The loop body is the +generated check for the value schema. + +```javascript +// "values": {"type": "string"} +if (v === null || typeof v !== "object" || Array.isArray(v)) { + e.push({instancePath: p, schemaPath: sp}); +} else { + for (const k in v) { + if (typeof v[k] !== "string") + e.push({instancePath: p + "/" + k, schemaPath: sp + "/values/type"}); + } +} +``` + +#### Discriminator + +Emit a 5-step sequential check, then a switch/if-else dispatching to the +variant validator. + +```javascript +// "discriminator": "type", "mapping": {"a": {...}, "b": {...}} +if (v === null || typeof v !== "object" || Array.isArray(v)) { + e.push({instancePath: p, schemaPath: sp}); +} else if (!("type" in v)) { + e.push({instancePath: p, schemaPath: sp}); +} else if (typeof v["type"] !== "string") { + e.push({instancePath: p + "/type", schemaPath: sp + "/discriminator"}); +} else if (v["type"] === "a") { + validate_variant_a(v, e, p, sp + "/mapping/a"); +} else if (v["type"] === "b") { + validate_variant_b(v, e, p, sp + "/mapping/b"); +} else { + e.push({instancePath: p + "/type", schemaPath: sp + "/mapping"}); +} +``` + +Each variant validator is a generated Properties check. The discriminator +tag field is excluded from additional-properties checking and from +property validation in the variant (it was already validated by the +discriminator check). + +#### Ref + +Emit a function call to the generated definition validator: + +```javascript +// "ref": "address" +validate_address(v, e, p, sp); +``` + +Each definition becomes a generated function. The function body is the +emitted code for the definition's AST node. + +### 5.3 Inlining Policy + +A code generator SHOULD inline checks for leaf nodes (Type, Enum, Empty) +directly into their parent's generated code. + +A code generator SHOULD emit separate functions for: +- Each definition (called via Ref). 
+- Each Properties or Discrim node that appears as the child of Elements, + Values, or other container nodes. +- Each discriminator variant. + +A code generator MUST NOT emit helper functions, type-checking utilities, +or library imports that are not required by the specific schema being +compiled. + +### 5.4 Recursive Schemas + +Recursive refs (a definition that ultimately references itself) are legal +in RFC 8927. In generated code, this becomes recursive function calls: + +```javascript +// Schema: {"definitions":{"node":{"properties":{"next":{"ref":"node","nullable":true}}}}, +// "ref":"node"} +function validate_node(v, e, p, sp) { + if (v === null || typeof v !== "object" || Array.isArray(v)) { + e.push({instancePath: p, schemaPath: sp}); + return; + } + if (!("next" in v)) { + e.push({instancePath: p, schemaPath: sp + "/properties/next"}); + } else if (v["next"] !== null) { + validate_node(v["next"], e, p + "/next", sp + "/properties/next"); + } +} + +export function validate(instance) { + const e = []; + validate_node(instance, e, "", ""); + return e; +} +``` + +The target-language call stack provides the implicit work stack. For most +real-world schemas, recursion depth is bounded by the document's structure. + +### 5.5 Discriminator Tag Exemption + +When emitting a variant Properties check inside a discriminator, the +code generator MUST: +- Exclude the tag field from additional-properties rejection. +- Not emit a value check for the tag field (it was already validated + as a string by the discriminator check). + +This means the generated known-key set in the for-in loop includes the +tag field name, and no property check is emitted for it. + +## 6. 
Error Format + +Errors follow RFC 8927 Section 3.3, which defines error indicators as +pairs of JSON Pointers: + +``` +Error = { + instancePath: String, -- JSON Pointer (RFC 6901) into the instance + schemaPath: String -- JSON Pointer (RFC 6901) into the schema +} +``` + +The `instancePath` points to the value that failed. The `schemaPath` points +to the schema keyword that caused the failure. + +### 6.1 Schema Path Construction + +The schema path is built at generation time and baked into the generated +code as string literals. Each emission rule appends to the schema path: + +| Form | Appended path component(s) | +|---|---| +| Type | `/type` | +| Enum | `/enum` | +| Elements (type guard) | (nothing -- error at current path) | +| Elements (child) | `/elements` | +| Properties (missing key) | `/properties/` | +| Properties (additional) | (nothing -- error at current path) | +| Properties (child req) | `/properties/` | +| Properties (child opt) | `/optionalProperties/` | +| Values (type guard) | (nothing -- error at current path) | +| Values (child) | `/values` | +| Discrim (not object) | (nothing -- error at current path) | +| Discrim (tag missing) | (nothing -- error at current path) | +| Discrim (tag not string) | `/discriminator` | +| Discrim (tag not in map) | `/mapping` | +| Discrim (variant) | `/mapping/` | + +Schema paths are string literals in the generated code. They do not change +at runtime. + +### 6.2 Instance Path Construction + +Instance paths are built at runtime via string concatenation: + +| Descent into | Appended to instancePath | +|---|---| +| Array element at index `i` | `"/" + i` | +| Object property with key `k` | `"/" + k` | +| Discriminator tag value | `"/" + tagFieldName` | +| Discriminator variant | (nothing -- same object) | +| Ref target | (nothing -- transparent) | + +## 7. Conformance + +Generated code conforms to this spec if: + +1. 
For any valid RFC 8927 schema and any JSON instance, the generated + `validate(instance)` function returns the same set of + `(instancePath, schemaPath)` error pairs that RFC 8927 Section 3.3 + specifies. + +2. The generated code passes the official JTD validation test suite + (`validation.json` from `json-typedef-spec`) when used as the + validation engine. + +3. The code generator rejects invalid schemas at generation time per the + constraints in Section 3.4. + +4. The generated code contains no dead code: no helper functions, loops, + branches, or checks that the schema does not require. + +5. Validation does not short-circuit. All errors are collected in a + single pass. + +## 8. Worked Example + +Schema: +```json +{ + "properties": { + "name": { "type": "string" }, + "age": { "type": "uint8" }, + "tags": { "elements": { "type": "string" } } + }, + "optionalProperties": { + "email": { "type": "string" } + } +} +``` + +### Compiled AST (intermediate, discarded after emission) + +``` +Properties { + required: { + "name" -> Type { type: "string" }, + "age" -> Type { type: "uint8" }, + "tags" -> Elements { schema: Type { type: "string" } } + }, + optional: { + "email" -> Type { type: "string" } + }, + additional: false +} +``` + +### Generated Code (JavaScript ES2020) + +```javascript +export function validate(instance) { + const e = []; + if (instance === null || typeof instance !== "object" || Array.isArray(instance)) { + e.push({instancePath: "", schemaPath: ""}); + return e; + } + + if (!("name" in instance)) e.push({instancePath: "", schemaPath: "/properties/name"}); + else if (typeof instance["name"] !== "string") + e.push({instancePath: "/name", schemaPath: "/properties/name/type"}); + + if (!("age" in instance)) e.push({instancePath: "", schemaPath: "/properties/age"}); + else { + const v = instance["age"]; + if (typeof v !== "number" || !Number.isInteger(v) || v < 0 || v > 255) + e.push({instancePath: "/age", schemaPath: "/properties/age/type"}); + } + + 
if (!("tags" in instance)) e.push({instancePath: "", schemaPath: "/properties/tags"}); + else if (!Array.isArray(instance["tags"])) + e.push({instancePath: "/tags", schemaPath: "/properties/tags"}); + else { + const arr = instance["tags"]; + for (let i = 0; i < arr.length; i++) { + if (typeof arr[i] !== "string") + e.push({instancePath: "/tags/" + i, schemaPath: "/properties/tags/elements/type"}); + } + } + + if ("email" in instance && typeof instance["email"] !== "string") + e.push({instancePath: "/email", schemaPath: "/optionalProperties/email/type"}); + + for (const k in instance) { + if (k !== "name" && k !== "age" && k !== "tags" && k !== "email") + e.push({instancePath: "/" + k, schemaPath: ""}); + } + + return e; +} +``` + +No helper functions. No dead code. Every line corresponds to a specific +constraint in the schema. + +### Validation of example instance + +Instance: +```json +{ "name": "Alice", "age": 300, "tags": ["a", 42], "extra": true } +``` + +Errors produced: +```json +[ + { "instancePath": "/age", "schemaPath": "/properties/age/type" }, + { "instancePath": "/tags/1", "schemaPath": "/properties/tags/elements/type" }, + { "instancePath": "/extra", "schemaPath": "" } +] +``` + +- `age`: 300 is a number with zero fractional part, but 300 > 255 (uint8 max). +- `tags/1`: 42 is not a string. +- `extra`: not in required or optional properties, and `additionalProperties` + defaults to `false`. 
diff --git a/jtd-esm-codegen/pom.xml b/jtd-esm-codegen/pom.xml new file mode 100644 index 0000000..4b04a83 --- /dev/null +++ b/jtd-esm-codegen/pom.xml @@ -0,0 +1,171 @@ + + + 4.0.0 + + + io.github.simbo1905.json + parent + 0.1.9 + + + jtd-esm-codegen + jar + + JTD to ES2020 Validator Code Generator (Experimental) + https://simbo1905.github.io/java.util.json.Java21/ + + scm:git:https://github.com/simbo1905/java.util.json.Java21.git + scm:git:git@github.com:simbo1905/java.util.json.Java21.git + https://github.com/simbo1905/java.util.json.Java21 + HEAD + + Experimental CLI that generates vanilla ES2020 ESM validators from a deliberately-limited JTD (RFC 8927) subset for browser payload validation. + + + UTF-8 + 21 + 3.6.0 + 24.1.2 + + + + + io.github.simbo1905.json + java.util.json + ${project.version} + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.assertj + assertj-core + test + + + net.jqwik + jqwik + 1.9.3 + test + + + + org.graalvm.polyglot + polyglot + ${graaljs.version} + test + + + org.graalvm.polyglot + js-community + ${graaljs.version} + pom + test + + + + + org.bitbucket.thinbus + junit-js + 2.0.0 + test + + + + org.graalvm.polyglot + * + + + org.graalvm.js + * + + + org.graalvm.truffle + * + + + + + + org.junit.vintage + junit-vintage-engine + ${junit.jupiter.version} + test + + + + + jtd-esm-codegen + + + + org.apache.maven.plugins + maven-compiler-plugin + + 21 + + -Xlint:all + -Werror + -Xdiags:verbose + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + -ea + + **/*Test.java + **/*Tests.java + **/*TestSuite.java + + + false + + + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + false + + + io.github.simbo1905.json.jtd.codegen.JtdToEsmCli + + + + + + + + + + diff --git a/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/EsmRenderer.java 
b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/EsmRenderer.java new file mode 100644 index 0000000..6abfcbc --- /dev/null +++ b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/EsmRenderer.java @@ -0,0 +1,467 @@ +package io.github.simbo1905.json.jtd.codegen; + +import java.util.*; + +import static io.github.simbo1905.json.jtd.codegen.JtdAst.*; + +/// Generates ES2020 ESM validators per JTD_CODEGEN_SPEC.md +/// +/// Key principles: +/// - No runtime stack - direct code emission +/// - No helper functions - inline all checks +/// - Emit only what the schema requires (no dead code) +/// - Return {instancePath, schemaPath} error objects per RFC 8927 +final class EsmRenderer { + private EsmRenderer() {} + + static String render(RootNode schema, String sha256Hex, String shaPrefix8) { + Objects.requireNonNull(schema, "schema must not be null"); + Objects.requireNonNull(sha256Hex, "sha256Hex must not be null"); + Objects.requireNonNull(shaPrefix8, "shaPrefix8 must not be null"); + + final var ctx = new RenderContext(); + ctx.sha256Hex = sha256Hex; + ctx.shaPrefix8 = shaPrefix8; + ctx.schemaId = schema.id(); + + final var sb = new StringBuilder(8 * 1024); + + // Header + sb.append("// ").append(schema.id()).append("-").append(shaPrefix8).append(".js\n"); + sb.append("// Generated from JTD schema: ").append(schema.id()).append("\n"); + sb.append("// SHA-256: ").append(sha256Hex).append(" (prefix: ").append(shaPrefix8).append(")\n"); + sb.append("\n"); + + sb.append("const SCHEMA_ID = ").append(jsString(schema.id())).append(";\n\n"); + + // Collect all enum constants used in the schema + collectEnums(schema.rootSchema(), ctx); + for (var def : schema.definitions().values()) { + collectEnums(def, ctx); + } + generateEnumConstants(sb, ctx); + + // Generate validation functions for definitions + for (var entry : schema.definitions().entrySet()) { + final String defName = entry.getKey(); + final JtdNode defNode = entry.getValue(); + 
generateDefinitionFunction(sb, defName, defNode, ctx); + } + + // Generate the main validate function + sb.append("export function validate(instance) {\n"); + sb.append(" const errors = [];\n"); + + // Emit validation logic inline for root + final var rootCode = new StringBuilder(); + generateNodeValidation(rootCode, schema.rootSchema(), ctx, "instance", "\"\"", "\"\"", " ", null); + sb.append(rootCode); + + sb.append(" return errors;\n"); + sb.append("}\n\n"); + + // Generate inline validator functions for complex nested schemas + generateInlineFunctions(sb, ctx); + + sb.append("export { SCHEMA_ID };\n"); + + return sb.toString(); + } + + private static void collectEnums(JtdNode node, RenderContext ctx) { + switch (node) { + case EnumNode en -> { + final String constName = "ENUM_" + (ctx.enumCounter++); + ctx.enumConstants.put(constName, en.values()); + } + case ElementsNode el -> collectEnums(el.schema(), ctx); + case ValuesNode vn -> collectEnums(vn.schema(), ctx); + case PropertiesNode pn -> { + pn.properties().values().forEach(n -> collectEnums(n, ctx)); + pn.optionalProperties().values().forEach(n -> collectEnums(n, ctx)); + } + case DiscriminatorNode dn -> dn.mapping().values().forEach(n -> collectEnums(n, ctx)); + case NullableNode nn -> collectEnums(nn.wrapped(), ctx); + default -> {} // No enums + } + } + + private static void generateEnumConstants(StringBuilder sb, RenderContext ctx) { + if (ctx.enumConstants.isEmpty()) return; + + for (var entry : ctx.enumConstants.entrySet()) { + sb.append("const ").append(entry.getKey()).append(" = ") + .append(jsStringArray(entry.getValue())).append(";\n"); + } + sb.append("\n"); + } + + private static void generateDefinitionFunction(StringBuilder sb, String defName, JtdNode node, RenderContext ctx) { + final String safeName = toSafeName(defName); + + sb.append("function validate_").append(safeName).append("(v, errors, p, sp) {\n"); + generateNodeValidation(sb, node, ctx, "v", "p", "sp", " ", null); + 
sb.append("}\n\n"); + } + + /** + * Generates validation code for a node. + * @param discriminatorKey If non-null, this PropertiesNode is a discriminator variant and should + * skip validation of the discriminator key itself + */ + private static void generateNodeValidation(StringBuilder sb, JtdNode node, RenderContext ctx, + String valueExpr, String pathExpr, String schemaPathExpr, String indent, String discriminatorKey) { + + switch (node) { + case EmptyNode ignored -> { + // Accepts anything - no validation + } + + case NullableNode nn -> { + if (nn.wrapped() instanceof EmptyNode) { + // Nullable empty - accepts anything including null, no check needed + } else { + sb.append(indent).append("if (").append(valueExpr).append(" !== null) {\n"); + generateNodeValidation(sb, nn.wrapped(), ctx, valueExpr, pathExpr, schemaPathExpr, indent + " ", discriminatorKey); + sb.append(indent).append("}\n"); + } + } + + case TypeNode tn -> { + generateTypeCheck(sb, tn.type(), valueExpr, pathExpr, schemaPathExpr, indent); + } + + case EnumNode en -> { + final String constName = findEnumConst(ctx.enumConstants, en.values()); + sb.append(indent).append("if (typeof ").append(valueExpr).append(" !== \"string\" || !") + .append(constName).append(".includes(").append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append(" + \"/enum\"});\n"); + sb.append(indent).append("}\n"); + } + + case ElementsNode el -> { + // Type guard + sb.append(indent).append("if (!Array.isArray(").append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append("});\n"); + sb.append(indent).append("} else {\n"); + + // Loop over elements + sb.append(indent).append(" for (let i = 0; i < ").append(valueExpr).append(".length; i++) {\n"); + final String elemValue = valueExpr + "[i]"; + final String 
elemPath = pathExpr + " + \"/\" + i"; + final String elemSchemaPath = schemaPathExpr + " + \"/elements\""; + + if (isLeafNode(el.schema())) { + // Inline leaf validation + generateNodeValidation(sb, el.schema(), ctx, elemValue, elemPath, elemSchemaPath, indent + " ", null); + } else { + // Complex schema - needs inline function + final String fnName = getInlineFunctionName(el.schema(), ctx); + sb.append(indent).append(" ").append(fnName).append("(") + .append(elemValue).append(", errors, ").append(elemPath) + .append(", ").append(elemSchemaPath).append(");\n"); + } + + sb.append(indent).append(" }\n"); + sb.append(indent).append("}\n"); + } + + case PropertiesNode pn -> { + // Type guard + sb.append(indent).append("if (").append(valueExpr).append(" === null || typeof ") + .append(valueExpr).append(" !== \"object\" || Array.isArray(").append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append("});\n"); + sb.append(indent).append("} else {\n"); + + // Required properties + for (var entry : pn.properties().entrySet()) { + final String key = entry.getKey(); + final JtdNode propSchema = entry.getValue(); + + // Skip discriminator key if we're in a discriminator variant + if (discriminatorKey != null && key.equals(discriminatorKey)) { + continue; + } + + sb.append(indent).append(" if (!(\"").append(key).append("\" in ") + .append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append(" + \"/properties/") + .append(jsonPointerEscape(key)).append("\"});\n"); + sb.append(indent).append(" }\n"); + + // Validate value if present + sb.append(indent).append(" if (\"").append(key).append("\" in ") + .append(valueExpr).append(") {\n"); + final String propValue = valueExpr + "[\"" + key + "\"]"; + final String propPath = pathExpr + " + \"/" + jsonPointerEscape(key) 
+ "\""; + final String propSchemaPath = schemaPathExpr + " + \"/properties/" + jsonPointerEscape(key) + "\""; + + if (isLeafNode(propSchema)) { + generateNodeValidation(sb, propSchema, ctx, propValue, propPath, propSchemaPath, indent + " ", null); + } else { + final String fnName = getInlineFunctionName(propSchema, ctx); + sb.append(indent).append(" ").append(fnName).append("(") + .append(propValue).append(", errors, ").append(propPath) + .append(", ").append(propSchemaPath).append(");\n"); + } + sb.append(indent).append(" }\n"); + } + + // Optional properties + for (var entry : pn.optionalProperties().entrySet()) { + final String key = entry.getKey(); + final JtdNode propSchema = entry.getValue(); + + // Skip discriminator key if we're in a discriminator variant + if (discriminatorKey != null && key.equals(discriminatorKey)) { + continue; + } + + sb.append(indent).append(" if (\"").append(key).append("\" in ") + .append(valueExpr).append(") {\n"); + final String propValue = valueExpr + "[\"" + key + "\"]"; + final String propPath = pathExpr + " + \"/" + jsonPointerEscape(key) + "\""; + final String propSchemaPath = schemaPathExpr + " + \"/optionalProperties/" + jsonPointerEscape(key) + "\""; + + if (isLeafNode(propSchema)) { + generateNodeValidation(sb, propSchema, ctx, propValue, propPath, propSchemaPath, indent + " ", null); + } else { + final String fnName = getInlineFunctionName(propSchema, ctx); + sb.append(indent).append(" ").append(fnName).append("(") + .append(propValue).append(", errors, ").append(propPath) + .append(", ").append(propSchemaPath).append(");\n"); + } + sb.append(indent).append(" }\n"); + } + + // Additional properties check (if not allowed) + if (!pn.additionalProperties()) { + // Build list of allowed keys (including discriminator key if applicable) + final Set allowedKeys = new HashSet<>(pn.properties().keySet()); + allowedKeys.addAll(pn.optionalProperties().keySet()); + + if (discriminatorKey != null) { + 
allowedKeys.add(discriminatorKey); + } + + sb.append(indent).append(" for (const k in ").append(valueExpr).append(") {\n"); + sb.append(indent).append(" if (").append(buildKeyCheck("k", allowedKeys)).append(") {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(" + \"/\" + k, schemaPath: ").append(schemaPathExpr).append("});\n"); + sb.append(indent).append(" }\n"); + sb.append(indent).append(" }\n"); + } + + sb.append(indent).append("}\n"); + } + + case ValuesNode vn -> { + // Type guard + sb.append(indent).append("if (").append(valueExpr).append(" === null || typeof ") + .append(valueExpr).append(" !== \"object\" || Array.isArray(").append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append("});\n"); + sb.append(indent).append("} else {\n"); + + // Loop over values + sb.append(indent).append(" for (const k in ").append(valueExpr).append(") {\n"); + final String valValue = valueExpr + "[k]"; + final String valPath = pathExpr + " + \"/\" + k"; + final String valSchemaPath = schemaPathExpr + " + \"/values\""; + + if (isLeafNode(vn.schema())) { + generateNodeValidation(sb, vn.schema(), ctx, valValue, valPath, valSchemaPath, indent + " ", null); + } else { + final String fnName = getInlineFunctionName(vn.schema(), ctx); + sb.append(indent).append(" ").append(fnName).append("(") + .append(valValue).append(", errors, ").append(valPath) + .append(", ").append(valSchemaPath).append(");\n"); + } + + sb.append(indent).append(" }\n"); + sb.append(indent).append("}\n"); + } + + case DiscriminatorNode dn -> { + // 5-step validation per RFC 8927 + sb.append(indent).append("if (").append(valueExpr).append(" === null || typeof ") + .append(valueExpr).append(" !== \"object\" || Array.isArray(").append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: 
").append(schemaPathExpr).append("});\n"); + sb.append(indent).append("} else if (!(\"").append(dn.discriminator()).append("\" in ") + .append(valueExpr).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append("});\n"); + sb.append(indent).append("} else if (typeof ").append(valueExpr).append("[\"").append(dn.discriminator()) + .append("\"] !== \"string\") {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(" + \"/").append(dn.discriminator()).append("\", schemaPath: ").append(schemaPathExpr) + .append(" + \"/discriminator\"});\n"); + sb.append(indent).append("} else {\n"); + sb.append(indent).append(" const tag = ").append(valueExpr).append("[\"").append(dn.discriminator()).append("\"];\n"); + + // Switch on tag + boolean first = true; + for (var entry : dn.mapping().entrySet()) { + final String tagValue = entry.getKey(); + final JtdNode variantSchema = entry.getValue(); + + if (first) { + sb.append(indent).append(" if (tag === ").append(jsString(tagValue)).append(") {\n"); + first = false; + } else { + sb.append(indent).append(" } else if (tag === ").append(jsString(tagValue)).append(") {\n"); + } + + // Generate variant validation with discriminator exemption + generateNodeValidation(sb, variantSchema, ctx, valueExpr, pathExpr, + schemaPathExpr + " + \"/mapping/" + jsonPointerEscape(tagValue) + "\"", + indent + " ", dn.discriminator()); + } + + sb.append(indent).append(" } else {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(" + \"/").append(dn.discriminator()).append("\", schemaPath: ").append(schemaPathExpr) + .append(" + \"/mapping\"});\n"); + sb.append(indent).append(" }\n"); + sb.append(indent).append("}\n"); + } + + case RefNode rn -> { + sb.append(indent).append("validate_").append(toSafeName(rn.ref())).append("(") + .append(valueExpr).append(", errors, 
").append(pathExpr).append(", ").append(schemaPathExpr).append(");\n"); + } + } + } + + private static void generateTypeCheck(StringBuilder sb, String type, String valueExpr, + String pathExpr, String schemaPathExpr, String indent) { + + final String check = switch (type) { + case "boolean" -> "typeof " + valueExpr + " === \"boolean\""; + case "string" -> "typeof " + valueExpr + " === \"string\""; + case "timestamp" -> "typeof " + valueExpr + " === \"string\" && /^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:(\\d{2}|60)(\\.\\d+)?(Z|[+-]\\d{2}:\\d{2})$/.test(" + valueExpr + ")"; + case "float32", "float64" -> "typeof " + valueExpr + " === \"number\" && Number.isFinite(" + valueExpr + ")"; + case "int8" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= -128 && " + valueExpr + " <= 127"; + case "uint8" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= 0 && " + valueExpr + " <= 255"; + case "int16" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= -32768 && " + valueExpr + " <= 32767"; + case "uint16" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= 0 && " + valueExpr + " <= 65535"; + case "int32" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= -2147483648 && " + valueExpr + " <= 2147483647"; + case "uint32" -> "typeof " + valueExpr + " === \"number\" && Number.isInteger(" + valueExpr + ") && " + valueExpr + " >= 0 && " + valueExpr + " <= 4294967295"; + default -> throw new IllegalArgumentException("Unknown type: " + type); + }; + + sb.append(indent).append("if (!(").append(check).append(")) {\n"); + sb.append(indent).append(" errors.push({instancePath: ").append(pathExpr) + .append(", schemaPath: ").append(schemaPathExpr).append(" + \"/type\"});\n"); + sb.append(indent).append("}\n"); + } + + 
private static boolean isLeafNode(JtdNode node) { + return node instanceof TypeNode || node instanceof EnumNode || node instanceof EmptyNode || node instanceof RefNode; + } + + private static String getInlineFunctionName(JtdNode node, RenderContext ctx) { + // Check if this node already has a function name assigned + for (var entry : ctx.generatedInlineFunctions.entrySet()) { + if (entry.getValue() == node) { + return entry.getKey(); + } + } + // Create new unique function name using counter (not hashCode - avoids collisions) + final String name = "validate_inline_" + (ctx.inlineCounter++); + ctx.generatedInlineFunctions.put(name, node); + return name; + } + + private static void generateInlineFunctions(StringBuilder sb, RenderContext ctx) { + // Keep generating until no new inline functions are added + // (inline functions can reference other inline functions) + var processed = new HashSet(); + boolean changed; + do { + changed = false; + var entries = new ArrayList<>(ctx.generatedInlineFunctions.entrySet()); + for (var entry : entries) { + final String fnName = entry.getKey(); + if (processed.contains(fnName)) { + continue; + } + processed.add(fnName); + changed = true; + + final JtdNode node = entry.getValue(); + sb.append("function ").append(fnName).append("(v, errors, p, sp) {\n"); + generateNodeValidation(sb, node, ctx, "v", "p", "sp", " ", null); + sb.append("}\n\n"); + } + } while (changed); + } + + private static String getDiscriminatorKey(PropertiesNode pn) { + // We need to track which discriminator this properties node belongs to + // For now, this is a placeholder - we'd need to track this during generation + return null; + } + + private static String buildKeyCheck(String varName, Set allowedKeys) { + if (allowedKeys.isEmpty()) { + return "true"; // No keys allowed, everything is extra + } + + final StringBuilder sb = new StringBuilder(); + boolean first = true; + for (String key : allowedKeys) { + if (!first) sb.append(" && "); + 
sb.append(varName).append(" !== \"").append(key).append("\""); + first = false; + } + return sb.toString(); + } + + private static String findEnumConst(Map> enumConsts, List values) { + for (var e : enumConsts.entrySet()) { + if (e.getValue().equals(values)) { + return e.getKey(); + } + } + throw new IllegalStateException("Enum values not found: " + values); + } + + private static String toSafeName(String name) { + return name.replaceAll("[^a-zA-Z0-9_]", "_"); + } + + private static String jsonPointerEscape(String s) { + return s.replace("~", "~0").replace("/", "~1"); + } + + private static String jsString(String s) { + return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t") + "\""; + } + + private static String jsStringArray(List values) { + final var sb = new StringBuilder(); + sb.append("["); + for (int i = 0; i < values.size(); i++) { + if (i > 0) sb.append(", "); + sb.append(jsString(values.get(i))); + } + sb.append("]"); + return sb.toString(); + } + + private static class RenderContext { + String sha256Hex; + String shaPrefix8; + String schemaId; + int enumCounter = 1; + int inlineCounter = 0; + final Map> enumConstants = new LinkedHashMap<>(); + final Map generatedInlineFunctions = new LinkedHashMap<>(); + } +} diff --git a/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdAst.java b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdAst.java new file mode 100644 index 0000000..f561607 --- /dev/null +++ b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdAst.java @@ -0,0 +1,61 @@ +package io.github.simbo1905.json.jtd.codegen; + +import java.util.List; +import java.util.Map; + +/// Complete AST for RFC 8927 JTD code generation. +/// Supports all schema forms: empty, type, enum, elements, properties, values, +/// discriminator, ref, and nullable. 
+/// +/// This AST is designed for stack-based code generation where each node +/// knows how to generate its own validation logic. +public final class JtdAst { + private JtdAst() {} + + public sealed interface JtdNode permits + EmptyNode, TypeNode, EnumNode, ElementsNode, PropertiesNode, + ValuesNode, DiscriminatorNode, RefNode, NullableNode {} + + /// Root of a JTD document with metadata, definitions, and root schema. + public record RootNode( + String id, + Map definitions, + JtdNode rootSchema + ) {} + + /// Empty form {} - accepts any JSON value. + public record EmptyNode() implements JtdNode {} + + /// Type form - validates primitive types. + /// Type values: string, boolean, timestamp, int8, uint8, int16, uint16, + /// int32, uint32, float32, float64 + public record TypeNode(String type) implements JtdNode {} + + /// Enum form - validates string is one of allowed values. + public record EnumNode(List values) implements JtdNode {} + + /// Elements form - validates array where each element matches schema. + public record ElementsNode(JtdNode schema) implements JtdNode {} + + /// Properties form - validates object with required/optional properties. + public record PropertiesNode( + Map properties, + Map optionalProperties, + boolean additionalProperties + ) implements JtdNode {} + + /// Values form - validates object where all values match schema. + public record ValuesNode(JtdNode schema) implements JtdNode {} + + /// Discriminator form - validates tagged unions. + public record DiscriminatorNode( + String discriminator, + Map mapping + ) implements JtdNode {} + + /// Ref form - references a definition. + public record RefNode(String ref) implements JtdNode {} + + /// Nullable wrapper - allows null in addition to wrapped schema. 
+ public record NullableNode(JtdNode wrapped) implements JtdNode {} +} diff --git a/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdParser.java b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdParser.java new file mode 100644 index 0000000..c9c1092 --- /dev/null +++ b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdParser.java @@ -0,0 +1,212 @@ +package io.github.simbo1905.json.jtd.codegen; + +import jdk.sandbox.java.util.json.*; + +import java.util.*; + +import static io.github.simbo1905.json.jtd.codegen.JtdAst.*; + +/// Parses JTD (RFC 8927) schemas for code generation. +/// Supports all schema forms including elements, values, discriminator, and nullable. +final class JtdParser { + private JtdParser() {} + + static RootNode parseString(String jtdJson) { + Objects.requireNonNull(jtdJson, "jtdJson must not be null"); + return parseValue(Json.parse(jtdJson)); + } + + static RootNode parseValue(JsonValue rootValue) { + Objects.requireNonNull(rootValue, "rootValue must not be null"); + if (!(rootValue instanceof JsonObject root)) { + throw new IllegalArgumentException("JTD schema must be a JSON object"); + } + + final var metadata = getObjectOrNull(root, "metadata"); + final String id; + if (metadata != null && metadata.members().containsKey("id")) { + id = getString(metadata, "id"); + if (id.isBlank()) { + throw new IllegalArgumentException("metadata.id must be non-blank"); + } + } else { + id = "JtdSchema"; + } + + final Map definitions = new LinkedHashMap<>(); + if (root.members().containsKey("definitions")) { + final var defsObj = getObjectOrNull(root, "definitions"); + if (defsObj != null) { + for (var e : defsObj.members().entrySet()) { + definitions.put(e.getKey(), parseSchema(e.getKey(), e.getValue(), true)); + } + } + } + + final JtdNode rootSchema = parseSchema("root", root, false); + + return new RootNode(id, definitions, rootSchema); + } + + private static JtdNode parseSchema(String 
propName, JsonValue schemaValue, boolean inDefinitions) { + if (!(schemaValue instanceof JsonObject schema)) { + throw new IllegalArgumentException("Schema for '" + propName + "' must be a JSON object"); + } + + // Check for nullable wrapper first + boolean isNullable = false; + if (schema.members().containsKey("nullable")) { + final var nullableVal = schema.members().get("nullable"); + if (nullableVal instanceof JsonBoolean jb && jb.bool()) { + isNullable = true; + } + } + + JtdNode coreNode; + + // 1. Ref + if (schema.members().containsKey("ref")) { + final var ref = stringValue(schema.members().get("ref"), propName, "ref"); + coreNode = new RefNode(ref); + } + // 2. Type + else if (schema.members().containsKey("type")) { + final var typeStr = stringValue(schema.members().get("type"), propName, "type"); + final var normalized = typeStr.toLowerCase(Locale.ROOT).trim(); + if (!ALLOWED_TYPES.contains(normalized)) { + throw new IllegalArgumentException("Unknown type: '" + typeStr + + "', expected one of: " + String.join(", ", ALLOWED_TYPES)); + } + coreNode = new TypeNode(normalized); + } + // 3. Enum + else if (schema.members().containsKey("enum")) { + final var enumValues = enumValues(schema.members().get("enum"), propName); + coreNode = new EnumNode(List.copyOf(enumValues)); + } + // 4. Elements (arrays) + else if (schema.members().containsKey("elements")) { + final var elementsVal = schema.members().get("elements"); + final var elementSchema = parseSchema(propName + "[]", elementsVal, inDefinitions); + coreNode = new ElementsNode(elementSchema); + } + // 5. Values (string->value maps) + else if (schema.members().containsKey("values")) { + final var valuesVal = schema.members().get("values"); + final var valueSchema = parseSchema(propName + "{}", valuesVal, inDefinitions); + coreNode = new ValuesNode(valueSchema); + } + // 6. 
Discriminator (tagged unions) + else if (schema.members().containsKey("discriminator")) { + final var discVal = stringValue(schema.members().get("discriminator"), propName, "discriminator"); + + if (!schema.members().containsKey("mapping")) { + throw new IllegalArgumentException("discriminator requires mapping"); + } + + final var mappingObj = getObjectOrNull(schema, "mapping"); + if (mappingObj == null) { + throw new IllegalArgumentException("mapping must be an object"); + } + + final Map mapping = new LinkedHashMap<>(); + for (var e : mappingObj.members().entrySet()) { + mapping.put(e.getKey(), parseSchema(propName + "." + e.getKey(), e.getValue(), inDefinitions)); + } + + coreNode = new DiscriminatorNode(discVal, mapping); + } + // 7. Properties + else if (hasPropertiesLikeKeys(schema)) { + final Map props = new LinkedHashMap<>(); + if (schema.members().containsKey("properties")) { + final var p = getObjectOrNull(schema, "properties"); + if (p != null) { + for (var e : p.members().entrySet()) { + props.put(e.getKey(), parseSchema(propName + "." + e.getKey(), e.getValue(), inDefinitions)); + } + } + } + + final Map optionalProps = new LinkedHashMap<>(); + if (schema.members().containsKey("optionalProperties")) { + final var op = getObjectOrNull(schema, "optionalProperties"); + if (op != null) { + for (var e : op.members().entrySet()) { + optionalProps.put(e.getKey(), parseSchema(propName + "." + e.getKey(), e.getValue(), inDefinitions)); + } + } + } + + boolean additional = false; + if (schema.members().containsKey("additionalProperties")) { + final var ap = schema.members().get("additionalProperties"); + if (ap instanceof JsonBoolean b) { + additional = b.bool(); + } + } + + coreNode = new PropertiesNode(props, optionalProps, additional); + } + // 8. 
Empty (accepts anything) + else { + coreNode = new EmptyNode(); + } + + // Wrap in nullable if needed + if (isNullable && !(coreNode instanceof EmptyNode)) { + return new NullableNode(coreNode); + } + return coreNode; + } + + private static boolean hasPropertiesLikeKeys(JsonObject schema) { + return schema.members().containsKey("properties") || + schema.members().containsKey("optionalProperties") || + schema.members().containsKey("additionalProperties"); + } + + private static JsonObject getObjectOrNull(JsonObject obj, String key) { + final var v = obj.members().get(key); + if (v == null) return null; + if (!(v instanceof JsonObject o)) { + throw new IllegalArgumentException("Expected '" + key + "' to be an object"); + } + return o; + } + + private static String getString(JsonObject obj, String key) { + final var v = obj.members().get(key); + if (!(v instanceof JsonString js)) { + throw new IllegalArgumentException("Expected '" + key + "' to be a string"); + } + return js.string(); + } + + private static String stringValue(JsonValue v, String container, String key) { + if (!(v instanceof JsonString js)) { + throw new IllegalArgumentException("Expected '" + container + "." 
+ key + "' to be a string"); + } + return js.string(); + } + + private static List enumValues(JsonValue v, String propName) { + if (!(v instanceof JsonArray arr)) { + throw new IllegalArgumentException("Expected '" + propName + ".enum' to be an array"); + } + final var out = new ArrayList(); + for (int i = 0; i < arr.elements().size(); i++) { + final var el = arr.element(i); + if (!(el instanceof JsonString js)) { + throw new IllegalArgumentException("Expected '" + propName + ".enum[" + i + "]' to be a string"); + } + out.add(js.string()); + } + return out; + } + + private static final Set ALLOWED_TYPES = Set.of( + "string", "boolean", "timestamp", "int8", "uint8", "int16", "uint16", + "int32", "uint32", "float32", "float64" + ); +} diff --git a/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCli.java b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCli.java new file mode 100644 index 0000000..487c83e --- /dev/null +++ b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCli.java @@ -0,0 +1,53 @@ +package io.github.simbo1905.json.jtd.codegen; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.logging.Logger; + +/// CLI entry point for the new JTD to ESM code generator. +/// Generates optimal vanilla ES2020 validators with explicit stack-based validation. +public final class JtdToEsmCli { + private static final Logger LOG = Logger.getLogger(JtdToEsmCli.class.getName()); + + private JtdToEsmCli() {} + + public static void main(String[] args) throws Exception { + if (args.length < 1) { + System.err.println("Usage: java -jar jtd-esm-codegen.jar [output-dir]"); + System.exit(1); + } + + final Path schemaPath = Path.of(args[0]).toAbsolutePath().normalize(); + final Path outDir = args.length > 1 + ? 
Path.of(args[1]).toAbsolutePath().normalize() + : Path.of(".").toAbsolutePath().normalize(); + + final Path outJs = run(schemaPath, outDir); + System.out.println("Generated: " + outJs); + } + + public static Path run(Path schemaPath, Path outDir) throws IOException { + LOG.fine(() -> "Reading schema from: " + schemaPath); + + final String schemaJson = Files.readString(schemaPath, StandardCharsets.UTF_8); + final var schema = JtdParser.parseString(schemaJson); + + final byte[] digest = Sha256.digest(schemaPath); + final String shaHex = Sha256.hex(digest); + final String shaPrefix8 = Sha256.hexPrefix8(digest); + + LOG.fine(() -> "Schema SHA-256: " + shaHex); + + final String js = EsmRenderer.render(schema, shaHex, shaPrefix8); + + final String fileName = schema.id() + "-" + shaPrefix8 + ".js"; + final Path outJs = outDir.resolve(fileName); + + Files.writeString(outJs, js, StandardCharsets.UTF_8); + + LOG.fine(() -> "Generated validator: " + outJs); + return outJs; + } +} diff --git a/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/Sha256.java b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/Sha256.java new file mode 100644 index 0000000..fda2f41 --- /dev/null +++ b/jtd-esm-codegen/src/main/java/io/github/simbo1905/json/jtd/codegen/Sha256.java @@ -0,0 +1,63 @@ +package io.github.simbo1905.json.jtd.codegen; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/// SHA-256 helpers for deterministic output naming. 
+final class Sha256 { + private Sha256() {} + + static byte[] digest(Path file) throws IOException { + try (InputStream in = Files.newInputStream(file)) { + return digest(in); + } + } + + static byte[] digest(InputStream in) throws IOException { + final MessageDigest md = messageDigest(); + final byte[] buf = new byte[16 * 1024]; + for (int r; (r = in.read(buf)) >= 0; ) { + if (r > 0) { + md.update(buf, 0, r); + } + } + return md.digest(); + } + + static String hex(byte[] digest) { + final var out = new StringBuilder(digest.length * 2); + for (byte b : digest) { + out.append(HEX[(b >>> 4) & 0x0F]).append(HEX[b & 0x0F]); + } + return out.toString(); + } + + static String hexPrefix8(byte[] digest) { + // 8 hex chars == 4 bytes. + if (digest.length < 4) { + throw new IllegalArgumentException("digest too short: " + digest.length); + } + final var out = new StringBuilder(8); + for (int i = 0; i < 4; i++) { + final byte b = digest[i]; + out.append(HEX[(b >>> 4) & 0x0F]).append(HEX[b & 0x0F]); + } + return out.toString(); + } + + private static MessageDigest messageDigest() { + try { + return MessageDigest.getInstance("SHA-256"); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is required by the Java platform. 
+ throw new IllegalStateException("SHA-256 not available", e); + } + } + + private static final char[] HEX = "0123456789abcdef".toCharArray(); +} + diff --git a/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/GraalJsRunner.java b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/GraalJsRunner.java new file mode 100644 index 0000000..1e80e1a --- /dev/null +++ b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/GraalJsRunner.java @@ -0,0 +1,61 @@ +package io.github.simbo1905.json.jtd.codegen; + +import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.Source; +import org.graalvm.polyglot.Value; +import org.graalvm.polyglot.io.IOAccess; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +/// Executes generated ES2020 validators in-process using GraalVM Polyglot JS. +/// No external runtime required - the JS engine runs inside the JVM. +final class GraalJsRunner { + private static final Logger LOG = Logger.getLogger(GraalJsRunner.class.getName()); + + private GraalJsRunner() {} + + /// Evaluates a generated validator module and returns its exports. + /// The module must export a `validate(instance)` function. + static Value loadValidatorModule(Context context, Path modulePath) throws IOException { + LOG.fine(() -> "Loading validator module: " + modulePath); + final var source = Source.newBuilder("js", modulePath.toFile()) + .mimeType("application/javascript+module") + .build(); + return context.eval(source); + } + + /// Creates a GraalVM Polyglot context configured for ES2020 module evaluation. + static Context createContext() { + return Context.newBuilder("js") + .allowIO(IOAccess.ALL) + .option("js.esm-eval-returns-exports", "true") + .option("js.ecmascript-version", "2020") + .build(); + } + + /// Validates a JSON value against a generated validator by calling its + /// `validate` export. 
Returns a list of error maps (instancePath, schemaPath).
+  ///
+  /// `jsonValue` is handed to the JS function unchanged; no conversion is done here.
+  static List> validate(Value exports, Object jsonValue) {
+    final var validateFn = exports.getMember("validate");
+    // Plain Java `assert`: this check only runs when the JVM has assertions enabled.
+    assert validateFn != null && validateFn.canExecute() : "Module must export a validate function";
+    final var result = validateFn.execute(jsonValue);
+    return convertErrors(result);
+  }
+
+  /// Copies the JS error array returned by `validate` into Java maps.
+  /// Each element must expose string members `instancePath` and `schemaPath`;
+  /// every resulting map has exactly those two keys.
+  // NOTE(review): nothing below looks like an unchecked operation, so this suppression may be removable - confirm.
+  @SuppressWarnings("unchecked")
+  private static List> convertErrors(Value result) {
+    // getArraySize() returns a long; it is narrowed to int for the list capacity and loop bound.
+    final var size = (int) result.getArraySize();
+    final var errors = new java.util.ArrayList>(size);
+    for (int i = 0; i < size; i++) {
+      final var errorVal = result.getArrayElement(i);
+      final var instancePath = errorVal.getMember("instancePath").asString();
+      final var schemaPath = errorVal.getMember("schemaPath").asString();
+      errors.add(Map.of("instancePath", instancePath, "schemaPath", schemaPath));
+    }
+    return errors;
+  }
+}
diff --git a/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmCodegenLoggingConfig.java b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmCodegenLoggingConfig.java
new file mode 100644
index 0000000..3bfbe39
--- /dev/null
+++ b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmCodegenLoggingConfig.java
@@ -0,0 +1,43 @@
+package io.github.simbo1905.json.jtd.codegen;
+
+import org.junit.jupiter.api.BeforeAll;
+
+import java.util.Locale;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/// Base class for JTD ESM codegen tests that configures JUL logging from system properties.
+/// All test classes should extend this class to enable consistent logging behavior. 
+public class JtdEsmCodegenLoggingConfig { + @BeforeAll + static void enableJulDebug() { + final var log = Logger.getLogger(JtdEsmCodegenLoggingConfig.class.getName()); + final Logger root = Logger.getLogger(""); + + final String levelProp = System.getProperty("java.util.logging.ConsoleHandler.level"); + Level targetLevel = Level.INFO; + if (levelProp != null) { + try { + targetLevel = Level.parse(levelProp.trim()); + } catch (IllegalArgumentException ex) { + try { + targetLevel = Level.parse(levelProp.trim().toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException ignored) { + log.warning(() -> "Unrecognized logging level from 'java.util.logging.ConsoleHandler.level': " + levelProp); + } + } + } + + if (root.getLevel() == null || root.getLevel().intValue() > targetLevel.intValue()) { + root.setLevel(targetLevel); + } + for (Handler handler : root.getHandlers()) { + final Level handlerLevel = handler.getLevel(); + if (handlerLevel == null || handlerLevel.intValue() > targetLevel.intValue()) { + handler.setLevel(targetLevel); + } + } + } +} + diff --git a/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmJsTestSuite.java b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmJsTestSuite.java new file mode 100644 index 0000000..0b9fab3 --- /dev/null +++ b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmJsTestSuite.java @@ -0,0 +1,23 @@ +package io.github.simbo1905.json.jtd.codegen; + +import org.bitbucket.thinbus.junitjs.JSRunner; +import org.bitbucket.thinbus.junitjs.Tests; +import org.junit.runner.RunWith; + +/// JUnit test suite that runs JavaScript tests via GraalVM polyglot. +/// Uses junit-js JSRunner to execute .js test files from `src/test/resources/`. +/// Each JS file uses the `tests({...})` pattern from JUnitJSUtils.js. +/// +/// This replaces the previous bun-based JS test execution that required +/// an external JavaScript runtime not available in the CI image. 
+///
+/// Discovered by Surefire via the JUnit Vintage engine (JUnit 4 runner
+/// under JUnit Platform).
+///
+/// NOTE(review): this class is named `JtdEsmJsTestSuite`, which ends in "Suite",
+/// not "Test". Surefire's documented default includes are `Test*`, `*Test`,
+/// `*Tests` and `*TestCase`; none of them match this name. Confirm that the
+/// module's POM declares an explicit include, otherwise this suite is not run.
+@Tests({
+  "boolean-schema.test.js",
+  "nested-elements-empty-focused.test.js"
+})
+@RunWith(JSRunner.class)
+public class JtdEsmJsTestSuite {
+}
diff --git a/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmPropertyTest.java b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmPropertyTest.java
new file mode 100644
index 0000000..a609123
--- /dev/null
+++ b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdEsmPropertyTest.java
@@ -0,0 +1,587 @@
+package io.github.simbo1905.json.jtd.codegen;
+
+import jdk.sandbox.java.util.json.*;
+import net.jqwik.api.*;
+import org.graalvm.polyglot.Context;
+import org.graalvm.polyglot.Source;
+import org.graalvm.polyglot.Value;
+import org.graalvm.polyglot.io.IOAccess;
+import org.junit.jupiter.api.Assertions;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/// Property-based testing for JTD to ESM code generator.
+/// Generates comprehensive schema/document permutations to validate generated JavaScript validators.
+///
+/// Uses GraalVM Polyglot JS for in-process JavaScript execution - no external runtime needed. 
+class JtdEsmPropertyTest extends JtdEsmCodegenLoggingConfig {
+  static final Logger LOG = Logger.getLogger(JtdEsmPropertyTest.class.getName());
+
+  // Fixed vocabularies the generators draw from; keeping them small keeps generated schemas readable.
+  private static final List PROPERTY_NAMES = List.of("alpha", "beta", "gamma", "delta", "epsilon");
+  // Each pair supplies one required name (first) and one optional name (last); the two always differ.
+  private static final List> PROPERTY_PAIRS = List.of(
+    List.of("alpha", "beta"), List.of("alpha", "gamma"),
+    List.of("beta", "delta"), List.of("gamma", "epsilon")
+  );
+  private static final List DISCRIMINATOR_VALUES = List.of("type1", "type2", "type3");
+  private static final List ENUM_VALUES = List.of("red", "green", "blue", "yellow");
+  // NOTE(review): RANDOM is not referenced anywhere else in this class - candidate for removal.
+  private static final Random RANDOM = new Random();
+
+  /// Sealed interface for JTD test schemas
+  sealed interface JtdTestSchema permits EmptySchema, RefSchema, TypeSchema, EnumSchema,
+    ElementsSchema, PropertiesSchema, ValuesSchema, DiscriminatorSchema, NullableSchema {}
+
+  // One record per JTD schema form. RefSchema is handled by the switches below,
+  // but none of the generators in this class ever produces it.
+  record EmptySchema() implements JtdTestSchema {}
+  record RefSchema(String ref) implements JtdTestSchema {}
+  record TypeSchema(String type) implements JtdTestSchema {}
+  record EnumSchema(List values) implements JtdTestSchema {}
+  record ElementsSchema(JtdTestSchema elements) implements JtdTestSchema {}
+  record PropertiesSchema(Map properties,
+                          Map optionalProperties,
+                          boolean additionalProperties) implements JtdTestSchema {}
+  record ValuesSchema(JtdTestSchema values) implements JtdTestSchema {}
+  record DiscriminatorSchema(String discriminator, Map mapping) implements JtdTestSchema {}
+  record NullableSchema(JtdTestSchema schema) implements JtdTestSchema {}
+
+  /// jqwik provider named by `@ForAll("jtdSchemas")`: schemas nested at most three levels deep.
+  @Provide
+  Arbitrary jtdSchemas() {
+    return jtdSchemaArbitrary(3);
+  }
+
+  /// Builds the recursive schema generator.
+  /// At `depth == 0` only leaf schemas are produced (the empty schema and five `type` schemas);
+  /// otherwise a leaf or one of the composite forms is chosen, and every composite form except
+  /// the discriminator recurses with `depth - 1`.
+  @SuppressWarnings("unchecked")
+  private static Arbitrary jtdSchemaArbitrary(int depth) {
+    final var primitives = Arbitraries.of(
+      new EmptySchema(),
+      new TypeSchema("boolean"),
+      new TypeSchema("string"),
+      new TypeSchema("int32"),
+      new TypeSchema("float64"),
+      new TypeSchema("timestamp")
+    );
+
+    if (depth == 0) {
+      return (Arbitrary) (Arbitrary) primitives;
+    }
+
+    return (Arbitrary) (Arbitrary) Arbitraries.oneOf(
+      primitives,
+      enumSchemaArbitrary(),
+      elementsSchemaArbitrary(depth),
+      propertiesSchemaArbitrary(depth),
+      valuesSchemaArbitrary(depth),
+      discriminatorSchemaArbitrary(),
+      nullableSchemaArbitrary(depth)
+    );
+  }
+
+  /// Enum schemas with one to four values drawn from `ENUM_VALUES`; repeated draws are
+  /// collapsed, so the resulting value list has no duplicates.
+  private static Arbitrary enumSchemaArbitrary() {
+    return Arbitraries.of(ENUM_VALUES).list().ofMinSize(1).ofMaxSize(4).map(values -> {
+      List distinctValues = values.stream().distinct().toList();
+      return new EnumSchema(new ArrayList<>(distinctValues));
+    });
+  }
+
+  /// `elements` schemas wrapping a schema one level shallower.
+  /// Discriminator element schemas are dropped when their first mapping variant is a
+  /// `type` or `enum` schema.
+  // NOTE(review): discriminatorSchemaArbitrary() only ever maps to PropertiesSchema variants,
+  // so this filter looks like it never rejects anything - confirm whether it is still needed.
+  private static Arbitrary elementsSchemaArbitrary(int depth) {
+    return jtdSchemaArbitrary(depth - 1).filter(schema -> {
+      if (schema instanceof DiscriminatorSchema disc) {
+        var firstVariant = disc.mapping().values().iterator().next();
+        return !(firstVariant instanceof TypeSchema) && !(firstVariant instanceof EnumSchema);
+      }
+      return true;
+    }).map(ElementsSchema::new);
+  }
+
+  /// `properties` schemas in four shapes: no properties at all, a single required property,
+  /// one required plus one optional property, and that last shape again with
+  /// `additionalProperties` switched on. Child schemas are one level shallower.
+  private static Arbitrary propertiesSchemaArbitrary(int depth) {
+    final var childDepth = depth - 1;
+    final var empty = Arbitraries.of(new PropertiesSchema(Map.of(), Map.of(), false));
+
+    final var singleRequired = Combinators.combine(
+      Arbitraries.of(PROPERTY_NAMES),
+      jtdSchemaArbitrary(childDepth)
+    ).as((name, schema) -> {
+      Assertions.assertNotNull(name);
+      Assertions.assertNotNull(schema);
+      return new PropertiesSchema(Map.of(name, schema), Map.of(), false);
+    });
+
+    final var mixed = Combinators.combine(
+      Arbitraries.of(PROPERTY_PAIRS),
+      jtdSchemaArbitrary(childDepth),
+      jtdSchemaArbitrary(childDepth)
+    ).as((names, requiredSchema, optionalSchema) -> {
+      Assertions.assertNotNull(names);
+      Assertions.assertNotNull(requiredSchema);
+      Assertions.assertNotNull(optionalSchema);
+      return new PropertiesSchema(
+        Map.of(names.getFirst(), requiredSchema),
+        Map.of(names.getLast(), optionalSchema),
+        false
+      );
+    });
+
+    final var withAdditional = mixed.map(props -> {
+      Assertions.assertNotNull(props);
+      return new PropertiesSchema(props.properties(), props.optionalProperties(), true);
+    });
+
+    return Arbitraries.oneOf(empty, singleRequired, mixed, withAdditional);
+  }
+
+  /// `values` schemas wrapping a schema one level shallower.
+  private static Arbitrary valuesSchemaArbitrary(int depth) {
+    return jtdSchemaArbitrary(depth - 1).map(ValuesSchema::new);
+  }
+
+  /// Discriminator schemas with one or two mapping entries (two only when the drawn tag
+  /// values differ). The discriminator key comes from `PROPERTY_NAMES`.
+  // NOTE(review): the variant schemas are obtained with `.sample()` inside the combinator, i.e.
+  // outside jqwik's own generation of this value; presumably that affects shrinking and seed
+  // reproducibility - confirm against the jqwik user guide.
+  private static Arbitrary discriminatorSchemaArbitrary() {
+    return Combinators.combine(
+      Arbitraries.of(PROPERTY_NAMES),
+      Arbitraries.of(DISCRIMINATOR_VALUES),
+      Arbitraries.of(DISCRIMINATOR_VALUES)
+    ).as((discriminatorKey, value1, value2) -> {
+      final var mapping = new LinkedHashMap();
+      final var schema1 = propertiesSchemaForDiscriminatorMapping(discriminatorKey).sample();
+      mapping.put(value1, schema1);
+
+      Assertions.assertNotNull(value1);
+      if (!value1.equals(value2)) {
+        final var schema2 = propertiesSchemaForDiscriminatorMapping(discriminatorKey).sample();
+        mapping.put(value2, schema2);
+      }
+      return new DiscriminatorSchema(discriminatorKey, mapping);
+    });
+  }
+
+  /// Flat `properties` schemas usable as discriminator mapping variants: property names never
+  /// equal `discriminatorKey`, and property schemas are limited to three `type` schemas and
+  /// one enum. Produces a single required property, a single optional property, or one of each.
+  private static Arbitrary propertiesSchemaForDiscriminatorMapping(String discriminatorKey) {
+    final var primitiveSchemas = Arbitraries.of(
+      new TypeSchema("boolean"),
+      new TypeSchema("string"),
+      new TypeSchema("int32"),
+      new EnumSchema(List.of("red", "green", "blue"))
+    );
+
+    final var allPropertyNames = List.of("alpha", "beta", "gamma", "delta", "epsilon");
+    final var safePropertyNames = allPropertyNames.stream()
+      .filter(name -> !name.equals(discriminatorKey))
+      .toList();
+    // Removing one key from five names always leaves at least four, so the fallback
+    // list below is never selected with the names defined here.
+    final var effectivePropertyNames = safePropertyNames.isEmpty()
+      ? List.of("prop1", "prop2", "prop3")
+      : safePropertyNames;
+
+    // All ordered pairs of two different names; the trailing filter re-checks the
+    // discriminator key, which effectivePropertyNames already excludes.
+    final var safePropertyPairs = effectivePropertyNames.stream()
+      .flatMap(name1 -> effectivePropertyNames.stream()
+        .filter(name2 -> !name1.equals(name2))
+        .map(name2 -> List.of(name1, name2)))
+      .filter(pair -> !pair.getFirst().equals(discriminatorKey) && !pair.get(1).equals(discriminatorKey))
+      .toList();
+
+    return Arbitraries.oneOf(
+      Combinators.combine(Arbitraries.of(effectivePropertyNames), primitiveSchemas)
+        .as((name, schema) -> new PropertiesSchema(Map.of(name, schema), Map.of(), false)),
+      Combinators.combine(Arbitraries.of(effectivePropertyNames), primitiveSchemas)
+        .as((name, schema) -> new PropertiesSchema(Map.of(), Map.of(name, schema), false)),
+      Combinators.combine(Arbitraries.of(safePropertyPairs), primitiveSchemas, primitiveSchemas)
+        .as((names, reqSchema, optSchema) ->
+          new PropertiesSchema(Map.of(names.getFirst(), reqSchema),
+            Map.of(names.getLast(), optSchema), false))
+    );
+  }
+
+  /// Nullable wrapper around a schema one level shallower.
+  private static Arbitrary nullableSchemaArbitrary(int depth) {
+    return jtdSchemaArbitrary(depth - 1).map(NullableSchema::new);
+  }
+
+  /// Builds compliant JSON document for a schema
+  ///
+  /// The result is a plain Java value: `String`, `Boolean`, a number, a `List`, a `Map`,
+  /// or `null`. A nullable schema always yields `null`; inside `elements` and `values`
+  /// such `null` children are left out, so those containers may come back empty.
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  static Object buildCompliantDocument(JtdTestSchema schema) {
+    return switch (schema) {
+      case EmptySchema ignored -> "anything-goes";
+      case RefSchema ignored -> "ref-compliant-value";
+      case TypeSchema(var type) -> buildCompliantTypeValue(type);
+      case EnumSchema(var values) -> values.getFirst();
+      case ElementsSchema(var elem) -> {
+        final var v1 = buildCompliantDocument(elem);
+        final var v2 = buildCompliantDocument(elem);
+        final var lst = new ArrayList<>();
+        if (v1 != null) lst.add(v1);
+        if (v2 != null) lst.add(v2);
+        yield lst;
+      }
+      case PropertiesSchema(var props, var optProps, var ignored) -> {
+        // Optional properties are always populated as well; a null child is stored as a null value.
+        final var obj = new LinkedHashMap();
+        props.forEach((k, v) -> obj.put(k, buildCompliantDocument(v)));
+        optProps.forEach((k, v) -> obj.put(k, buildCompliantDocument(v)));
+        yield obj;
+      }
+      case ValuesSchema(var val) -> {
+        final var v1 = buildCompliantDocument(val);
+        final var v2 = buildCompliantDocument(val);
+        final var map = new LinkedHashMap();
+        if (v1 != null) map.put("key1", v1);
+        if (v2 != null) map.put("key2", v2);
+        yield map;
+      }
+      case DiscriminatorSchema(var disc, var mapping) -> {
+        // Always instantiates the first mapping entry: the tag plus that variant's properties.
+        final var firstEntry = mapping.entrySet().iterator().next();
+        final var discValue = firstEntry.getKey();
+        final var variant = firstEntry.getValue();
+        final var obj = new LinkedHashMap();
+        obj.put(disc, discValue);
+        if (variant instanceof PropertiesSchema ps) {
+          ps.properties().forEach((k, v) -> {
+            if (!k.equals(disc)) obj.put(k, buildCompliantDocument(v));
+          });
+          ps.optionalProperties().forEach((k, v) -> {
+            if (!k.equals(disc)) obj.put(k, buildCompliantDocument(v));
+          });
+        }
+        yield obj;
+      }
+      case NullableSchema ignored -> null;
+    };
+  }
+
+  /// Returns one fixed sample value for a JTD `type` keyword; an unrecognised keyword
+  /// yields the string "unknown".
+  private static Object buildCompliantTypeValue(String type) {
+    return switch (type) {
+      case "boolean" -> true;
+      case "string" -> "compliant-string";
+      case "timestamp" -> "2023-12-25T10:30:00Z";
+      case "int8" -> 42;
+      case "uint8" -> 200;
+      case "int16" -> 30000;
+      case "uint16" -> 50000;
+      case "int32" -> 1000000;
+      case "uint32" -> 3000000000L;
+      case "float32", "float64" -> 3.14159;
+      default -> "unknown";
+    };
+  }
+
+  /// Creates failing documents for a schema
+  ///
+  /// `compliant` is the matching output of `buildCompliantDocument`; several cases derive
+  /// their failing documents by mutating a copy of it. An empty list means this method
+  /// has no failing document to offer for the schema.
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  static List createFailingDocuments(JtdTestSchema schema, Object compliant) {
+    return switch (schema) {
+      case EmptySchema ignored -> List.of();
+      case RefSchema ignored -> Collections.singletonList(null);
+      case TypeSchema(var type) -> createFailingTypeValues(type);
+      case EnumSchema ignored -> List.of("invalid-enum-value");
+      case ElementsSchema(var elem) -> {
+        // When the element schema has a failing value: a list holding one good and one bad
+        // element, plus a non-array. Otherwise only the non-array.
+        if (compliant instanceof List lst && !lst.isEmpty()) {
+          final var invalidElem = createFailingDocuments(elem, lst.getFirst());
+          if (!invalidElem.isEmpty()) {
+            final var innerLst = new ArrayList<>();
+            innerLst.add(lst.getFirst());
+            innerLst.add(invalidElem.getFirst());
+            final var failures = new ArrayList<>();
+            failures.add(innerLst);
+            failures.add("not-an-array");
+            yield failures;
+          }
+        }
+        yield List.of("not-an-array");
+      }
+      case PropertiesSchema(var props, var optProps, var add) -> {
+        // A schema with neither kind of property is serialised by jtdSchemaToJsonObject as
+        // `{}` (unless additionalProperties is set), so no failing document is produced for it.
+        if (props.isEmpty() && optProps.isEmpty()) {
+          yield List.of();
+        }
+        final var failures = new ArrayList();
+        if (!props.isEmpty() && compliant instanceof Map) {
+          // Drop one required key.
+          final var firstKey = props.keySet().iterator().next();
+          failures.add(removeKey((Map) compliant, firstKey));
+        }
+        if (!add && compliant instanceof Map) {
+          // Add a key the schema does not declare; only invalid when additional properties are off.
+          final var extended = new LinkedHashMap<>((Map) compliant);
+          extended.put("extraProperty", "extra-value");
+          failures.add(extended);
+        }
+        failures.add("not-an-object");
+        yield failures;
+      }
+      case ValuesSchema ignored -> List.of("not-an-object");
+      case DiscriminatorSchema(var disc, var ignored) -> {
+        final var failures = new ArrayList();
+        if (compliant instanceof Map) {
+          // Replace the tag with a value that is not among DISCRIMINATOR_VALUES.
+          final var modified = new LinkedHashMap<>((Map) compliant);
+          modified.put(disc, "invalid-discriminator");
+          failures.add(modified);
+        }
+        failures.add("not-an-object");
+        yield failures;
+      }
+      case NullableSchema ignored -> List.of();
+    };
+  }
+
+  /// Returns two wrong-typed values for a JTD `type` keyword; an unrecognised keyword
+  /// yields a single `null`.
+  private static List createFailingTypeValues(String type) {
+    return switch (type) {
+      case "boolean" -> Arrays.asList("not-boolean", 1);
+      case "string", "timestamp" -> Arrays.asList(123, false);
+      case "int8", "uint8", "int16", "int32", "uint32", "uint16" ->
+        Arrays.asList("not-integer", 3.14);
+      case "float32", "float64" -> Arrays.asList("not-float", true);
+      default -> Collections.singletonList(null);
+    };
+  }
+
+  /// Returns an insertion-ordered copy of `original` without `key`; `original` is not modified.
+  private static Map removeKey(Map original, String key) {
+    final var result = new LinkedHashMap();
+    for (var entry : original.entrySet()) {
+      if (!entry.getKey().equals(key)) {
+        result.put(entry.getKey(), entry.getValue());
+      }
+    }
+    return result;
+  }
+
+  /// Describes schema for logging
+  ///
+  /// Type schemas render as `type:<name>` and discriminators as `discriminator[<key>={<tags>}]`;
+  /// composite forms nest the description of their child schema.
+  static String describeSchema(JtdTestSchema schema) {
+    return switch (schema) {
+      case EmptySchema ignored -> "empty";
+      case RefSchema(var ref) -> "ref:" + ref;
+      case TypeSchema(var type) -> "type:" + type;
+      case EnumSchema(var values) -> "enum[" + String.join(",", values) + "]";
+      case ElementsSchema(var elem) -> "elements[" + describeSchema(elem) + "]";
+      case PropertiesSchema(var props, var optProps, var add) -> {
+        // Only property names are listed; the property schemas themselves are not described.
+        final var parts = new ArrayList();
+        if (!props.isEmpty()) parts.add("required{" + String.join(",", props.keySet()) + "}");
+        if (!optProps.isEmpty()) parts.add("optional{" + String.join(",", optProps.keySet()) + "}");
+        if (add) parts.add("additional");
+        yield "properties[" + String.join(",", parts) + "]";
+      }
+      case ValuesSchema(var val) -> "values[" + describeSchema(val) + "]";
+      case DiscriminatorSchema(var disc, var mapping) ->
+        "discriminator[" + disc + "={" + String.join(",", mapping.keySet()) + "}]";
+      case NullableSchema(var inner) -> "nullable[" + describeSchema(inner) + "]";
+    };
+  }
+
+  /// Converts test schema to JSON for the codegen parser
+  ///
+  /// Empty `properties` / `optionalProperties` maps are omitted, and `additionalProperties`
+  /// is written only when true. A nullable schema is the inner schema's object with
+  /// `"nullable": true` added.
+  static JsonObject jtdSchemaToJsonObject(JtdTestSchema schema) {
+    return switch (schema) {
+      case EmptySchema ignored -> JsonObject.of(Map.of());
+      case RefSchema(var ref) -> {
+        final Map map = Map.of("ref", JsonString.of(ref));
+        yield JsonObject.of(map);
+      }
+      case TypeSchema(var type) -> {
+        final Map map = Map.of("type", JsonString.of(type));
+        yield JsonObject.of(map);
+      }
+      case EnumSchema(var values) -> {
+        final Map map = Map.of("enum", JsonArray.of(values.stream().map(JsonString::of).toList()));
+        yield JsonObject.of(map);
+      }
+      case ElementsSchema(var elem) -> {
+        final Map map = Map.of("elements", jtdSchemaToJsonObject(elem));
+        yield JsonObject.of(map);
+      }
+      case PropertiesSchema(var props, var optProps, var add) -> {
+        // LinkedHashMap throughout keeps the emitted key order equal to insertion order.
+        final var schemaMap = new LinkedHashMap();
+        if (!props.isEmpty()) {
+          final Map propsMap = props.entrySet().stream().collect(Collectors.toMap(
+            Map.Entry::getKey, e -> jtdSchemaToJsonObject(e.getValue()),
+            (a, b) -> a, LinkedHashMap::new));
+          schemaMap.put("properties", JsonObject.of(propsMap));
+        }
+        if (!optProps.isEmpty()) {
+          final Map optMap = optProps.entrySet().stream().collect(Collectors.toMap(
+            Map.Entry::getKey, e -> jtdSchemaToJsonObject(e.getValue()),
+            (a, b) -> a, LinkedHashMap::new));
+          schemaMap.put("optionalProperties", JsonObject.of(optMap));
+        }
+        if (add) {
+          schemaMap.put("additionalProperties", JsonBoolean.of(true));
+        }
+        yield JsonObject.of(schemaMap);
+      }
+      case ValuesSchema(var val) -> {
+        final Map map = Map.of("values", jtdSchemaToJsonObject(val));
+        yield JsonObject.of(map);
+      }
+      case DiscriminatorSchema(var disc, var mapping) -> {
+        final var schemaMap = new LinkedHashMap();
+        schemaMap.put("discriminator", JsonString.of(disc));
+        final Map mappingMap = mapping.entrySet().stream().collect(Collectors.toMap(
+          Map.Entry::getKey, e -> jtdSchemaToJsonObject(e.getValue()),
+          (a, b) -> a, LinkedHashMap::new));
+        schemaMap.put("mapping", JsonObject.of(mappingMap));
+        yield JsonObject.of(schemaMap);
+      }
+      case NullableSchema(var inner) -> {
+        final var innerSchema = jtdSchemaToJsonObject(inner);
+        final var nullableMap = new LinkedHashMap();
+        nullableMap.putAll(innerSchema.members());
+        nullableMap.put("nullable", JsonBoolean.of(true));
+        yield JsonObject.of(nullableMap);
+      }
+    };
+  }
+
+  /// Converts a Java value to a GraalVM polyglot-compatible value. 
+  /// `null`, booleans and strings pass through unchanged, every `Number` becomes a `double`,
+  /// and lists / maps are copied recursively into a fresh JS array / object created in
+  /// `context`. Any other value is returned as-is.
+  @SuppressWarnings("unchecked")
+  private static Object toGraalValue(Context context, Object value) {
+    if (value == null) return null;
+    if (value instanceof Boolean || value instanceof String) return value;
+    if (value instanceof Number num) return num.doubleValue();
+    if (value instanceof List lst) {
+      final var jsArray = context.eval("js", "[]");
+      for (int i = 0; i < lst.size(); i++) {
+        jsArray.setArrayElement(i, toGraalValue(context, lst.get(i)));
+      }
+      return jsArray;
+    }
+    if (value instanceof Map rawMap) {
+      // "({})" is parenthesised so the source is evaluated as an object literal expression.
+      final var jsObj = context.eval("js", "({})");
+      final var typedMap = (Map) rawMap;
+      for (var entry : typedMap.entrySet()) {
+        jsObj.putMember(entry.getKey(), toGraalValue(context, entry.getValue()));
+      }
+      return jsObj;
+    }
+    return value;
+  }
+
+  /// Runs validation via GraalVM polyglot: loads the generated ESM module,
+  /// calls `validate(instance)`, returns the number of errors.
+  ///
+  /// A new polyglot context is created for every call and closed before returning.
+  /// `schemaDescription` and `testName` are used only in the FINEST log messages.
+  // NOTE(review): the context options and module Source built here repeat what
+  // GraalJsRunner.createContext() / loadValidatorModule() do - consider delegating to them.
+  private static int runValidation(Path modulePath, Object document, String schemaDescription, String testName) throws IOException {
+    // The module text is read on every call, even when FINEST is disabled; it is only used for logging.
+    final var jsContent = Files.readString(modulePath, StandardCharsets.UTF_8);
+    LOG.finest(() -> String.format("%s - Generated JS for schema '%s':%n%s", testName, schemaDescription, jsContent));
+    LOG.finest(() -> String.format("%s - Document: %s", testName, document));
+
+    try (var context = Context.newBuilder("js")
+      .allowIO(IOAccess.ALL)
+      .option("js.esm-eval-returns-exports", "true")
+      .option("js.ecmascript-version", "2020")
+      .build()) {
+      final var source = Source.newBuilder("js", modulePath.toFile())
+        .mimeType("application/javascript+module")
+        .build();
+      // With "js.esm-eval-returns-exports" enabled, the eval result is used as the module's exports.
+      final var exports = context.eval(source);
+      final var validateFn = exports.getMember("validate");
+      final var graalDoc = toGraalValue(context, document);
+      final var result = validateFn.execute(graalDoc);
+      // The error count is the length of the array returned by validate().
+      return (int) result.getArraySize();
+    }
+  }
+
+  @Property(generation = GenerationMode.AUTO)
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  void 
generatedValidatorPassesCompliantDocuments(@ForAll("jtdSchemas") JtdTestSchema schema) throws Exception { + LOG.finer(() -> "Executing generatedValidatorPassesCompliantDocuments"); + + final var schemaDescription = describeSchema(schema); + LOG.fine(() -> "Testing schema: " + schemaDescription); + + // Skip problematic combinations + if (schemaDescription.contains("elements[discriminator[") && schemaDescription.contains("type=")) { + LOG.fine(() -> "Skipping problematic schema: " + schemaDescription); + return; + } + + final var tempDir = Files.createTempDirectory("jtd-esm-prop-test-"); + + // Write schema JSON and generate validator + final var schemaJson = jtdSchemaToJsonObject(schema); + final var schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, Json.toDisplayString(schemaJson, 0), StandardCharsets.UTF_8); + final var outJs = JtdToEsmCli.run(schemaFile, tempDir); + + // Build compliant document + final var compliantDoc = buildCompliantDocument(schema); + if (compliantDoc == null) { + LOG.fine(() -> "Skipping null compliant document for schema: " + schemaDescription); + cleanup(tempDir); + return; + } + + // Validate via GraalVM polyglot + final int errorCount = runValidation(outJs, compliantDoc, schemaDescription, "generatedValidatorPassesCompliantDocuments"); + + if (errorCount != 0) { + LOG.severe(() -> String.format( + "Compliant document FAILED for schema: %s%nDocument: %s%nErrors: %d%nGenerated JS: %s", + schemaDescription, compliantDoc, errorCount, outJs)); + } + + assertThat(errorCount).as( + "Compliant document should pass validation for schema: %s with doc: %s", + schemaDescription, compliantDoc).isZero(); + + cleanup(tempDir); + } + + @Property(generation = GenerationMode.AUTO) + @SuppressWarnings({"unchecked", "rawtypes"}) + void generatedValidatorRejectsFailingDocuments(@ForAll("jtdSchemas") JtdTestSchema schema) throws Exception { + LOG.finer(() -> "Executing generatedValidatorRejectsFailingDocuments"); + + final var 
schemaDescription = describeSchema(schema); + LOG.fine(() -> "Testing schema: " + schemaDescription); + + // Skip problematic combinations + if (schemaDescription.contains("elements[discriminator[") && schemaDescription.contains("type=")) { + LOG.fine(() -> "Skipping problematic schema: " + schemaDescription); + return; + } + + // Skip schemas that accept everything + if (schema instanceof EmptySchema || schema instanceof NullableSchema) { + return; + } + + final var tempDir = Files.createTempDirectory("jtd-esm-prop-test-"); + + // Write schema JSON and generate validator + final var schemaJson = jtdSchemaToJsonObject(schema); + final var schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, Json.toDisplayString(schemaJson, 0), StandardCharsets.UTF_8); + final var outJs = JtdToEsmCli.run(schemaFile, tempDir); + + // Create failing documents + final var compliantDoc = buildCompliantDocument(schema); + final var failingDocs = createFailingDocuments(schema, compliantDoc); + + if (failingDocs.isEmpty()) { + cleanup(tempDir); + return; + } + + // Validate each failing document + for (int i = 0; i < failingDocs.size(); i++) { + final var failingDoc = failingDocs.get(i); + if (failingDoc == null) continue; + + final int errorCount = runValidation(outJs, failingDoc, schemaDescription, "generatedValidatorRejectsFailingDocuments"); + final int docIndex = i; + + if (errorCount == 0) { + LOG.severe(() -> String.format( + "Failing document #%d PASSED (should have failed) for schema: %s%nDocument: %s%nGenerated JS: %s", + docIndex, schemaDescription, failingDoc, outJs)); + } + + assertThat(errorCount).as( + "Failing document #%d should be rejected for schema: %s with doc: %s", + docIndex, schemaDescription, failingDoc).isGreaterThan(0); + } + + cleanup(tempDir); + } + + private static void cleanup(Path tempDir) throws IOException { + Files.walk(tempDir) + .sorted(Comparator.reverseOrder()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch 
(IOException e) { + // Ignore cleanup errors + } + }); + } +} diff --git a/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCodegenTest.java b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCodegenTest.java new file mode 100644 index 0000000..7b30134 --- /dev/null +++ b/jtd-esm-codegen/src/test/java/io/github/simbo1905/json/jtd/codegen/JtdToEsmCodegenTest.java @@ -0,0 +1,323 @@ +package io.github.simbo1905.json.jtd.codegen; + +import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.Source; +import org.graalvm.polyglot.Value; +import org.graalvm.polyglot.io.IOAccess; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.logging.Logger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Tests for the stack-based JTD to ESM code generator. +/// Uses GraalVM Polyglot JS for in-process JavaScript execution - no external runtime needed. 
+final class JtdToEsmCodegenTest extends JtdEsmCodegenLoggingConfig { + private static final Logger LOG = Logger.getLogger(JtdToEsmCodegenTest.class.getName()); + + // --- Parser tests (pure Java, no JS execution) --- + + @Test + void parsesSimpleBooleanTypeSchema() { + LOG.info(() -> "Running parsesSimpleBooleanTypeSchema"); + final var root = JtdParser.parseString(""" + {"type": "boolean"} + """); + assertThat(root.id()).isEqualTo("JtdSchema"); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.TypeNode.class); + final var typeNode = (JtdAst.TypeNode) root.rootSchema(); + assertThat(typeNode.type()).isEqualTo("boolean"); + } + + @Test + void parsesSchemaWithMetadataId() { + LOG.info(() -> "Running parsesSchemaWithMetadataId"); + final var root = JtdParser.parseString(""" + {"type": "string", "metadata": {"id": "my-schema-v1"}} + """); + assertThat(root.id()).isEqualTo("my-schema-v1"); + } + + @Test + void parsesEnumSchema() { + LOG.info(() -> "Running parsesEnumSchema"); + final var root = JtdParser.parseString(""" + {"enum": ["active", "inactive", "pending"]} + """); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.EnumNode.class); + final var enumNode = (JtdAst.EnumNode) root.rootSchema(); + assertThat(enumNode.values()).containsExactly("active", "inactive", "pending"); + } + + @Test + void parsesElementsArraySchema() { + LOG.info(() -> "Running parsesElementsArraySchema"); + final var root = JtdParser.parseString(""" + {"elements": {"type": "string"}, "metadata": {"id": "string-array"}} + """); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.ElementsNode.class); + final var elementsNode = (JtdAst.ElementsNode) root.rootSchema(); + assertThat(elementsNode.schema()).isInstanceOf(JtdAst.TypeNode.class); + } + + @Test + void parsesNestedElementsSchema() { + LOG.info(() -> "Running parsesNestedElementsSchema"); + final var root = JtdParser.parseString(""" + {"elements": {"elements": {"type": "int32"}}, "metadata": {"id": "matrix"}} + """); + 
assertThat(root.rootSchema()).isInstanceOf(JtdAst.ElementsNode.class); + final var outer = (JtdAst.ElementsNode) root.rootSchema(); + assertThat(outer.schema()).isInstanceOf(JtdAst.ElementsNode.class); + } + + @Test + void parsesValuesMapSchema() { + LOG.info(() -> "Running parsesValuesMapSchema"); + final var root = JtdParser.parseString(""" + {"values": {"type": "string"}, "metadata": {"id": "string-map"}} + """); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.ValuesNode.class); + } + + @Test + void parsesDiscriminatorUnionSchema() { + LOG.info(() -> "Running parsesDiscriminatorUnionSchema"); + final var root = JtdParser.parseString(""" + { + "discriminator": "type", + "mapping": { + "cat": {"properties": {"name": {"type": "string"}, "meow": {"type": "boolean"}}}, + "dog": {"properties": {"name": {"type": "string"}, "bark": {"type": "boolean"}}} + }, + "metadata": {"id": "animal-union"} + } + """); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.DiscriminatorNode.class); + final var discNode = (JtdAst.DiscriminatorNode) root.rootSchema(); + assertThat(discNode.discriminator()).isEqualTo("type"); + assertThat(discNode.mapping()).containsKeys("cat", "dog"); + } + + @Test + void parsesNullableWrapperSchema() { + LOG.info(() -> "Running parsesNullableWrapperSchema"); + final var root = JtdParser.parseString(""" + {"type": "string", "nullable": true, "metadata": {"id": "nullable-string"}} + """); + assertThat(root.rootSchema()).isInstanceOf(JtdAst.NullableNode.class); + final var nullableNode = (JtdAst.NullableNode) root.rootSchema(); + assertThat(nullableNode.wrapped()).isInstanceOf(JtdAst.TypeNode.class); + } + + @Test + void parsesRefAndDefinitions() { + LOG.info(() -> "Running parsesRefAndDefinitions"); + final var root = JtdParser.parseString(""" + { + "definitions": {"dataValue": {"type": "string"}}, + "properties": {"data": {"ref": "dataValue"}}, + "metadata": {"id": "ref-test"} + } + """); + assertThat(root.definitions()).containsKey("dataValue"); + 
assertThat(root.rootSchema()).isInstanceOf(JtdAst.PropertiesNode.class); + } + + @Test + void rejectsUnknownType() { + LOG.info(() -> "Running rejectsUnknownType"); + assertThatThrownBy(() -> JtdParser.parseString("{\"type\": \"unknown\"}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unknown type"); + } + + @Test + void rejectsInvalidEnum() { + LOG.info(() -> "Running rejectsInvalidEnum"); + assertThatThrownBy(() -> JtdParser.parseString("{\"enum\": [\"a\", 123, \"c\"]}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("to be a string"); + } + + // --- Generated code content tests (no JS execution) --- + + @Test + void generatedValidatorIncludesOnlyNeededHelpers(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedValidatorIncludesOnlyNeededHelpers"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + {"type": "boolean", "metadata": {"id": "simple"}} + """, StandardCharsets.UTF_8); + final Path outJs = JtdToEsmCli.run(schemaFile, tempDir); + final String generated = Files.readString(outJs, StandardCharsets.UTF_8); + + assertThat(generated).doesNotContain("isTimestamp"); + assertThat(generated).doesNotContain("isIntInRange"); + assertThat(generated).doesNotContain("isFloat"); + assertThat(generated).contains("typeof"); + } + + @Test + void generatedTimestampValidatorIncludesTimestampHelper(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedTimestampValidatorIncludesTimestampHelper"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + {"type": "timestamp", "metadata": {"id": "ts-test"}} + """, StandardCharsets.UTF_8); + final Path outJs = JtdToEsmCli.run(schemaFile, tempDir); + final String generated = Files.readString(outJs, StandardCharsets.UTF_8); + + // Spec-compliant: timestamp check is inlined (no helper function) + assertThat(generated).contains("/type"); + 
assertThat(generated).contains("errors.push"); + } + + // --- GraalVM Polyglot JS execution tests --- + + @Test + void generatedBooleanValidatorPassesValidCases(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedBooleanValidatorPassesValidCases"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + {"type": "boolean", "metadata": {"id": "bool-test"}} + """, StandardCharsets.UTF_8); + final Path outJs = JtdToEsmCli.run(schemaFile, tempDir); + + try (var cx = jsContext()) { + final var exports = evalModule(cx, outJs); + final var validate = exports.getMember("validate"); + + // Valid cases + assertThat(errCount(validate, true)).as("true").isZero(); + assertThat(errCount(validate, false)).as("false").isZero(); + + // Invalid cases + assertThat(errCount(validate, "hello")).as("string").isGreaterThan(0); + assertThat(errCount(validate, 42)).as("number").isGreaterThan(0); + assertThat(errCount(validate, cx.eval("js", "null"))).as("null").isGreaterThan(0); + } + } + + @Test + void generatedStringArrayValidatorWorks(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedStringArrayValidatorWorks"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + {"elements": {"type": "string"}, "metadata": {"id": "string-array-test"}} + """, StandardCharsets.UTF_8); + final Path outJs = JtdToEsmCli.run(schemaFile, tempDir); + + try (var cx = jsContext()) { + final var exports = evalModule(cx, outJs); + final var validate = exports.getMember("validate"); + + // Valid: empty array + assertThat(errCount(validate, cx.eval("js", "[]"))).as("empty-array").isZero(); + // Valid: string array + assertThat(errCount(validate, cx.eval("js", "['a','b','c']"))).as("string-array").isZero(); + // Invalid: not an array + assertThat(errCount(validate, "hello")).as("not-array").isGreaterThan(0); + // Invalid: mixed + assertThat(errCount(validate, cx.eval("js", 
"['a',123,'c']"))).as("mixed").isGreaterThan(0); + } + } + + @Test + void generatedObjectValidatorChecksRequiredAndOptional(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedObjectValidatorChecksRequiredAndOptional"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + { + "properties": {"id": {"type": "int32"}, "name": {"type": "string"}}, + "optionalProperties": {"email": {"type": "string"}}, + "metadata": {"id": "user-schema"} + } + """, StandardCharsets.UTF_8); + final Path outJs = JtdToEsmCli.run(schemaFile, tempDir); + + try (var cx = jsContext()) { + final var exports = evalModule(cx, outJs); + final var validate = exports.getMember("validate"); + + // Valid: complete object + assertThat(errCount(validate, cx.eval("js", "({id:1,name:'Alice',email:'a@b.com'})"))) + .as("complete").isZero(); + // Valid: without optional + assertThat(errCount(validate, cx.eval("js", "({id:1,name:'Alice'})"))) + .as("without-optional").isZero(); + // Invalid: missing required + assertThat(errCount(validate, cx.eval("js", "({name:'Alice'})"))) + .as("missing-required").isGreaterThan(0); + // Invalid: wrong type + assertThat(errCount(validate, cx.eval("js", "({id:'not-int',name:'Alice'})"))) + .as("wrong-type").isGreaterThan(0); + // Invalid: not an object + assertThat(errCount(validate, "hello")).as("not-object").isGreaterThan(0); + } + } + + @Test + void generatedDiscriminatorValidatorWorks(@TempDir Path tempDir) throws Exception { + LOG.info(() -> "Running generatedDiscriminatorValidatorWorks"); + final Path schemaFile = tempDir.resolve("schema.json"); + Files.writeString(schemaFile, """ + { + "discriminator": "kind", + "mapping": { + "cat": {"properties": {"name": {"type": "string"}, "meow": {"type": "boolean"}}}, + "dog": {"properties": {"name": {"type": "string"}, "bark": {"type": "boolean"}}} + }, + "metadata": {"id": "animal-disc"} + } + """, StandardCharsets.UTF_8); + final Path outJs = 
JtdToEsmCli.run(schemaFile, tempDir); + + try (var cx = jsContext()) { + final var exports = evalModule(cx, outJs); + final var validate = exports.getMember("validate"); + + // Valid: cat + assertThat(errCount(validate, cx.eval("js", "({kind:'cat',name:'Whiskers',meow:true})"))) + .as("valid-cat").isZero(); + // Valid: dog + assertThat(errCount(validate, cx.eval("js", "({kind:'dog',name:'Rex',bark:true})"))) + .as("valid-dog").isZero(); + // Invalid: unknown discriminator value + assertThat(errCount(validate, cx.eval("js", "({kind:'fish',name:'Nemo'})"))) + .as("unknown-kind").isGreaterThan(0); + // Invalid: missing discriminator + assertThat(errCount(validate, cx.eval("js", "({name:'Rex',bark:true})"))) + .as("missing-disc").isGreaterThan(0); + // Invalid: not an object + assertThat(errCount(validate, "hello")).as("not-object").isGreaterThan(0); + } + } + + // --- Helpers --- + + private static Context jsContext() { + return Context.newBuilder("js") + .allowIO(IOAccess.ALL) + .option("js.esm-eval-returns-exports", "true") + .option("js.ecmascript-version", "2020") + .build(); + } + + private static Value evalModule(Context cx, Path modulePath) throws Exception { + final var source = Source.newBuilder("js", modulePath.toFile()) + .mimeType("application/javascript+module") + .build(); + return cx.eval(source); + } + + private static int errCount(Value validateFn, Object value) { + return (int) validateFn.execute(value).getArraySize(); + } +} diff --git a/jtd-esm-codegen/src/test/resources/boolean-schema.test.js b/jtd-esm-codegen/src/test/resources/boolean-schema.test.js new file mode 100644 index 0000000..daf14e9 --- /dev/null +++ b/jtd-esm-codegen/src/test/resources/boolean-schema.test.js @@ -0,0 +1,53 @@ +/// boolean-schema.test.js - JUnit JS test for the boolean type validator +/// Runs via junit-js JSRunner (GraalVM polyglot, no bun/node required) +/// +/// Tests that the generated boolean validator correctly accepts booleans +/// and rejects all other JSON 
value types. + +// Load the expected fixture, stripping ESM export keywords for plain eval +var Files = Java.type('java.nio.file.Files'); +var Paths = Java.type('java.nio.file.Paths'); +var fixtureContent = Files.readString( + Paths.get('src/test/resources/expected/boolean-schema.js') +); +// Strip 'export ' prefix so the function is declared in global scope +eval(fixtureContent.replace(/^export /gm, '')); + +tests({ + validateReturnEmptyArrayForTrue: function() { + var errors = validate(true); + assert.assertEquals(0, errors.length); + }, + + validateReturnEmptyArrayForFalse: function() { + var errors = validate(false); + assert.assertEquals(0, errors.length); + }, + + validateReturnErrorForString: function() { + var errors = validate('hello'); + assert.assertEquals(1, errors.length); + assert.assertEquals('', errors[0].instancePath); + assert.assertEquals('/type', errors[0].schemaPath); + }, + + validateReturnErrorForNumber: function() { + var errors = validate(42); + assert.assertEquals(1, errors.length); + }, + + validateReturnErrorForNull: function() { + var errors = validate(null); + assert.assertEquals(1, errors.length); + }, + + validateReturnErrorForObject: function() { + var errors = validate({}); + assert.assertEquals(1, errors.length); + }, + + validateReturnErrorForArray: function() { + var errors = validate([]); + assert.assertEquals(1, errors.length); + } +}); diff --git a/jtd-esm-codegen/src/test/resources/expected/boolean-schema.js b/jtd-esm-codegen/src/test/resources/expected/boolean-schema.js new file mode 100644 index 0000000..5f46281 --- /dev/null +++ b/jtd-esm-codegen/src/test/resources/expected/boolean-schema.js @@ -0,0 +1,15 @@ +// boolean-schema.js +// Generated from JTD schema: boolean-schema +// SHA-256: (test fixture) + +export function validate(instance) { + const errors = []; + const instancePath = ""; + + // Type check for boolean + if (typeof instance !== "boolean") { + errors.push({ instancePath: "", schemaPath: "/type" }); + } + + 
return errors;
+}
diff --git a/jtd-esm-codegen/src/test/resources/jtd/boolean-schema.jtd.json b/jtd-esm-codegen/src/test/resources/jtd/boolean-schema.jtd.json
new file mode 100644
index 0000000..dd97725
--- /dev/null
+++ b/jtd-esm-codegen/src/test/resources/jtd/boolean-schema.jtd.json
@@ -0,0 +1,3 @@
+{
+  "type": "boolean"
+}
diff --git a/jtd-esm-codegen/src/test/resources/nested-elements-empty-focused.test.js b/jtd-esm-codegen/src/test/resources/nested-elements-empty-focused.test.js
new file mode 100644
index 0000000..b163aa0
--- /dev/null
+++ b/jtd-esm-codegen/src/test/resources/nested-elements-empty-focused.test.js
@@ -0,0 +1,72 @@
+/// Focused test for nested elements with empty schema
+/// Schema: elements[elements[empty]]
+/// This verifies the fix for "validate_inline_X is not defined" error
+
+var Files = Java.type('java.nio.file.Files');
+var Paths = Java.type('java.nio.file.Paths');
+var StandardCharsets = Java.type('java.nio.charset.StandardCharsets');
+var JtdToEsmCli = Java.type('io.github.simbo1905.json.jtd.codegen.JtdToEsmCli');
+
+// Generates a validator for the given JTD schema JSON text and returns the
+// generated JavaScript source as a string. The schema is written to a fresh
+// temp directory, which is also passed to JtdToEsmCli.run as its second
+// argument; that directory is deleted (best effort) before returning.
+function generateValidator(schemaJson) {
+  var tempDir = Files.createTempDirectory('jtd-esm-test-');
+  var schemaFile = tempDir.resolve('schema.json');
+  Files.writeString(schemaFile, schemaJson, StandardCharsets.UTF_8);
+  var outJs = JtdToEsmCli.run(schemaFile, tempDir);
+  var jsContent = Files.readString(outJs, StandardCharsets.UTF_8);
+
+  // Cleanup: visit paths in reverse order so children are deleted before their
+  // parent directory. A comparator that always returns -1 breaks the Comparator
+  // contract, so a real ordering is used. Failures are ignored on purpose.
+  try {
+    Files.walk(tempDir).sorted(function(a, b) { return b.compareTo(a); }).forEach(function(p) {
+      try { Files.deleteIfExists(p); } catch (e) {}
+    });
+  } catch (e) {}
+
+  return jsContent;
+}
+
+tests({
+  nestedElementsWithEmptySchemaGeneratesInlineValidators: function() {
+    var schemaJson = '{"elements":{"elements":{}}}';
+    var jsContent = generateValidator(schemaJson);
+
+    // Should reference validate_inline_0 for inner elements
+    var hasInlineRef = jsContent.indexOf('validate_inline_0') !== -1;
+    // Should define validate_inline_0 function
+    var hasInlineDef = 
jsContent.indexOf('function validate_inline_0') !== -1; + + assert.assertTrue('Generated JS should reference inline validator', hasInlineRef); + assert.assertTrue('Generated JS should define inline validator', hasInlineDef); + }, + + tripleNestedElementsGeneratesMultipleInlineValidators: function() { + var schemaJson = '{"elements":{"elements":{"elements":{}}}}'; + var jsContent = generateValidator(schemaJson); + + // Should have validate_inline_0 and validate_inline_1 + var hasInline0 = jsContent.indexOf('function validate_inline_0') !== -1; + var hasInline1 = jsContent.indexOf('function validate_inline_1') !== -1; + + assert.assertTrue('Should generate validate_inline_0', hasInline0); + assert.assertTrue('Should generate validate_inline_1', hasInline1); + }, + + generatedJavaScriptIsValid: function() { + var schemaJson = '{"elements":{"elements":{}}}'; + var jsContent = generateValidator(schemaJson); + + // Strip export and check syntax + try { + var testJs = jsContent.replace(/^export /gm, ''); + eval(testJs); + assert.assertTrue(true); + } catch (e) { + assert.fail('Generated JS has syntax error: ' + e.message); + } + } +}); diff --git a/jtd-esm-codegen/src/test/resources/odc-chart-event-v1.jtd.json b/jtd-esm-codegen/src/test/resources/odc-chart-event-v1.jtd.json new file mode 100644 index 0000000..2b90c2e --- /dev/null +++ b/jtd-esm-codegen/src/test/resources/odc-chart-event-v1.jtd.json @@ -0,0 +1,16 @@ +{ + "properties": { + "src": { "type": "string" }, + "action": { "enum": ["on_click", "on_hover", "on_select", "on_deselect"] }, + "domain": { "type": "string" }, + "data": {} + }, + "optionalProperties": { + "ts": { "type": "timestamp" } + }, + "metadata": { + "id": "odc-chart-event-v1", + "description": "Event payload for ODC-bound chart interactions" + } +} + diff --git a/pom.xml b/pom.xml index 16d8439..2926cf5 100644 --- a/pom.xml +++ b/pom.xml @@ -42,6 +42,7 @@ json-compatibility-suite json-java21-jtd json-java21-jsonpath + jtd-esm-codegen