diff --git a/README.md b/README.md index db4c15d..b08c185 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,247 @@ -# json2yaml +# yaml-json-cli -Command line utilities to convert between JSON and YAML while -preserving the order of associative arrays. +Bidirectional **YAML ↔ JSON** converter for the command line and Python API. -Preserving the mapping order is helpful to humans reading the -documents, despite not affecting their meaning. +``` +yaml-json-cli config.yaml -o config.json +yaml-json-cli data.json -o data.yaml +``` -## Install +--- -```pip install json2yaml``` +## Features -## Usage -```json2yaml input.json output.yaml``` +- **YAML → JSON** — full nested-structure preservation, multi-document support +- **JSON → YAML** — block (default) or flow style output +- **Multi-document YAML** (`---` separators) → JSON array +- **YAML anchors & aliases** resolved automatically +- **Date/datetime** values serialized to ISO-8601 strings +- **stdin/stdout** pipeline support +- Configurable indent, sort-keys, encoding +- Zero mandatory dependencies beyond **PyYAML** -```yaml2json input.yaml output.json``` +--- +## Installation + +```bash +pip install yaml-json-cli ``` -$ json2yaml --help -Usage: - json2yaml (--version|--help) - json2yaml [] [] - -Arguments: - The input file containing the JSON to convert. If not - specified, reads from stdin. - The output file to which to write the converted YAML. If - not specified, writes to stdout. + +Or from source: + +```bash +git clone https://github.com/toxfox69/yaml-json-cli +cd yaml-json-cli +pip install . ``` +--- + +## CLI Usage + ``` -$ yaml2json --help -Usage: - yaml2json (--version|--help) - yaml2json [-i ] [] [] - -Arguments: - -i, --indent=INDENT Number of spaces to indent [default: 4] - The input file containing the YAML to convert. If not - specified, reads from stdin. - The output file to which to write the converted JSON. - If not specified, writes to stdout. 
+yaml-json-cli INPUT [-o OUTPUT] [options] + +positional arguments: + INPUT Input file (.yaml, .yml, or .json) + +options: + -o, --output Output file (default: stdout) + --from FORMAT Force input format: yaml or json + --indent N JSON indent width (default: 2) + --sort-keys Sort mapping keys in output + --flow-style Compact YAML flow style (JSON-like) + --encoding ENC File encoding (default: utf-8) + --version Show version and exit + -h, --help Show help message ``` -## Changelog +### Examples -+ 1.2.0 (October 19, 2021) - + support Python 3 - + support multiple yaml documents in one file - + learn to wrap multiple yaml documents in a JSON array (-a | --array) - + use yaml safe_load to prevent loading of arbitrary Python objects -+ 1.1.1 (March 16, 2015) - + terminate json output with newline -+ 1.1.0 (March 16, 2015) - + take indent as command line argument (-i | --indent) - + prevent trailing spaces in json output +```bash +# YAML → JSON (print to stdout) +yaml-json-cli config.yaml -## Authors -**David Bild** +# YAML → JSON (write to file) +yaml-json-cli config.yaml -o config.json -+ [https://www.davidbild.org](https://www.davidbild.org) -+ [https://github.com/drbild](https://github.com/drbild) +# JSON → YAML (write to file) +yaml-json-cli data.json -o data.yaml -## License -Copyright 2015 David R. 
Bild +# JSON → YAML (print to stdout) +yaml-json-cli data.json + +# Sort keys in output +yaml-json-cli config.yaml --sort-keys -o config.json + +# 4-space JSON indent +yaml-json-cli config.yaml --indent 4 -o config.json + +# Compact YAML flow style +yaml-json-cli data.json --flow-style + +# Multi-document YAML → JSON array +yaml-json-cli multi.yaml -o out.json + +# Force format (useful for stdin or non-standard extensions) +cat config.yaml | yaml-json-cli /dev/stdin --from yaml + +# Force JSON input from a .txt file +yaml-json-cli data.txt --from json -o data.yaml +``` + +--- + +## Input / Output Examples + +### YAML → JSON + +**Input** (`config.yaml`): +```yaml +server: + host: localhost + port: 8080 +database: + url: postgres://localhost/mydb + pool_size: 10 +features: + - auth + - logging + - metrics +debug: false +``` + +**Output** (`config.json`): +```json +{ + "server": { + "host": "localhost", + "port": 8080 + }, + "database": { + "url": "postgres://localhost/mydb", + "pool_size": 10 + }, + "features": [ + "auth", + "logging", + "metrics" + ], + "debug": false +} +``` + +--- + +### JSON → YAML + +**Input** (`data.json`): +```json +{ + "users": [ + {"id": 1, "name": "Alice", "active": true}, + {"id": 2, "name": "Bob", "active": false} + ] +} +``` + +**Output** (`data.yaml`): +```yaml +users: +- id: 1 + name: Alice + active: true +- id: 2 + name: Bob + active: false +``` -Licensed under the Apache License, Version 2.0 (the "License"); you may not use -this work except in compliance with the License. 
You may obtain a copy of the -License from the LICENSE.txt file or at +--- -[http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) +### Multi-document YAML → JSON array + +**Input** (`services.yaml`): +```yaml +name: api +port: 8080 +--- +name: worker +port: 9090 +``` + +**Output**: +```json +[ + {"name": "api", "port": 8080}, + {"name": "worker", "port": 9090} +] +``` + +--- + +## Python API + +```python +from yaml_json_converter import yaml_to_json, json_to_yaml + +# YAML file → JSON file +data = yaml_to_json("config.yaml", "config.json") + +# JSON file → YAML file +data = json_to_yaml("data.json", "data.yaml", sort_keys=True) + +# From string (no file I/O) +data = yaml_to_json(text="key: value\n") +data = json_to_yaml(text='{"key": "value"}') + +# Options +yaml_to_json( + "input.yaml", + "output.json", + indent=4, + sort_keys=True, + encoding="utf-8", +) + +json_to_yaml( + "input.json", + "output.yaml", + sort_keys=False, + default_flow_style=False, + encoding="utf-8", +) +``` + +--- + +## Error Handling + +The CLI exits with: + +| Code | Meaning | +|------|---------| +| `0` | Success | +| `1` | File not found or parse error (message printed to stderr) | +| `2` | Bad arguments (argparse) | +| `130`| Interrupted (Ctrl-C) | + +The library raises: +- `FileNotFoundError` — input file missing +- `ValueError` — malformed YAML or JSON, empty document + +--- + +## Development + +```bash +pip install -e ".[dev]" +pytest test_converter.py -v +pytest test_converter.py -v --cov=yaml_json_converter --cov-report=term-missing +``` + +--- + +## License -Unless required by applicable law or agreed to in writing, software distributed -under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -CONDITIONS OF ANY KIND, either express or implied. See the License for the -specific language governing permissions and limitations under the License. 
+MIT — Copyright © 2026 ENERGENAI LLC diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3aecde9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyyaml>=6.0 diff --git a/setup.py b/setup.py index 9273c53..3f9f0ff 100644 --- a/setup.py +++ b/setup.py @@ -1,31 +1,50 @@ -#!/usr/bin/env python -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +from setuptools import setup -from os.path import join as pjoin +with open("README.md", encoding="utf-8") as fh: + long_description = fh.read() setup( - name = 'json2yaml', - version = '1.2.1-SNAPSHOT', - author = 'David R. Bild', - author_email = 'david@davidbild.org', - keywords = 'yaml json converter ordered order preserving', - url = 'https://github.com/drbild/json2yaml', - description = 'Convert JSON to YAML or vice versa, while' - ' preserving the order of associative arrays.', - classifiers = [ - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', + name="yaml-json-cli", + version="1.0.0", + author="ENERGENAI LLC", + author_email="tiamat@tiamat.live", + description="Bidirectional YAML ↔ JSON converter CLI and library", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/toxfox69/yaml-json-cli", + py_modules=["yaml_json_converter"], + python_requires=">=3.8", + install_requires=[ + "pyyaml>=6.0", ], - scripts = [ - pjoin('bin', 'json2yaml'), - pjoin('bin', 'yaml2json') + extras_require={ + "dev": [ + "pytest>=7.0", + "pytest-cov>=4.0", + ] + }, + entry_points={ + "console_scripts": [ + "yaml-json-cli=yaml_json_converter:main", + ] + }, + classifiers=[ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: MIT License", 
+ "Operating System :: OS Independent", + "Topic :: Utilities", + "Topic :: Text Processing :: Markup", + "Environment :: Console", ], - install_requires = [ - 'pyyaml', - 'pyaml', - 'docopt' - ] + keywords="yaml json converter cli data transformation", + license="MIT", + project_urls={ + "Bug Tracker": "https://github.com/toxfox69/yaml-json-cli/issues", + "Source": "https://github.com/toxfox69/yaml-json-cli", + }, ) diff --git a/test_converter.py b/test_converter.py new file mode 100644 index 0000000..6f00e66 --- /dev/null +++ b/test_converter.py @@ -0,0 +1,555 @@ +""" +Tests for yaml_json_converter. + +Run with: pytest test_converter.py -v + pytest test_converter.py -v --cov=yaml_json_converter +""" + +import json +import os +import sys +import textwrap +from io import StringIO +from pathlib import Path + +import pytest +import yaml + +# Ensure the local module is importable when running tests from this dir. +sys.path.insert(0, str(Path(__file__).parent)) +from yaml_json_converter import ( + _detect_format, + _json_serializable, + _load_json, + _load_yaml_docs, + build_parser, + json_to_yaml, + main, + yaml_to_json, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture() +def tmp(tmp_path): + """Alias for tmp_path.""" + return tmp_path + + +# --------------------------------------------------------------------------- +# _load_yaml_docs +# --------------------------------------------------------------------------- + +class TestLoadYamlDocs: + def test_simple_dict(self): + docs = _load_yaml_docs("key: value\n") + assert docs == [{"key": "value"}] + + def test_nested_dict(self): + docs = _load_yaml_docs("a:\n b:\n c: 1\n") + assert docs == [{"a": {"b": {"c": 1}}}] + + def test_list_root(self): + docs = _load_yaml_docs("- 1\n- 2\n- 3\n") + assert docs == [[1, 2, 3]] + + def test_multi_document(self): + text = "key: a\n---\nkey: 
b\n" + docs = _load_yaml_docs(text) + assert len(docs) == 2 + assert docs[0] == {"key": "a"} + assert docs[1] == {"key": "b"} + + def test_empty_document_returns_empty_list(self): + docs = _load_yaml_docs("") + assert docs == [] + + def test_malformed_yaml_raises_value_error(self): + with pytest.raises(ValueError, match="Malformed YAML"): + _load_yaml_docs("{bad yaml: [unclosed") + + def test_scalar_root(self): + docs = _load_yaml_docs("42\n") + assert docs == [42] + + +# --------------------------------------------------------------------------- +# _load_json +# --------------------------------------------------------------------------- + +class TestLoadJson: + def test_dict(self): + data = _load_json('{"a": 1}') + assert data == {"a": 1} + + def test_list(self): + data = _load_json("[1, 2, 3]") + assert data == [1, 2, 3] + + def test_nested(self): + data = _load_json('{"a": {"b": {"c": true}}}') + assert data["a"]["b"]["c"] is True + + def test_malformed_raises_value_error(self): + with pytest.raises(ValueError, match="Malformed JSON"): + _load_json("{bad json}") + + def test_empty_string_raises_value_error(self): + with pytest.raises(ValueError): + _load_json("") + + +# --------------------------------------------------------------------------- +# _json_serializable +# --------------------------------------------------------------------------- + +class TestJsonSerializable: + def test_plain_dict_unchanged(self): + obj = {"a": 1, "b": "x"} + assert _json_serializable(obj) == obj + + def test_nested_list(self): + obj = [1, [2, 3], {"k": "v"}] + assert _json_serializable(obj) == obj + + def test_datetime_to_isoformat(self): + import datetime + obj = {"ts": datetime.datetime(2024, 1, 15, 12, 0, 0)} + result = _json_serializable(obj) + assert result["ts"] == "2024-01-15T12:00:00" + + def test_date_to_isoformat(self): + import datetime + obj = {"d": datetime.date(2024, 6, 1)} + result = _json_serializable(obj) + assert result["d"] == "2024-06-01" + + def 
test_set_to_sorted_list(self): + obj = {"tags": {3, 1, 2}} + result = _json_serializable(obj) + assert result["tags"] == [1, 2, 3] + + def test_bytes_to_base64(self): + import base64 + obj = {"data": b"hello"} + result = _json_serializable(obj) + assert result["data"] == base64.b64encode(b"hello").decode() + + def test_dict_keys_coerced_to_str(self): + obj = {1: "one", 2: "two"} + result = _json_serializable(obj) + assert "1" in result + assert "2" in result + + +# --------------------------------------------------------------------------- +# yaml_to_json (function API) +# --------------------------------------------------------------------------- + +class TestYamlToJson: + def test_simple_yaml_to_json_file(self, tmp): + src = tmp / "in.yaml" + out = tmp / "out.json" + src.write_text("name: Alice\nage: 30\n") + data = yaml_to_json(src, out) + assert data == {"name": "Alice", "age": 30} + parsed = json.loads(out.read_text()) + assert parsed == {"name": "Alice", "age": 30} + + def test_nested_yaml(self, tmp): + src = tmp / "nested.yaml" + src.write_text("server:\n host: localhost\n port: 8080\n") + data = yaml_to_json(src, text=None) + assert data["server"]["port"] == 8080 + + def test_yaml_list_root(self, tmp): + src = tmp / "list.yaml" + src.write_text("- a\n- b\n- c\n") + data = yaml_to_json(src) + assert data == ["a", "b", "c"] + + def test_yaml_with_nulls(self, tmp): + src = tmp / "nulls.yaml" + src.write_text("key: null\n") + data = yaml_to_json(src) + assert data["key"] is None + + def test_yaml_with_booleans(self, tmp): + src = tmp / "bools.yaml" + src.write_text("active: true\ndebug: false\n") + data = yaml_to_json(src) + assert data["active"] is True + assert data["debug"] is False + + def test_yaml_with_numbers(self, tmp): + src = tmp / "nums.yaml" + src.write_text("count: 42\nrate: 3.14\n") + data = yaml_to_json(src) + assert data["count"] == 42 + assert abs(data["rate"] - 3.14) < 1e-9 + + def test_multi_document_yaml(self, tmp): + src = tmp / 
"multi.yaml" + src.write_text("id: 1\n---\nid: 2\n") + data = yaml_to_json(src) + assert isinstance(data, list) + assert len(data) == 2 + assert data[0]["id"] == 1 + assert data[1]["id"] == 2 + + def test_deep_nesting(self, tmp): + src = tmp / "deep.yaml" + src.write_text("a:\n b:\n c:\n d: 'leaf'\n") + data = yaml_to_json(src) + assert data["a"]["b"]["c"]["d"] == "leaf" + + def test_unicode(self, tmp): + src = tmp / "unicode.yaml" + src.write_text("greeting: こんにちは\n", encoding="utf-8") + data = yaml_to_json(src, encoding="utf-8") + assert data["greeting"] == "こんにちは" + + def test_yaml_anchors_and_aliases(self, tmp): + src = tmp / "anchors.yaml" + src.write_text("defaults: &defaults\n color: red\nitem:\n <<: *defaults\n name: rose\n") + data = yaml_to_json(src) + assert data["item"]["color"] == "red" + assert data["item"]["name"] == "rose" + + def test_file_not_found(self, tmp): + with pytest.raises(FileNotFoundError, match="Input file not found"): + yaml_to_json(tmp / "missing.yaml") + + def test_malformed_yaml(self, tmp): + src = tmp / "bad.yaml" + src.write_text("{bad: [unclosed\n") + with pytest.raises(ValueError, match="Malformed YAML"): + yaml_to_json(src) + + def test_empty_yaml_raises(self, tmp): + src = tmp / "empty.yaml" + src.write_text("") + with pytest.raises(ValueError, match="empty"): + yaml_to_json(src) + + def test_sort_keys(self, tmp): + src = tmp / "sort.yaml" + src.write_text("z: 1\na: 2\nm: 3\n") + out = tmp / "sort.json" + yaml_to_json(src, out, sort_keys=True) + text = out.read_text() + keys = [line.strip().split('"')[1] for line in text.splitlines() if ":" in line] + assert keys == sorted(keys) + + def test_indent_option(self, tmp): + src = tmp / "indent.yaml" + src.write_text("a: 1\nb: 2\n") + out = tmp / "indent.json" + yaml_to_json(src, out, indent=4) + lines = out.read_text().splitlines() + # 4-space indent: lines like ' "a": 1' + data_lines = [l for l in lines if l.startswith(" ")] + assert len(data_lines) >= 2 + + def 
test_text_kwarg(self): + data = yaml_to_json(text="x: 42\n") + assert data == {"x": 42} + + def test_neither_path_nor_text_raises(self): + with pytest.raises(ValueError, match="Provide either"): + yaml_to_json() + + +# --------------------------------------------------------------------------- +# json_to_yaml (function API) +# --------------------------------------------------------------------------- + +class TestJsonToYaml: + def test_simple_json_to_yaml_file(self, tmp): + src = tmp / "in.json" + out = tmp / "out.yaml" + src.write_text('{"name": "Bob", "age": 25}') + data = json_to_yaml(src, out) + assert data == {"name": "Bob", "age": 25} + loaded = yaml.safe_load(out.read_text()) + assert loaded == {"name": "Bob", "age": 25} + + def test_nested_json_to_yaml(self, tmp): + src = tmp / "nested.json" + src.write_text('{"db": {"host": "localhost", "port": 5432}}') + data = json_to_yaml(src) + assert data["db"]["port"] == 5432 + + def test_json_list_to_yaml(self, tmp): + src = tmp / "list.json" + src.write_text('[1, 2, 3]') + data = json_to_yaml(src) + assert data == [1, 2, 3] + + def test_json_with_nulls(self, tmp): + src = tmp / "nulls.json" + src.write_text('{"key": null}') + out = tmp / "nulls.yaml" + json_to_yaml(src, out) + loaded = yaml.safe_load(out.read_text()) + assert loaded["key"] is None + + def test_json_with_booleans(self, tmp): + src = tmp / "bools.json" + src.write_text('{"active": true, "debug": false}') + out = tmp / "bools.yaml" + json_to_yaml(src, out) + loaded = yaml.safe_load(out.read_text()) + assert loaded["active"] is True + assert loaded["debug"] is False + + def test_sort_keys(self, tmp): + src = tmp / "sort.json" + src.write_text('{"z": 1, "a": 2, "m": 3}') + out = tmp / "sort.yaml" + json_to_yaml(src, out, sort_keys=True) + text = out.read_text() + keys = [line.split(":")[0].strip() for line in text.splitlines() if ":" in line] + assert keys == sorted(keys) + + def test_flow_style(self, tmp): + src = tmp / "flow.json" + 
src.write_text('{"a": [1, 2], "b": {"c": 3}}') + out = tmp / "flow.yaml" + json_to_yaml(src, out, default_flow_style=True) + text = out.read_text() + # Flow style puts everything on fewer lines + assert len(text.splitlines()) < 6 + + def test_unicode(self, tmp): + src = tmp / "unicode.json" + src.write_text('{"greeting": "こんにちは"}', encoding="utf-8") + out = tmp / "unicode.yaml" + json_to_yaml(src, out, encoding="utf-8") + loaded = yaml.safe_load(out.read_text(encoding="utf-8")) + assert loaded["greeting"] == "こんにちは" + + def test_deep_nesting(self, tmp): + src = tmp / "deep.json" + src.write_text('{"a":{"b":{"c":{"d":"leaf"}}}}') + data = json_to_yaml(src) + assert data["a"]["b"]["c"]["d"] == "leaf" + + def test_file_not_found(self, tmp): + with pytest.raises(FileNotFoundError, match="Input file not found"): + json_to_yaml(tmp / "missing.json") + + def test_malformed_json(self, tmp): + src = tmp / "bad.json" + src.write_text("{bad json}") + with pytest.raises(ValueError, match="Malformed JSON"): + json_to_yaml(src) + + def test_text_kwarg(self): + data = json_to_yaml(text='{"x": 99}') + assert data == {"x": 99} + + def test_neither_path_nor_text_raises(self): + with pytest.raises(ValueError, match="Provide either"): + json_to_yaml() + + +# --------------------------------------------------------------------------- +# _detect_format +# --------------------------------------------------------------------------- + +class TestDetectFormat: + def test_yaml_extension(self): + assert _detect_format(Path("file.yaml"), None) == "yaml" + + def test_yml_extension(self): + assert _detect_format(Path("file.yml"), None) == "yaml" + + def test_json_extension(self): + assert _detect_format(Path("file.json"), None) == "json" + + def test_jsonl_extension(self): + assert _detect_format(Path("file.jsonl"), None) == "json" + + def test_forced_yaml(self): + assert _detect_format(Path("file.txt"), "yaml") == "yaml" + + def test_forced_json(self): + assert _detect_format(Path("file.txt"), 
"json") == "json" + + def test_unknown_extension_raises(self): + with pytest.raises(ValueError, match="Cannot detect format"): + _detect_format(Path("file.txt"), None) + + +# --------------------------------------------------------------------------- +# CLI (main) +# --------------------------------------------------------------------------- + +class TestCLI: + def test_yaml_to_json_via_cli(self, tmp): + src = tmp / "in.yaml" + out = tmp / "out.json" + src.write_text("hello: world\n") + rc = main([str(src), "-o", str(out)]) + assert rc == 0 + assert json.loads(out.read_text()) == {"hello": "world"} + + def test_json_to_yaml_via_cli(self, tmp): + src = tmp / "in.json" + out = tmp / "out.yaml" + src.write_text('{"hello": "world"}') + rc = main([str(src), "-o", str(out)]) + assert rc == 0 + assert yaml.safe_load(out.read_text()) == {"hello": "world"} + + def test_missing_file_returns_1(self, tmp): + rc = main([str(tmp / "missing.yaml"), "-o", "/dev/null"]) + assert rc == 1 + + def test_malformed_yaml_returns_1(self, tmp): + src = tmp / "bad.yaml" + src.write_text("{bad: [unclosed\n") + rc = main([str(src)]) + assert rc == 1 + + def test_malformed_json_returns_1(self, tmp): + src = tmp / "bad.json" + src.write_text("{bad json}") + rc = main([str(src)]) + assert rc == 1 + + def test_unknown_extension_exits(self, tmp): + src = tmp / "file.txt" + src.write_text("hello") + with pytest.raises(SystemExit): + main([str(src)]) + + def test_force_from_yaml(self, tmp): + src = tmp / "data.txt" + out = tmp / "data.json" + src.write_text("k: v\n") + rc = main([str(src), "--from", "yaml", "-o", str(out)]) + assert rc == 0 + assert json.loads(out.read_text()) == {"k": "v"} + + def test_force_from_json(self, tmp): + src = tmp / "data.txt" + out = tmp / "data.yaml" + src.write_text('{"k": "v"}') + rc = main([str(src), "--from", "json", "-o", str(out)]) + assert rc == 0 + assert yaml.safe_load(out.read_text()) == {"k": "v"} + + def test_sort_keys_flag(self, tmp): + src = tmp / "s.yaml" 
+ out = tmp / "s.json" + src.write_text("z: 3\na: 1\n") + main([str(src), "--sort-keys", "-o", str(out)]) + text = out.read_text() + idx_a = text.index('"a"') + idx_z = text.index('"z"') + assert idx_a < idx_z + + def test_indent_flag(self, tmp): + src = tmp / "i.yaml" + out = tmp / "i.json" + src.write_text("a: 1\n") + main([str(src), "--indent", "4", "-o", str(out)]) + lines = out.read_text().splitlines() + content_lines = [l for l in lines if '"a"' in l] + assert content_lines[0].startswith(" ") + + def test_flow_style_flag(self, tmp): + src = tmp / "f.json" + out = tmp / "f.yaml" + src.write_text('{"a": [1, 2]}') + main([str(src), "--flow-style", "-o", str(out)]) + text = out.read_text() + assert len(text.splitlines()) <= 3 + + def test_stdout_yaml_to_json(self, tmp, capsys): + src = tmp / "stdout.yaml" + src.write_text("x: 1\n") + rc = main([str(src)]) + assert rc == 0 + captured = capsys.readouterr() + assert json.loads(captured.out) == {"x": 1} + + def test_stdout_json_to_yaml(self, tmp, capsys): + src = tmp / "stdout.json" + src.write_text('{"x": 1}') + rc = main([str(src)]) + assert rc == 0 + captured = capsys.readouterr() + assert yaml.safe_load(captured.out) == {"x": 1} + + def test_version_flag(self, capsys): + with pytest.raises(SystemExit) as exc_info: + main(["--version"]) + assert exc_info.value.code == 0 + captured = capsys.readouterr() + assert "1.0.0" in captured.out + + def test_yaml_list_of_objects(self, tmp): + src = tmp / "objs.yaml" + out = tmp / "objs.json" + src.write_text("- id: 1\n name: Alice\n- id: 2\n name: Bob\n") + main([str(src), "-o", str(out)]) + data = json.loads(out.read_text()) + assert len(data) == 2 + assert data[0]["name"] == "Alice" + + +# --------------------------------------------------------------------------- +# Round-trip tests +# --------------------------------------------------------------------------- + +class TestRoundTrip: + def test_yaml_json_yaml_roundtrip(self, tmp): + original_yaml = "server:\n host: 
localhost\n port: 8080\ndebug: false\n" + yaml_in = tmp / "orig.yaml" + json_mid = tmp / "mid.json" + yaml_out = tmp / "final.yaml" + yaml_in.write_text(original_yaml) + yaml_to_json(yaml_in, json_mid) + json_to_yaml(json_mid, yaml_out) + orig = yaml.safe_load(original_yaml) + final = yaml.safe_load(yaml_out.read_text()) + assert orig == final + + def test_json_yaml_json_roundtrip(self, tmp): + original_json = '{"users": [{"id": 1, "active": true}, {"id": 2, "active": false}]}' + json_in = tmp / "orig.json" + yaml_mid = tmp / "mid.yaml" + json_out = tmp / "final.json" + json_in.write_text(original_json) + json_to_yaml(json_in, yaml_mid) + yaml_to_json(yaml_mid, json_out) + orig = json.loads(original_json) + final = json.loads(json_out.read_text()) + assert orig == final + + def test_complex_nested_roundtrip(self, tmp): + data = { + "project": { + "name": "TIAMAT", + "version": "1.0.0", + "features": ["chat", "summarize", "generate"], + "config": { + "max_tokens": 4096, + "temperature": 0.7, + "providers": ["anthropic", "groq", "cerebras"], + }, + } + } + json_in = tmp / "complex.json" + yaml_mid = tmp / "complex.yaml" + json_out = tmp / "complex_out.json" + json_in.write_text(json.dumps(data)) + json_to_yaml(json_in, yaml_mid) + yaml_to_json(yaml_mid, json_out) + assert json.loads(json_out.read_text()) == data diff --git a/yaml_json_converter.py b/yaml_json_converter.py new file mode 100644 index 0000000..dc51b72 --- /dev/null +++ b/yaml_json_converter.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +""" +yaml-json-cli: Convert YAML ↔ JSON from the command line. 
+ +Supports: + - YAML → JSON with full nested-structure preservation + - JSON → YAML with configurable output style + - Multi-document YAML (---) → JSON array + - stdin/stdout pipelines + - configurable indent, flow-style, sort-keys, encoding +""" + +import argparse +import json +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + +try: + import yaml +except ImportError: # pragma: no cover + sys.exit( + "Error: PyYAML is not installed. Run: pip install pyyaml" + ) + +__version__ = "1.0.0" +__author__ = "ENERGENAI LLC" + + +# --------------------------------------------------------------------------- +# Core helpers +# --------------------------------------------------------------------------- + +def _load_yaml_docs(text: str, source: str = "") -> List[Any]: + """ + Parse all YAML documents in *text* and return them as a list. + + A single-document YAML returns a one-item list. + Multi-document YAML (separated by ``---``) returns multiple items. + + Raises + ------ + ValueError — on any YAML parse error + """ + try: + docs = list(yaml.safe_load_all(text)) + except yaml.YAMLError as exc: + raise ValueError(f"Malformed YAML in {source}: {exc}") from exc + + # yaml.safe_load_all yields None for empty documents; filter them. + docs = [d for d in docs if d is not None] + return docs + + +def _load_json(text: str, source: str = "") -> Any: + """ + Parse JSON text and return the Python object. + + Raises + ------ + ValueError — on any JSON parse error + """ + try: + return json.loads(text) + except json.JSONDecodeError as exc: + raise ValueError(f"Malformed JSON in {source}: {exc}") from exc + + +def _json_serializable(obj: Any) -> Any: + """ + Recursively coerce YAML-specific types to JSON-safe equivalents. 
+ + - datetime → ISO-8601 string + - date → ISO-8601 string + - set → sorted list + - bytes → base64 string (rare in YAML 1.1) + """ + import datetime, base64 + + if isinstance(obj, dict): + return {str(k): _json_serializable(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_json_serializable(v) for v in obj] + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + if isinstance(obj, set): + return sorted(_json_serializable(v) for v in obj) + if isinstance(obj, bytes): + return base64.b64encode(obj).decode("ascii") + return obj + + +# --------------------------------------------------------------------------- +# Conversion functions +# --------------------------------------------------------------------------- + +def yaml_to_json( + input_path: Union[str, Path, None] = None, + output_path: Optional[Union[str, Path]] = None, + *, + indent: int = 2, + sort_keys: bool = False, + encoding: str = "utf-8", + text: Optional[str] = None, +) -> Any: + """ + Convert a YAML file (or string) to JSON. 
+ + Parameters + ---------- + input_path : path to input YAML file, or None when *text* is given + output_path : path to output JSON file, or None for stdout + indent : JSON indent width (default 2) + sort_keys : sort JSON object keys alphabetically + encoding : file encoding + text : raw YAML string (overrides input_path) + + Returns + ------- + Parsed Python object (dict, list, or scalar) + + Raises + ------ + FileNotFoundError — input file missing + ValueError — malformed YAML + """ + if text is not None: + source = "" + raw = text + elif input_path is not None: + input_path = Path(input_path) + source = str(input_path) + try: + raw = input_path.read_text(encoding=encoding) + except FileNotFoundError: + raise FileNotFoundError(f"Input file not found: {input_path}") + else: + raise ValueError("Provide either input_path or text.") + + docs = _load_yaml_docs(raw, source) + + if len(docs) == 0: + raise ValueError(f"YAML document in {source} is empty.") + + # Single document → unwrap; multi-document → keep as array + data = docs[0] if len(docs) == 1 else docs + data = _json_serializable(data) + + result = json.dumps(data, indent=indent, sort_keys=sort_keys, ensure_ascii=False) + + if output_path: + Path(output_path).write_text(result, encoding=encoding) + else: + print(result) + + return data + + +def json_to_yaml( + input_path: Union[str, Path, None] = None, + output_path: Optional[Union[str, Path]] = None, + *, + sort_keys: bool = False, + default_flow_style: bool = False, + encoding: str = "utf-8", + text: Optional[str] = None, +) -> Any: + """ + Convert a JSON file (or string) to YAML. 
+ + Parameters + ---------- + input_path : path to input JSON file, or None when *text* is given + output_path : path to output YAML file, or None for stdout + sort_keys : sort YAML mapping keys alphabetically + default_flow_style : if True, emit YAML in compact flow style + encoding : file encoding + text : raw JSON string (overrides input_path) + + Returns + ------- + Parsed Python object + + Raises + ------ + FileNotFoundError — input file missing + ValueError — malformed JSON + """ + if text is not None: + source = "" + raw = text + elif input_path is not None: + input_path = Path(input_path) + source = str(input_path) + try: + raw = input_path.read_text(encoding=encoding) + except FileNotFoundError: + raise FileNotFoundError(f"Input file not found: {input_path}") + else: + raise ValueError("Provide either input_path or text.") + + data = _load_json(raw, source) + + result = yaml.dump( + data, + default_flow_style=default_flow_style, + sort_keys=sort_keys, + allow_unicode=True, + ) + + if output_path: + Path(output_path).write_text(result, encoding=encoding) + else: + sys.stdout.write(result) + + return data + + +# --------------------------------------------------------------------------- +# CLI plumbing +# --------------------------------------------------------------------------- + +_YAML_EXTENSIONS = {".yaml", ".yml"} +_JSON_EXTENSIONS = {".json", ".jsonl"} + + +def _detect_format(path: Path, forced: Optional[str]) -> str: + if forced: + return forced + suffix = path.suffix.lower() + if suffix in _YAML_EXTENSIONS: + return "yaml" + if suffix in _JSON_EXTENSIONS: + return "json" + raise ValueError( + f"Cannot detect format from extension '{suffix}'. " + "Use --from yaml|json to force it." 
+ ) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="yaml-json-cli", + description="Convert YAML ↔ JSON bidirectionally.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +examples: + yaml-json-cli config.yaml YAML → JSON (stdout) + yaml-json-cli config.yaml -o config.json YAML → JSON (file) + yaml-json-cli data.json -o data.yaml JSON → YAML (file) + yaml-json-cli data.json JSON → YAML (stdout) + yaml-json-cli data.json --sort-keys Sort keys in output + yaml-json-cli data.json --flow-style Compact YAML flow style + yaml-json-cli data.yaml --indent 4 4-space JSON indent + yaml-json-cli multi.yaml -o out.json Multi-doc YAML → JSON array + cat config.yaml | yaml-json-cli /dev/stdin --from yaml +""", + ) + + parser.add_argument( + "input", metavar="INPUT", + help="Input file (.yaml, .yml, or .json). Use /dev/stdin for piped input.", + ) + parser.add_argument( + "-o", "--output", metavar="OUTPUT", default=None, + help="Output file. Defaults to stdout.", + ) + parser.add_argument( + "--from", dest="from_format", choices=["yaml", "json"], metavar="FORMAT", + help="Force input format: yaml or json.", + ) + parser.add_argument( + "--indent", type=int, default=2, metavar="N", + help="JSON indent width when writing JSON output (default: 2).", + ) + parser.add_argument( + "--sort-keys", action="store_true", + help="Sort mapping keys in output.", + ) + parser.add_argument( + "--flow-style", action="store_true", + help="Emit YAML in compact flow style (JSON-like). 
Only applies to YAML output.", + ) + parser.add_argument( + "--encoding", default="utf-8", metavar="ENC", + help="File encoding (default: utf-8).", + ) + parser.add_argument( + "--version", action="version", version=f"%(prog)s {__version__}", + ) + + return parser + + +def main(argv=None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + + input_path = Path(args.input) + + try: + fmt = _detect_format(input_path, args.from_format) + except ValueError as exc: + parser.error(str(exc)) + return 2 # unreachable but satisfies type checkers + + try: + if fmt == "yaml": + yaml_to_json( + input_path, + output_path=args.output, + indent=args.indent, + sort_keys=args.sort_keys, + encoding=args.encoding, + ) + else: + json_to_yaml( + input_path, + output_path=args.output, + sort_keys=args.sort_keys, + default_flow_style=args.flow_style, + encoding=args.encoding, + ) + except FileNotFoundError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + except ValueError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + except KeyboardInterrupt: + return 130 + + return 0 + + +if __name__ == "__main__": + sys.exit(main())