From f49eddc1e22db88540d5d173d9232ffe8e71ff60 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 5 Mar 2026 13:51:10 +0100 Subject: [PATCH 1/4] basic workflow to publish rules --- .github/workflows/publish.yml | 84 ++++++++++++++++++++++++++++ scripts/publish.py | 100 ++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 .github/workflows/publish.yml create mode 100644 scripts/publish.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..8789b674 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,84 @@ +name: Publish Rules + +on: + workflow_call: + push: + branches: + - main + +jobs: + publish: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v44 + with: + files_ignore: | + .github/** + scripts/** + README.md + .gitignore + + - name: Debug changed files + run: | + echo "Added files:" + echo "${{ steps.changed-files.outputs.added_files }}" + + - name: Extract new rule directories + id: newdirs + run: | + NEW_DIRS="" + + for file in ${{ steps.changed-files.outputs.added_files }}; do + dir=$(dirname "$file") + + if [ -f "$dir/rule.yaml" ]; then + NEW_DIRS="$NEW_DIRS $dir" + fi + done + + NEW_DIRS=$(echo $NEW_DIRS | xargs -n1 | sort -u | xargs) + + echo "NEW_DIRS=$NEW_DIRS" + echo "NEW_DIRS=$NEW_DIRS" >> $GITHUB_OUTPUT + + - name: Find existing CORE IDs + id: coreids + run: | + COREIDS=$(ls -d CORE-* 2>/dev/null | sed 's/CORE-//' | sort -n | xargs) + + echo "COREIDS=$COREIDS" + echo "COREIDS=$COREIDS" >> $GITHUB_OUTPUT + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install pyyaml + + - name: Run publish script + run: | + python scripts/publish.py \ + --new-dirs "${{ steps.newdirs.outputs.NEW_DIRS }}" \ + --existing "${{ steps.coreids.outputs.COREIDS }}" \ + --algorithm min + + - name: Commit changes + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + git add . + git commit -m "Auto-publish new rules" || exit 0 + + - name: Push changes + if: ${{ !env.ACT }} + run: | + git push \ No newline at end of file diff --git a/scripts/publish.py b/scripts/publish.py new file mode 100644 index 00000000..d25b3b08 --- /dev/null +++ b/scripts/publish.py @@ -0,0 +1,100 @@ +import argparse +import os +import re +import shutil +import yaml + +CORE_PATTERN = re.compile(r"^CORE-\d{6}$") + +def extract_numeric_ids(coreids): + numbers = [] + for cid in coreids: + match = CORE_PATTERN.match(cid) + if match: + numbers.append(int(match.group(1))) + return numbers + + +def max_next_id(existing_coreids): + nums = extract_numeric_ids(existing_coreids) + max_id = max(nums) if nums else 0 + return f"CORE-{str(max_id + 1).zfill(6)}" + + +def min_next_id(existing_coreids): + nums = sorted(extract_numeric_ids(existing_coreids)) + expected = 1 + for num in nums: + if num != expected: + return f"CORE-{str(expected).zfill(6)}" + expected += 1 + return f"CORE-{str(expected).zfill(6)}" + + +def generate_core_id(existing_coreids, algorithm): + if algorithm == "min": + return min_next_id(existing_coreids) + return max_next_id(existing_coreids) + + +def publish_rule_yaml(yaml_path, existing_coreids, algorithm): + with open(yaml_path, "r") as f: + content = yaml.safe_load(f) + + if content is None: + content = {} + + if "Core" not in content or content["Core"] is None: + content["Core"] = {} + + core = content["Core"] + + if not CORE_PATTERN.match(str(core.get("Id", ""))): + new_id = generate_core_id(existing_coreids, algorithm) + core["Id"] = new_id + else: + new_id = core["Id"] + + core["Status"] = "Published" + + with open(yaml_path, "w") as f: + yaml.safe_dump(content, f, sort_keys=False) + + return new_id + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--new-dirs", required=True) + parser.add_argument("--existing", required=True) + parser.add_argument("--algorithm", choices=["min", "max"], default="min") + args = parser.parse_args() + + new_dirs = [d.strip() for d in args.new_dirs.split() if d.strip()] + existing_coreids = [d.strip() for d in args.existing.split() if d.strip()] + + for directory in new_dirs: + if not os.path.isdir(directory): + continue + + yaml_path = os.path.join(directory, "rule.yaml") + if not os.path.exists(yaml_path): + continue + + new_core_id = publish_rule_yaml( + yaml_path, + existing_coreids, + args.algorithm, + ) + + if directory != new_core_id: + shutil.move(directory, new_core_id) + + if new_core_id not in existing_coreids: + existing_coreids.append(new_core_id) + + print(f"Published {directory} -> {new_core_id}") + + +if __name__ == "__main__": + main() \ No newline at end of file From fd0da2f267f4924b6b280db8deb156efc3f72a95 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 5 Mar 2026 14:27:36 +0100 Subject: [PATCH 2/4] remove debug step --- .github/workflows/publish.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8789b674..eb7695a2 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -24,11 +24,6 @@ jobs: README.md .gitignore - - name: Debug changed files - run: | - echo "Added files:" - echo "${{ steps.changed-files.outputs.added_files }}" - - name: Extract new rule directories id: newdirs run: | From a18b4bf949bf4f48403a812b415e27d49f6f9b6f Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 9 Mar 2026 15:42:25 +0100 Subject: [PATCH 3/4] fixed regex pattern --- scripts/publish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/publish.py b/scripts/publish.py index d25b3b08..7cd3b01c 100644 --- a/scripts/publish.py +++ b/scripts/publish.py @@ -4,7 +4,7 @@ import shutil import yaml -CORE_PATTERN = re.compile(r"^CORE-\d{6}$") +CORE_PATTERN = re.compile(r"^CORE-(\d{6})$") def extract_numeric_ids(coreids): numbers = [] From bc5b05eda279014d55ff70b52c79c3c19252741c Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 17 Mar 2026 12:19:40 +0100 Subject: [PATCH 4/4] csv ledger book support implemented --- .github/workflows/publish.yml | 17 ++-- scripts/publish.py | 186 ++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 75 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index eb7695a2..c713f5ca 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,18 +42,16 @@ jobs: echo "NEW_DIRS=$NEW_DIRS" echo "NEW_DIRS=$NEW_DIRS" >> $GITHUB_OUTPUT - - name: Find existing CORE IDs - id: coreids + - name: Stop if nothing to publish + if: steps.newdirs.outputs.NEW_DIRS == '' run: | - COREIDS=$(ls -d CORE-* 2>/dev/null | sed 's/CORE-//' | sort -n | xargs) - - echo "COREIDS=$COREIDS" - echo "COREIDS=$COREIDS" >> $GITHUB_OUTPUT + echo "No new rules found" + exit 0 - name: Setup Python uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: "3.11" - name: Install dependencies run: | @@ -73,7 +71,8 @@ jobs: git add . git commit -m "Auto-publish new rules" || exit 0 - - name: Push changes - if: ${{ !env.ACT }} + git diff --cached --quiet || git commit -m "Auto-publish rules" + + - name: Push run: | git push \ No newline at end of file diff --git a/scripts/publish.py b/scripts/publish.py index 7cd3b01c..6bc24041 100644 --- a/scripts/publish.py +++ b/scripts/publish.py @@ -1,100 +1,154 @@ -import argparse -import os +import csv import re -import shutil +from pathlib import Path import yaml +import argparse CORE_PATTERN = re.compile(r"^CORE-(\d{6})$") -def extract_numeric_ids(coreids): - numbers = [] - for cid in coreids: - match = CORE_PATTERN.match(cid) - if match: - numbers.append(int(match.group(1))) - return numbers +def parse_rule(rule_path: Path): + with open(rule_path, encoding="utf-8") as f: + data = yaml.safe_load(f) -def max_next_id(existing_coreids): - nums = extract_numeric_ids(existing_coreids) - max_id = max(nums) if nums else 0 - return f"CORE-{str(max_id + 1).zfill(6)}" + standards = [] + for auth in data.get("Authorities", []): + for std in auth.get("Standards", []): + name = std.get("Name") + rule_id = None + rule_ver = None -def min_next_id(existing_coreids): - nums = sorted(extract_numeric_ids(existing_coreids)) - expected = 1 - for num in nums: - if num != expected: - return f"CORE-{str(expected).zfill(6)}" - expected += 1 - return f"CORE-{str(expected).zfill(6)}" + refs = std.get("References", []) + if refs: + rid_info = refs[0].get("Rule Identifier") + if rid_info: + rule_id = rid_info.get("Id") + rule_ver = rid_info.get("Version") + std_version = str(std.get("Version")) -def generate_core_id(existing_coreids, algorithm): - if algorithm == "min": - return min_next_id(existing_coreids) - return max_next_id(existing_coreids) + standards.append( + { + "name": name, + "version": std_version, + "rule_id": rule_id, + "rule_version": rule_ver, + } + ) + return standards -def publish_rule_yaml(yaml_path, existing_coreids, algorithm): - with open(yaml_path, "r") as f: - content = yaml.safe_load(f) - if content is None: - content = {} +def get_next_core_id(mappings_dir: Path, algorithm="max"): + existing_ids = [] - if "Core" not in content or content["Core"] is None: - content["Core"] = {} + for file in mappings_dir.glob("*_mappings.csv"): + with open(file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + core = row.get("CORE-ID", "").strip() + match = CORE_PATTERN.match(core) + if match: + existing_ids.append(int(match.group(1))) - core = content["Core"] + existing_ids.sort() - if not CORE_PATTERN.match(str(core.get("Id", ""))): - new_id = generate_core_id(existing_coreids, algorithm) - core["Id"] = new_id + if algorithm == "min": + next_id = 1 + for eid in existing_ids: + if eid != next_id: + break + next_id += 1 else: - new_id = core["Id"] - - core["Status"] = "Published" - - with open(yaml_path, "w") as f: - yaml.safe_dump(content, f, sort_keys=False) - - return new_id + next_id = max(existing_ids, default=0) + 1 + + return f"CORE-{next_id:06d}" + + +def _get_field_names(grouped_standard): + all_versions = set(grouped_standard.keys()) - {'rule_id', 'name'} + version_columns = sorted(all_versions) + fieldnames = ["Rule ID"] + version_columns + ["Status", "CORE-ID"] + return fieldnames + + +def update_csv(mapping_file: Path, grouped_standard: dict, core_id: str) -> str: + rows = [] + fieldnames = [] + if mapping_file.exists(): + with open(mapping_file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames or [] + for row in reader: + rows.append(row) + if not fieldnames: + fieldnames = _get_field_names(grouped_standard) + row = next((x for x in rows if x.get('Rule ID') == grouped_standard['rule_id']), {}) + if not row: + rows.append(row) + row.update({col: grouped_standard.get(col) for col in set(fieldnames) - {"Status", "CORE-ID"}}) + row["Rule ID"] = grouped_standard['rule_id'] + row["CORE-ID"] = row.get("CORE-ID") or core_id + row["Status"] = "PUBLISHED" + + with open(mapping_file, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + return row.get("CORE-ID") def main(): parser = argparse.ArgumentParser() - parser.add_argument("--new-dirs", required=True) - parser.add_argument("--existing", required=True) - parser.add_argument("--algorithm", choices=["min", "max"], default="min") + parser.add_argument("--new-dirs", required=True, help="Папки правил через пробел") + parser.add_argument( + "--algorithm", choices=["min", "max"], default="max", help="Алгоритм CORE-ID" + ) args = parser.parse_args() - new_dirs = [d.strip() for d in args.new_dirs.split() if d.strip()] - existing_coreids = [d.strip() for d in args.existing.split() if d.strip()] + mappings_dir = Path("mappings") - for directory in new_dirs: - if not os.path.isdir(directory): + for rule_dir in args.new_dirs.split(): + rule_path = Path(rule_dir) / "rule.yaml" + if not rule_path.exists(): continue + standards = parse_rule(rule_path) - yaml_path = os.path.join(directory, "rule.yaml") - if not os.path.exists(yaml_path): - continue + core_id = get_next_core_id(mappings_dir, args.algorithm) + + result = {} + for item in standards: + key = (item['name'], item['rule_id']) + if key not in result: + result[key] = { + 'name': item['name'], + 'rule_id': item['rule_id'] + } + + result[key][item['version']] = item['version'] + + actual_core_id = core_id + for (std, rule_id), versions in result.items(): + mapping_file = mappings_dir / f"{std}_mappings.csv" + actual_core_id = update_csv(mapping_file, versions, core_id) - new_core_id = publish_rule_yaml( - yaml_path, - existing_coreids, - args.algorithm, - ) + update_rule_yaml(actual_core_id, rule_path) - if directory != new_core_id: - shutil.move(directory, new_core_id) + new_path = Path(actual_core_id) + Path(rule_dir).rename(new_path) - if new_core_id not in existing_coreids: - existing_coreids.append(new_core_id) - print(f"Published {directory} -> {new_core_id}") +def update_rule_yaml(actual_core_id: str, rule_path: Path): + with open(rule_path, encoding="utf-8") as f: + doc = yaml.safe_load(f) + if "Core" not in doc: + doc["Core"] = {} + doc["Core"]["Id"] = actual_core_id + doc["Core"]["Status"] = "Published" + with open(rule_path, "w", encoding="utf-8") as f: + yaml.safe_dump(doc, f, sort_keys=False) if __name__ == "__main__": - main() \ No newline at end of file + main()