From 5030562fc69d2b20c36a1b43abcf1991a4cb06b5 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:26:55 +0100 Subject: [PATCH 1/7] feat: restructure project layout and add workspace scaffolding Migrate from single-crate layout to multi-crate workspace with Bazel 8.3 + Cargo dual build system. Add xtask runner for common development commands. --- .bazelrc | 14 + .bazelversion | 1 + .gitignore | 22 +- .ruff.toml | 97 ++ .vscode/extensions.json | 27 + .vscode/settings.json | 93 ++ .yamlfmt | 3 + BUILD | 63 + Cargo.lock | 1403 +++++++++++++++++ Cargo.toml | 48 +- MODULE.bazel | 113 ++ examples/BUILD | 8 + patches/BUILD | 5 + .../iceoryx2_bb_derive_macros_readme.patch | 20 + project_config.bzl | 5 + rust-toolchain.toml | 2 +- rustfmt.toml | 6 + src/BUILD | 0 src/api.rs | 118 -- src/catalog.rs | 65 - src/config.rs | 95 -- src/ids.rs | 58 - src/lib.rs | 37 - src/model.rs | 130 -- src/sink.rs | 50 - src/utils.rs | 51 - src/xtask/Cargo.toml | 9 + src/xtask/src/main.rs | 312 ++++ tests/hvac_component.rs | 200 --- 29 files changed, 2241 insertions(+), 814 deletions(-) create mode 100644 .bazelrc create mode 100644 .bazelversion create mode 100644 .ruff.toml create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json create mode 100644 .yamlfmt create mode 100644 BUILD create mode 100644 Cargo.lock create mode 100644 MODULE.bazel create mode 100644 examples/BUILD create mode 100644 patches/BUILD create mode 100644 patches/iceoryx2_bb_derive_macros_readme.patch create mode 100644 project_config.bzl create mode 100644 rustfmt.toml create mode 100644 src/BUILD delete mode 100644 src/api.rs delete mode 100644 src/catalog.rs delete mode 100644 src/config.rs delete mode 100644 src/ids.rs delete mode 100644 src/lib.rs delete mode 100644 src/model.rs delete mode 100644 src/sink.rs delete mode 100644 src/utils.rs create mode 100644 src/xtask/Cargo.toml create mode 100644 src/xtask/src/main.rs delete mode 100644 tests/hvac_component.rs diff 
--git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..a7ef843 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,14 @@ +build --java_language_version=17 +build --tool_java_language_version=17 +build --java_runtime_version=remotejdk_17 +build --tool_java_runtime_version=remotejdk_17 +build --@score-baselibs//score/json:base_library=nlohmann +build --@score-baselibs//score/mw/log/flags:KRemote_Logging=False + +test --test_output=errors + +common --registry=https://raw.githubusercontent.com/eclipse-score/bazel_registry/main/ +common --registry=https://bcr.bazel.build + +# allow empty globs for docs +build --noincompatible_disallow_empty_glob diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..2bf50aa --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +8.3.0 diff --git a/.gitignore b/.gitignore index b429a15..e5e92f5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,13 @@ pip-delete-this-directory.txt # Rust target/ -Cargo.lock .sccache/ +# Bazel +/bazel-* +MODULE.bazel.lock +user.bazelrc + # C++ *.o *.obj @@ -50,9 +54,9 @@ compile_commands.json .idea/ *.iml -# VS Code -.vscode/ -.vscode* +# VS Code (user-specific settings only, shared settings are tracked) +.vscode/*.code-workspace +.vscode/.ropeproject # Sublime Text *.sublime-project @@ -82,6 +86,16 @@ nb-configuration.xml Thumbs.db .bash_history +# Sphinx / docs build +/_build + +# Ruff +.ruff_cache + +# Vale (editorial style guide) +.vale.ini +styles/ + # General .cache/ .direnv/ diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..1d31456 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,97 @@ +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] + +line-length = 120 +indent-width = 4 + +[lint] +select = [ + # flake8-boolean-trap + "FBT", + # flake8-bugbear + "B", + # flake8-builtins + "A", + # flake8-comprehensions + "C4", + # flake8-fixme + "FIX", + # flake8-implicit-str-concat + "ISC", + # flake8-pie + "PIE", + # flake8-print + "T20", + # flake8-pytest-style + "PT", + # flake8-raise + "RSE", + # flake8-return + "RET501", + "RET502", + "RET503", + "RET504", + # flake8-self + "SLF", + # flake8-simplify + "SIM", + # flake8-type-checking + "TC", + # flake8-unused-arguments + "ARG", + # flake8-use-pathlib + "PTH", + + # isort + "I", + + # pycodestyle error + "E", + # Pyflakes + "F", + # pyupgrade + "UP", +] +ignore = ["F401", "PTH123", "ARG002"] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. 
+line-ending = "auto" \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..4688a2b --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,27 @@ +{ + "recommendations": [ + // Rust language support via rust-analyzer + "rust-lang.rust-analyzer", + + // Editing *.drawio.svg files directly in VS Code + "hediet.vscode-drawio", + + // Some convenient extensions for editing reStructuredText files + "lextudio.restructuredtext", + + // Linting and live preview for score docs + "swyddfa.esbonio", + + // ErrorLens highlights errors and warnings in your code / docs + "usernamehw.errorlens", + + // Linting and formatting for Python (LSP via ruff server) + "charliermarsh.ruff", + + // BasedPyright for python various type checking improvements + "detachhead.basedpyright", + + // Bazel support + "BazelBuild.vscode-bazel" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..4909a78 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,93 @@ +{ + // General Settings + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "files.trimTrailingWhitespace": true, + "editor.insertSpaces": true, + "editor.tabCompletion": "on", + + // Default for any filetype + "editor.rulers": [ + 99 + ], + + // Exclude build, temp and cache folders + "files.watcherExclude": { + ".*/**": true, + "**/__pycache__/**": true, + "bazel-*/**": true, + ".venv*/**": true, + "_build/**": true, + "target/**": true + }, + + // Rust Settings + "[rust]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "rust-lang.rust-analyzer" + }, + "rust-analyzer.check.command": "clippy", + "rust-analyzer.check.extraArgs": [ + "--workspace" + ], + + // Python Settings (for Bazel/tooling scripts) + "python.analysis.exclude": [ + ".*", + "**/__pycache__", + "bazel-*", + ".venv*", + "_build" + ], + "[python]": { + "editor.rulers": [ + 79 + ], + "editor.codeActionsOnSave": { + 
"source.sortImports": "explicit" + }, + "editor.defaultFormatter": "charliermarsh.ruff" + }, + + // Markdown Settings + "[markdown]": { + "editor.rulers": [ + 79, 99 + ] + }, + + // Bazel LSP + "bazel.lsp.command": "bazel", + "bazel.lsp.args": [ + "run", + "//:starpls_server" + ], + + // RST Settings + "[restructuredtext]": { + "editor.tabSize": 3 + }, + + // Esbonio (Sphinx) Settings + "esbonio.server.pythonPath": "${workspaceFolder}/.venv_docs/bin/python", + "esbonio.sphinx.srcDir": "${workspaceFolder}/docs", + "esbonio.sphinx.confDir": "${workspaceFolder}/docs", + "esbonio.sphinx.buildDir": "${workspaceFolder}/_build", + "esbonio.server.logLevel": "info", + "esbonio.server.installBehavior": "nothing", + + // Esbonio 1.x (Preview) + "esbonio.sphinx.pythonCommand": [ + ".venv_docs/bin/python" + ], + "esbonio.sphinx.buildCommand": [ + "docs", + "_build", + "-T", + "--jobs", + "auto", + "--conf-dir", + "docs" + ], + "esbonio.logging.level": "warning" +} diff --git a/.yamlfmt b/.yamlfmt new file mode 100644 index 0000000..26775cb --- /dev/null +++ b/.yamlfmt @@ -0,0 +1,3 @@ +formatter: + type: basic + retain_line_breaks: true \ No newline at end of file diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..486ea5f --- /dev/null +++ b/BUILD @@ -0,0 +1,63 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@score_cr_checker//:cr_checker.bzl", "copyright_checker") +load("@score_dash_license_checker//:dash.bzl", "dash_license_checker") +load("@score_docs_as_code//:docs.bzl", "docs") +load("@score_format_checker//:macros.bzl", "use_format_targets") +load("@score_starpls_lsp//:starpls.bzl", "setup_starpls") +load("//:project_config.bzl", "PROJECT_CONFIG") + +# Export Cargo.lock for license checking +exports_files(["Cargo.lock"]) + +# Creates all documentation targets: +# - `docs:incremental` for building docs incrementally at runtime +# - `docs:live_preview` for live preview in the browser without an IDE + +# - `docs:docs` for building documentation at build-time +docs( + data = [ + "@score_platform//:needs_json", + "@score_process//:needs_json", + ], + source_dir = "docs", +) + +setup_starpls( + name = "starpls_server", + visibility = ["//visibility:public"], +) + +copyright_checker( + name = "copyright", + srcs = [ + "src", + "tests", + "//:BUILD", + "//:MODULE.bazel", + ], + config = "@score_cr_checker//resources:config", + template = "@score_cr_checker//resources:templates", + visibility = ["//visibility:public"], +) + +dash_license_checker( + src = "//examples:cargo_lock", + file_type = "", # let it auto-detect based on project_config + project_config = PROJECT_CONFIG, + visibility = ["//visibility:public"], +) + +# Add target for formatting checks +use_format_targets() diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c680295 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1403 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + 
"cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.2.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cdr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9617422bf43fde9280707a7e90f8f7494389c182f5c70b0f67592d0f06d41dfa" +dependencies = [ + "byteorder", + "serde", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.57" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "common" +version = "0.0.1" +dependencies = [ + "iceoryx2", + "iceoryx2-bb-container", + "log", + "mockall", + "serde", + "serde_json", + "sha2", + "thiserror", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" 
+dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "dfm_lib" +version = "0.0.1" +dependencies = [ + "common", + "env_logger", + "iceoryx2", + "iceoryx2-bb-container", + "log", + "mockall", + "rust_kvs", + "serde_json", + "serial_test", + "tempfile", + "thiserror", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "enum-iterator" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4549325971814bda7a44061bf3fe7e487d447cba01e4220a4b454d630d7a016" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_filter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fault_lib" +version = "0.0.1" +dependencies = [ + "clap", + "common", + "env_logger", + "iceoryx2", + "iceoryx2-bb-container", + "log", + "mockall", + "serde", + "serde_json", + "serial_test", + "sha2", + "thiserror", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fragile" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619" + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iceoryx2" +version = "0.7.0" +source = 
"git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-log", + "iceoryx2-bb-memory", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-cal", + "iceoryx2-pal-concurrency-sync", + "iceoryx2-pal-configuration", + "serde", + "tiny-fn", + "toml", +] + +[[package]] +name = "iceoryx2-bb-container" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-log", + "iceoryx2-pal-concurrency-sync", + "serde", +] + +[[package]] +name = "iceoryx2-bb-derive-macros" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "iceoryx2-bb-elementary" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-elementary-traits", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-elementary-traits" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-linux" +version = "0.7.0" +source = 
"git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-container", + "iceoryx2-bb-log", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-pal-concurrency-sync", + "iceoryx2-pal-os-api", + "iceoryx2-pal-posix", +] + +[[package]] +name = "iceoryx2-bb-lock-free" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-log", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-log" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-memory" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-log", + "iceoryx2-bb-posix", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-posix" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "enum-iterator", + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-log", + "iceoryx2-bb-system-types", + "iceoryx2-pal-concurrency-sync", + "iceoryx2-pal-configuration", + "iceoryx2-pal-posix", + "lazy_static", + "serde", + "tiny-fn", +] + +[[package]] +name = "iceoryx2-bb-system-types" +version = "0.7.0" 
+source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-log", + "iceoryx2-pal-configuration", + "iceoryx2-pal-posix", + "serde", +] + +[[package]] +name = "iceoryx2-cal" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "cdr", + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-linux", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-log", + "iceoryx2-bb-memory", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-pal-concurrency-sync", + "once_cell", + "postcard", + "serde", + "sha1_smol", + "tiny-fn", + "toml", +] + +[[package]] +name = "iceoryx2-pal-concurrency-sync" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" + +[[package]] +name = "iceoryx2-pal-configuration" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" + +[[package]] +name = "iceoryx2-pal-os-api" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "bindgen", + "cc", + "iceoryx2-pal-posix", +] + +[[package]] +name = "iceoryx2-pal-posix" +version = "0.7.0" +source = "git+https://github.com/eclipse-iceoryx/iceoryx2.git?rev=eba5da4b8d8cb03bccf1394d88a05e31f58838dc#eba5da4b8d8cb03bccf1394d88a05e31f58838dc" +dependencies = [ + "bindgen", + "cc", + 
"iceoryx2-pal-concurrency-sync", + "iceoryx2-pal-configuration", + "lazy_static", + "windows-sys 0.48.0", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "integration_tests" +version = "0.0.1" +dependencies = [ + "common", + "dfm_lib", + "env_logger", + "fault_lib", + "log", + "serde_json", + "serial_test", + "tempfile", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jiff" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", +] + +[[package]] +name = "jiff-static" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version 
= "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "mockall" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" +dependencies = [ + "cfg-if", + "downcast", + "fragile", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" +dependencies = [ + "cfg-if", + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "postcard" +version = "1.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "predicates-core", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rust_kvs" +version = "0.1.0" +source = "git+https://github.com/eclipse-score/persistency.git?branch=main#5d9f8225aa5622f52a31003bec937d5ef227dba7" +dependencies = [ + "adler32", + "tinyjson", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "serial_test" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0b343e184fc3b7bb44dff0705fffcf4b3756ba6aff420dddd8b24ca145e555" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6f50427f258fb77356e4cd4aa0e87e2bd2c66dbcee41dc405282cae2bfc26c83" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "termtree" +version = "0.5.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-fn" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9659b108631d1e1cf3e8e489f894bee40bc9d68fd6cc67ec4d4ce9b72d565228" + +[[package]] +name = "tinyjson" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab95735ea2c8fd51154d01e39cf13912a78071c2d89abc49a7ef102a7dd725a" + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = 
"wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "xtask" +version = "0.0.1" + +[[package]] +name = "zmij" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" diff --git a/Cargo.toml b/Cargo.toml index d1b5b1e..ed0b343 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,45 @@ -[package] -name = "fault-lib" -version = "0.1.0" +[workspace] +resolver = "2" + +members = [ + "src/common", + "src/dfm_lib", + "src/fault_lib", + "src/xtask", + "tests/integration", +] + +[workspace.package] +version = "0.0.1" edition = "2024" +license-file = "LICENSE" +authors = ["S-CORE Contributors"] +readme = "README.md" + +[workspace.lints.rust] +unsafe_code = "forbid" + +[workspace.lints.clippy] +todo = "deny" +unimplemented = "deny" +unwrap_used = "deny" +expect_used = "deny" +std_instead_of_core = "deny" +std_instead_of_alloc = "deny" +alloc_instead_of_core = "deny" +cast_possible_truncation = "warn" +arithmetic_side_effects = "warn" +new_without_default = "allow" -[dependencies] -thiserror = "2" \ No newline at end of file +[workspace.dependencies] +env_logger = "0.11.8" +iceoryx2 = { git = "https://github.com/eclipse-iceoryx/iceoryx2.git", rev = "eba5da4b8d8cb03bccf1394d88a05e31f58838dc" } +iceoryx2-bb-container = { git = "https://github.com/eclipse-iceoryx/iceoryx2.git", rev = "eba5da4b8d8cb03bccf1394d88a05e31f58838dc" } +log = "0.4.22" +mockall = "0.13.1" +serial_test = "3.2" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10" +thiserror = "2.0.17" +xtask = { path = "src/xtask" } diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..f025564 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,113 @@ +# 
******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +module( + name = "score_fault_lib", + version = "0.0.1", + compatibility_level = 0, +) + +bazel_dep(name = "rules_python", version = "1.4.1") + +PYTHON_VERSION = "3.12" + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + is_default = True, + python_version = PYTHON_VERSION, +) +use_repo(python) + +# Add GoogleTest dependency +bazel_dep(name = "googletest", version = "1.17.0") +bazel_dep(name = "google_benchmark", version = "1.9.4") + +# Rust rules for Bazel +bazel_dep(name = "rules_rust", version = "0.67.0") + +# Checker rule for CopyRight checks/fixs +bazel_dep(name = "score_cr_checker", version = "0.3.1") + +# C/C++ rules for Bazel +bazel_dep(name = "rules_cc", version = "0.2.13") + +# LLVM Toolchains Rules - host configuration +bazel_dep(name = "score_starpls_lsp", version = "0.1.0") + +# Dash license checker +bazel_dep(name = "score_dash_license_checker", version = "0.1.2") + +# Format checker +bazel_dep(name = "score_format_checker", version = "0.1.1") +bazel_dep(name = "aspect_rules_lint", version = "1.4.4") +bazel_dep(name = "buildifier_prebuilt", version = "8.2.0.2") + +#docs-as-code +bazel_dep(name = "score_docs_as_code", version = "1.0.1") +git_override( + module_name = "score_docs_as_code", + commit = "13ba715a95cfe85158b60d7f4748ba8e28895d8c", + remote = "https://github.com/eclipse-score/docs-as-code.git", +) + +# Provides, pytest & venv 
+bazel_dep(name = "score_python_basics", version = "0.3.4") +bazel_dep(name = "score_platform", version = "0.3.0") +bazel_dep(name = "score_process", version = "1.1.0") + +# Testing utils dependency. +# Direct usage of tag in git_override reports false problem in editor, using hash of a tag +bazel_dep(name = "testing-utils") +git_override( + module_name = "testing-utils", + commit = "a847c7464cfa47e000141631d1223b92560d2e58", # tag v0.2.0 + remote = "https://github.com/qorix-group/testing_tools.git", +) + +# Module deps +rust = use_extension("@rules_rust//rust:extensions.bzl", "rust") +rust.toolchain( + edition = "2024", + versions = ["1.91.0"], +) + +crate = use_extension("@rules_rust//crate_universe:extensions.bzl", "crate") +crate.from_cargo( + name = "score_fault_lib_crates", + cargo_lockfile = "//:Cargo.lock", + manifests = [ + "//:Cargo.toml", + ], +) +use_repo(crate, "score_fault_lib_crates") + +#bazel_dep on module 'rules_boost' has no version -> override needed +archive_override( + module_name = "rules_boost", + strip_prefix = "rules_boost-master", + urls = ["https://github.com/nelhage/rules_boost/archive/refs/heads/master.tar.gz"], +) + +bazel_dep(name = "score-baselibs", version = "0.0.0") +git_override( + module_name = "score-baselibs", + commit = "46923f5c4f302bd9feae0261588687aaf32e3c5c", + remote = "https://github.com/eclipse-score/baselibs.git", +) + +bazel_dep(name = "score_cli_helper", version = "0.1.2") + +crate.annotation( + crate = "iceoryx2-bb-derive-macros", + patches = ["//patches:iceoryx2_bb_derive_macros_readme.patch"], + repositories = ["score_fault_lib_crates"], +) diff --git a/examples/BUILD b/examples/BUILD new file mode 100644 index 0000000..771515c --- /dev/null +++ b/examples/BUILD @@ -0,0 +1,8 @@ +# Needed for Dash tool to check python dependency licenses. 
+filegroup( + name = "cargo_lock", + srcs = [ + "//:Cargo.lock", + ], + visibility = ["//visibility:public"], +) diff --git a/patches/BUILD b/patches/BUILD new file mode 100644 index 0000000..62419f4 --- /dev/null +++ b/patches/BUILD @@ -0,0 +1,5 @@ +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "iceoryx2_bb_derive_macros_readme.patch", +]) diff --git a/patches/iceoryx2_bb_derive_macros_readme.patch b/patches/iceoryx2_bb_derive_macros_readme.patch new file mode 100644 index 0000000..adf8978 --- /dev/null +++ b/patches/iceoryx2_bb_derive_macros_readme.patch @@ -0,0 +1,20 @@ +diff --git Cargo.toml Cargo.toml +index 8fc1a73..a7cbab9 100644 +--- Cargo.toml ++++ Cargo.toml +@@ -7,6 +7,6 @@ keywords = { workspace = true } +-license = { workspace = true } +-readme = { workspace = true } ++license = { workspace = true } ++readme = "README.md" + repository = { workspace = true } + rust-version = { workspace = true } + version = { workspace = true } + +diff --git README.md README.md +new file mode 100644 +index 0000000..d969f9d +--- /dev/null ++++ README.md +@@ -0,0 +1 @@ ++Dummy redme for CARGO_PKG_README error. \ No newline at end of file diff --git a/project_config.bzl b/project_config.bzl new file mode 100644 index 0000000..f764a1d --- /dev/null +++ b/project_config.bzl @@ -0,0 +1,5 @@ +# project_config.bzl +PROJECT_CONFIG = { + "asil_level": "QM", + "source_code": ["rust"], +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 271800c..24d3c84 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly" \ No newline at end of file +channel = "nightly-2025-07-14" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..78d74c8 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,6 @@ +# rust formatter rules. 
+# check configuration fields here: https://rust-lang.github.io/rustfmt/?version=v1.6.0&search= + + +tab_spaces = 4 +max_width = 150 \ No newline at end of file diff --git a/src/BUILD b/src/BUILD new file mode 100644 index 0000000..e69de29 diff --git a/src/api.rs b/src/api.rs deleted file mode 100644 index 0211621..0000000 --- a/src/api.rs +++ /dev/null @@ -1,118 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. -* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use crate::{ - catalog::FaultCatalog, - config::ReporterConfig, - ids::FaultId, - model::{FaultDescriptor, FaultLifecycleStage, FaultRecord}, - sink::{FaultSink, LogHook, SinkError}, -}; -use std::{sync::{Arc, OnceLock}, time::SystemTime}; - -// FaultApi acts as a singleton façade. A component initializes it once and -// subsequent publishing paths retrieve the sink/logger via global accessors. -pub struct FaultApi; - -static SINK: OnceLock> = OnceLock::new(); -static LOGGER: OnceLock> = OnceLock::new(); - -impl FaultApi { - /// Initialize the singleton. Safe to call once; subsequent calls are ignored. - pub fn new(sink: Arc, logger: Arc) -> Self { - let _ = SINK.set(Arc::clone(&sink)); - let _ = LOGGER.set(Arc::clone(&logger)); - FaultApi - } - - pub(crate) fn get_sink() -> Arc { - SINK.get() - .cloned() - .expect("Sink not initialized - call FaultApi::new() before creating reporters") - } - - pub(crate) fn get_logger() -> Arc { - LOGGER.get() - .cloned() - .expect("Logger not initialized - call FaultApi::new() before creating reporters") - } - - /// Publish a record: log locally then enqueue via sink. Non-blocking semantics depend on sink impl. 
- pub fn publish(record: &FaultRecord) -> Result<(), SinkError> { - FaultApi::get_logger().on_report(record); - FaultApi::get_sink().publish(record) - } -} - -/// Per-fault reporter bound to a specific fault descriptor. -/// Create one instance per fault at startup. -#[derive(Clone)] -pub struct Reporter { - fault_id: FaultId, - descriptor: FaultDescriptor, - cfg: ReporterConfig, -} - -impl Reporter { - /// Create a new Reporter bound to a specific fault ID. - /// This should be called once per fault during initialization. - pub fn new( - catalog: &FaultCatalog, - cfg: ReporterConfig, - fault_id: &FaultId, - ) -> Self { - let descriptor = catalog - .find(fault_id) - .expect("fault ID must exist in catalog") - .clone(); - - Self { fault_id: fault_id.clone(), descriptor, cfg } - } - - /// Create a new fault record for this specific fault. - /// The returned record can be mutated before publishing. - pub fn create_record(&self) -> FaultRecord { - FaultRecord { - fault_id: self.fault_id.clone(), - time: SystemTime::now(), - severity: self.descriptor.default_severity, - source: self.cfg.source.clone(), - lifecycle_phase: self.cfg.lifecycle_phase, - stage: FaultLifecycleStage::NotTested, - environment_data: self.cfg.default_environment_data.clone(), - } - } - - /// Publish a fault record. Always logs via LogHook, then publishes via sink. 
- pub fn publish(&self, record: &FaultRecord) -> Result<(), crate::sink::SinkError> { - debug_assert_eq!( - &record.fault_id, &self.fault_id, - "FaultRecord fault_id doesn't match Reporter" - ); - FaultApi::publish(record) - } - - /// Convenience: create and return a record with Failed stage (confirmed failure) - pub fn fail(&self) -> FaultRecord { - let mut rec = self.create_record(); - rec.update_stage(FaultLifecycleStage::Failed); - rec - } - - /// Convenience: create and return a record with Passed stage (healthy) - pub fn pass(&self) -> FaultRecord { - let mut rec = self.create_record(); - rec.update_stage(FaultLifecycleStage::Passed); - rec - } -} diff --git a/src/catalog.rs b/src/catalog.rs deleted file mode 100644 index 74d576e..0000000 --- a/src/catalog.rs +++ /dev/null @@ -1,65 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. -* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use crate::{ids::FaultId, model::FaultDescriptor}; -use std::borrow::Cow; - -/// Declarative catalog shared between reporters and the Diagnostic Fault Manager. -#[derive(Clone, Debug)] -pub struct FaultCatalog { - pub id: Cow<'static, str>, - pub version: u64, - pub descriptors: Cow<'static, [FaultDescriptor]>, -} - -impl FaultCatalog { - pub const fn new( - id: &'static str, - version: u64, - descriptors: &'static [FaultDescriptor], - ) -> Self { - Self { - id: Cow::Borrowed(id), - version, - descriptors: Cow::Borrowed(descriptors), - } - } - - /// When the DFM deserializes a JSON/YAML catalog at startup, this helper - /// lets it hand the owned data back to the library without rebuilding. 
- pub fn from_config( - id: impl Into>, - version: u64, - descriptors: Vec, - ) -> Self { - Self { - id: id.into(), - version, - descriptors: Cow::Owned(descriptors), - } - } - - /// Locate a descriptor by its FaultId, handy for tests or build tooling. - pub fn find(&self, id: &FaultId) -> Option<&FaultDescriptor> { - self.descriptors.iter().find(|d| &d.id == id) - } - - /// Number of descriptors in this catalog, useful for build-time validation. - pub fn len(&self) -> usize { - self.descriptors.len() - } - - pub fn is_empty(&self) -> bool { - self.descriptors.is_empty() - } -} diff --git a/src/config.rs b/src/config.rs deleted file mode 100644 index a27173f..0000000 --- a/src/config.rs +++ /dev/null @@ -1,95 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. -* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use std::time::Duration; - -// Debounce descriptions capture how noisy fault sources should be filtered. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum DebounceMode { - /// Require N occurrences within a window to confirm fault Failed (transition PreFailed -> Failed). - CountWithinWindow { min_count: u32, window: Duration }, - /// Confirm when condition remains continuously bad for at least `duration`. - /// Use for stuck-at / persistent faults where transient glitches should be ignored. - /// Example: sensor delivers identical reading for 60s -> `HoldTime { duration: Duration::from_secs(60) }`. - HoldTime { duration: Duration }, - /// Trigger immediately on first occurrence, then suppress further activations until the cooldown elapses. - /// Use for faults that are meaningful on first edge but may flap rapidly. 
- /// Example: first CAN bus-off event activates fault, ignore subsequent bus-off transitions for 5s -> `EdgeWithCooldown { cooldown: Duration::from_secs(5) }`. - EdgeWithCooldown { cooldown: Duration }, - /// Pure count based: confirm after total (cumulative) occurrences reach threshold. - /// Useful for sporadic errors where temporal proximity is less important than frequency. - /// Example: activate after 10 checksum mismatches regardless of timing. - CountThreshold { min_count: u32 }, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DebouncePolicy { - pub mode: DebounceMode, - /// Optional suppression of repeats in logging within a time window. - pub log_throttle: Option, -} - -// Reset rules define how and when a confirmed (Failed) test result transitions back to Passed. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ResetTrigger { - /// Clear when a given operation cycle kind count meets threshold (e.g. ignition, drive, charge). - /// `cycle_ref` is a symbolic identifier (e.g. "ignition.main", "drive.standard") allowing - /// the DFM to correlate with its cycle counter source. - OperationCycles { kind: OperationCycleKind, min_cycles: u32, cycle_ref: &'static str }, - /// Clear after the fault condition has been continuously absent (tests passing) for `duration`. - /// Relation to cycles: If the reset must align to authoritative operation cycle boundaries, choose - /// `OperationCycles`; `StableFor` is wall/time-source based (monotonic) and independent of cycle counting. - StableFor(Duration), - /// Manual maintenance/tooling only (e.g., regulatory). - DiagnosticTester, -} - -/// Enumerates common operation cycle archetypes relevant for aging/reset semantics. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OperationCycleKind { - Ignition, // Traditional ignition/power cycle - Drive, // Complete drive cycle (start -> run -> stop) - Charge, // Entire HV battery charge session - Thermal, // HVAC or thermal management cycle - Custom(&'static str), // Domain specific cycle identifier -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ResetPolicy { - pub trigger: ResetTrigger, - /// Some regulations require X cycles before clearable from user UI. - pub min_operating_cycles_before_clear: Option, -} - -// Per-component defaults that get baked into a Reporter instance. -#[derive(Debug, Clone)] -pub struct ReporterConfig { - pub source: crate::ids::SourceId, - pub lifecycle_phase: crate::model::LifecyclePhase, - /// Optional per-reporter defaults (e.g., common metadata). - pub default_environment_data: Vec, -} - -// Per-report options provided by the call site when a fault is emitted. -#[derive(Debug, Clone, Default)] -pub struct ReportOptions { - /// Override severity (else descriptor.default_severity). - pub severity: Option, - /// Attach extra metadata key-values (free form). - pub environment_data: Vec, - /// Override policies dynamically (rare, but useful for debug/A-B). - pub debounce: Option, - pub reset: Option, - /// Regulatory/operational flags—extra tags may be added at report time. - pub extra_compliance: Vec, -} diff --git a/src/ids.rs b/src/ids.rs deleted file mode 100644 index 744e22f..0000000 --- a/src/ids.rs +++ /dev/null @@ -1,58 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. 
-* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use std::{borrow::Cow, fmt}; - -// Lightweight identifiers that keep fault attribution consistent across the fleet. - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum FaultId { - Numeric(u32), // e.g., DTC-like - Text(Cow<'static, str>), // human-stable symbolic ID (runtime or static) - Uuid([u8; 16]), // global uniqueness if needed -} - -impl FaultId { - /// Convenience for constructing a textual ID from either a static string or owned `String`. - pub fn text(value: impl Into>) -> Self { - Self::Text(value.into()) - } - - /// `const` helper so descriptors can be defined in static contexts. - pub const fn text_const(value: &'static str) -> Self { - Self::Text(Cow::Borrowed(value)) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SourceId { - pub entity: &'static str, // e.g., "ADAS.Perception", "HVAC" - pub ecu: Option<&'static str>, // e.g., "ECU-A" - pub domain: Option<&'static str>, // e.g., "ADAS", "IVI" - pub sw_component: Option<&'static str>, - pub instance: Option<&'static str>, // allow N instances -} - -impl fmt::Display for SourceId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ecu = self.ecu.unwrap_or("-"); - let dom = self.domain.unwrap_or("-"); - let comp = self.sw_component.unwrap_or("-"); - let inst = self.instance.unwrap_or("-"); - write!( - f, - "{}@ecu:{} dom:{} comp:{} inst:{}", - self.entity, ecu, dom, comp, inst - ) - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 97588c9..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,37 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. 
-* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -#![forbid(unsafe_code)] // enforce safe Rust across the crate -#![feature(const_option_ops)] // -#![feature(const_trait_impl)] -// The public surface collects the building blocks for reporters, descriptors, -// and sinks so callers can just `use fault_lib::*` and go. -pub mod api; -pub mod catalog; -pub mod config; -pub mod ids; -pub mod model; -pub mod sink; -pub mod utils; - -// Re-export the main user-facing pieces, this keeps the crate ergonomic without -// forcing consumers to dig through modules. -pub use api::{FaultApi, Reporter}; -pub use catalog::FaultCatalog; -pub use config::{DebouncePolicy, ReportOptions, ReporterConfig, ResetPolicy}; -pub use ids::{FaultId, SourceId}; -pub use model::{ - ComplianceTag, FaultDescriptor, FaultLifecycleStage, FaultRecord, FaultSeverity, FaultType, - KeyValue, LifecyclePhase, -}; -pub use sink::{FaultSink, LogHook}; diff --git a/src/model.rs b/src/model.rs deleted file mode 100644 index 71a40da..0000000 --- a/src/model.rs +++ /dev/null @@ -1,130 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. -* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use crate::FaultId; -// use crate::DebouncePolicy; -use std::{borrow::Cow, time::SystemTime}; - -// Shared domain types that move between reporters, sinks, and integrators. - -/// Align severities to DLT-like levels, stable for logging & UI filters. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FaultSeverity { - Trace, - Debug, - Info, - Warn, - Error, - Fatal, -} - -/// Canonical fault type buckets used for analytics and tooling. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum FaultType { - Hardware, - Software, - Communication, - Configuration, - Timing, - Power, - /// Escape hatch for domain-specific groupings until the enum grows. - Custom(&'static str), -} - -/// Compliance/regulatory tags drive escalation, retention, and workflow. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ComplianceTag { - EmissionRelevant, - SafetyCritical, - SecurityRelevant, - LegalHold, -} - -/// Lifecycle phase of the reporting component/system (for policy gating). -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum LifecyclePhase { - Init, - Running, - Suspend, - Resume, - Shutdown, -} - -/// Simplified internal test lifecycle aligned with ISO 14229-1 style semantics. -/// DTC lifecycle (confirmation, pending, aging, etc.) is handled centrally by the DFM. -/// The fault-lib only tracks raw test pass/fail progression + pre-states around debounce. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FaultLifecycleStage { - NotTested, // test not executed yet for this reporting window - PreFailed, // initial failure observed but still within debounce/pending window - Failed, // confirmed failure (debounce satisfied / threshold met) - PrePassed, // transitioning back to healthy; stability window accumulating - Passed, // test executed and passed (healthy condition) -} - -/// Minimal, typed environment data; keep serde-agnostic at the API edge. -#[derive(Debug, Clone)] -pub struct KeyValue { - pub key: &'static str, - /// Values stay stringly-typed so logging/IPC layers stay decoupled. - pub value: String, -} - -/// Immutable, compile-time describer of a fault type (identity + defaults). 
-#[derive(Debug, Clone)] -pub struct FaultDescriptor { - pub id: crate::ids::FaultId, - pub name: Cow<'static, str>, - pub fault_type: FaultType, - pub default_severity: FaultSeverity, - pub compliance: Cow<'static, [ComplianceTag]>, - /// Default debounce/reset; can be overridden per-report via ReportOptions. - pub debounce: Option, - pub reset: Option, - /// Human-facing details. - pub summary: Option>, -} - -/// Concrete record produced on each report() call, also logged. -/// Contains only runtime-mutable data; static configuration lives in FaultDescriptor. -#[derive(Debug, Clone)] -pub struct FaultRecord { - pub fault_id: FaultId, - pub time: SystemTime, - pub severity: FaultSeverity, - pub source: crate::ids::SourceId, - pub lifecycle_phase: LifecyclePhase, - pub stage: FaultLifecycleStage, - pub environment_data: Vec, -} - -impl FaultRecord { - /// Append environment data (mutable) - pub fn add_environment_data(&mut self, key: &'static str, value: String) { - self.environment_data.push(KeyValue { key, value }); - self.time = SystemTime::now(); - } - - /// Update lifecycle stage (mutable) - pub fn update_stage(&mut self, stage: FaultLifecycleStage) { - self.stage = stage; - self.time = SystemTime::now(); - } - - /// Update severity (mutable) - pub fn update_severity(&mut self, severity: FaultSeverity) { - self.severity = severity; - self.time = SystemTime::now(); - } - -} diff --git a/src/sink.rs b/src/sink.rs deleted file mode 100644 index 38bc431..0000000 --- a/src/sink.rs +++ /dev/null @@ -1,50 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. 
-* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -use crate::model::FaultRecord; - -// Boundary traits for anything that has side-effects (logging + IPC). - -/// Hook to ensure that reporting a fault additionally results in a log entry. -/// Default impl can forward to log. -pub trait LogHook: Send + Sync + 'static { - fn on_report(&self, record: &FaultRecord); -} - -/// Sink abstracts the transport to the Diagnostic Fault Manager. -/// -/// Non-blocking contract: -/// - MUST return quickly (enqueue only) without waiting on IPC/network/disk. -/// - SHOULD avoid allocating excessively or performing locking that can contend with hot paths. -/// - Backpressure and retry are internal; caller only gets enqueue success/failure. -/// - Lifetime: installed once in `FaultApi::new` and lives for the duration of the process. -/// -/// Implementations can be S-CORE IPC. -pub trait FaultSink: Send + Sync + 'static { - /// Enqueue a record for delivery to the Diagnostic Fault Manager. - fn publish(&self, record: &FaultRecord) -> Result<(), SinkError>; -} - -#[derive(thiserror::Error, Debug)] -pub enum SinkError { - #[error("transport unavailable")] - TransportDown, - #[error("rate limited")] - RateLimited, - #[error("permission denied")] - PermissionDenied, - #[error("invalid descriptor: {0}")] - BadDescriptor(&'static str), - #[error("other: {0}")] - Other(&'static str), -} diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 3334116..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,51 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. 
-* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -// Small macro helpers that keep descriptor definitions tidy in user code. - -#[doc(hidden)] -#[macro_export] -macro_rules! __fault_descriptor_optional_str { - () => { - None - }; - ($value:literal) => { - Some(::std::borrow::Cow::Borrowed($value)) - }; -} - -#[macro_export] -macro_rules! fault_descriptor { - // Minimal form; policies can be added via builder functions if desired. - ( - id = $id:expr, - name = $name:literal, - kind = $kind:expr, - severity = $sev:expr - $(, compliance = [$($ctag:expr),* $(,)?])? - $(, summary = $summary:literal)? - $(, debounce = $debounce:expr)? - $(, reset = $reset:expr)? - ) => {{ - $crate::model::FaultDescriptor { - id: $id, - name: ::std::borrow::Cow::Borrowed($name), - fault_type: $kind, - default_severity: $sev, - compliance: ::std::borrow::Cow::Borrowed(&[$($($ctag),*,)?]), - debounce: $(Some($debounce))?, - reset: $(Some($reset))?, - summary: $crate::__fault_descriptor_optional_str!($($summary)?), - } - }}; -} diff --git a/src/xtask/Cargo.toml b/src/xtask/Cargo.toml new file mode 100644 index 0000000..2b01f71 --- /dev/null +++ b/src/xtask/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "xtask" +version.workspace = true +edition.workspace = true +license-file.workspace = true +readme.workspace = true + +[lints] +workspace = true diff --git a/src/xtask/src/main.rs b/src/xtask/src/main.rs new file mode 100644 index 0000000..21259d1 --- /dev/null +++ b/src/xtask/src/main.rs @@ -0,0 +1,312 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::env; +use std::fs; +use std::path::Path; +use std::process::{Command, exit}; + +fn main() { + let mut args = env::args().skip(1); // skip the binary name + + // println!("{:?}", args.next()); + let Some(command) = args.next() else { + print_usage_and_exit(); + }; + + // Split into env vars (KEY=VALUE) and passthrough args + let mut cli_env_vars = HashMap::new(); + let mut passthrough_args = Vec::new(); + + for arg in args { + if let Some((key, value)) = arg.split_once('=') { + cli_env_vars.insert(key.to_string(), value.to_string()); + } else { + passthrough_args.push(arg); + } + } + + let envs = HashMap::new(); + + match command.as_str() { + "build" => { + debug_build(envs, cli_env_vars, &passthrough_args); + } + "clippy" => { + clippy(envs, cli_env_vars, &passthrough_args); + } + "run" => { + run_build("debug_build", &["run"], envs, cli_env_vars, &passthrough_args); + } + "build:release" => { + run_build("release_build", &["build", "--release"], envs, cli_env_vars, &passthrough_args); + } + "run:release" => { + run_build("release_build", &["run", "--release"], envs, cli_env_vars, &passthrough_args); + } + "build:test" | "test" => { + test(envs, cli_env_vars, &passthrough_args); + } + "build:qnx_x86_64" => { + run_build( + "", + &["+qnx7.1_rust", "build", "--target", "x86_64-pc-nto-qnx710"], + envs, + cli_env_vars, + &passthrough_args, + ); + } + "build:qnx_arm" => { + run_build( + "", + &["+qnx7.1_rust", "build", "--target", "aarch64-unknown-nto-qnx710"], + envs, + cli_env_vars, + &passthrough_args, + ); + } + "check_lic" => { + check_license_header(); + } + "check" => { + check_license_header(); + run_command( + &["fmt", "--", "--check"], + HashMap::default(), + &passthrough_args, + Some("Wrong formatting@"), + ); + 
debug_build(envs.clone(), cli_env_vars.clone(), &passthrough_args); + clippy(envs.clone(), cli_env_vars.clone(), &passthrough_args); + test(envs, cli_env_vars, &passthrough_args); + } + "fmt" => { + run_command(&["fmt"], HashMap::default(), &passthrough_args, None); + } + "fmt:check" => { + run_command(&["fmt", "--", "--check"], HashMap::default(), &passthrough_args, Some("Wrong formatting")); + } + "miri" => { + miri(envs, cli_env_vars, &passthrough_args); + } + "coverage" => { + coverage(cli_env_vars, &passthrough_args); + } + _ => print_usage_and_exit(), + } +} + +fn clippy(envs: HashMap, cli_env_vars: HashMap, passthrough_args: &[String]) { + run_build( + "clippy", + &["clippy", "--all-targets", "--all-features"], + envs, + cli_env_vars, + passthrough_args, + ); +} + +fn test(envs: HashMap, cli_env_vars: HashMap, passthrough_args: &[String]) { + run_build("test_build", &["test"], envs, cli_env_vars, passthrough_args); +} + +fn miri(mut envs: HashMap, cli_env_vars: HashMap, passthrough_args: &[String]) { + envs.insert("MIRIFLAGS".into(), "-Zmiri-disable-isolation".into()); + // Miri cannot interpret IPC syscalls (integration_tests) or proc-macro + // build scripts (xtask), so both are excluded. 
+ let mut args: Vec = vec![ + "+nightly".into(), + "miri".into(), + "test".into(), + "--workspace".into(), + "--exclude".into(), + "integration_tests".into(), + "--exclude".into(), + "xtask".into(), + ]; + args.extend(passthrough_args.iter().cloned()); + + let args_ref: Vec<&str> = args.iter().map(|s| s.as_str()).collect(); + + for (k, v) in cli_env_vars { + envs.insert(k, v); + } + + run_command(&args_ref, envs, &[], None); +} + +fn coverage(cli_env_vars: HashMap, passthrough_args: &[String]) { + let mut envs = HashMap::new(); + for (k, v) in cli_env_vars { + envs.insert(k, v); + } + + let mut args: Vec = vec![ + "llvm-cov".into(), + "--workspace".into(), + "--exclude".into(), + "xtask".into(), + "--ignore-filename-regex".into(), + "test_utils|dfm_test_utils".into(), + ]; + args.extend(passthrough_args.iter().cloned()); + + let args_ref: Vec<&str> = args.iter().map(|s| s.as_str()).collect(); + run_command(&args_ref, envs, &[], None); +} + +fn debug_build(envs: HashMap, cli_env_vars: HashMap, passthrough_args: &[String]) { + run_build("debug_build", &["build"], envs, cli_env_vars, passthrough_args); +} + +fn run_build( + target_dir: &str, + cargo_args: &[&str], + mut default_envs: HashMap, + cli_envs: HashMap, + extra_args: &[String], +) { + // Set target dir + default_envs.insert("CARGO_TARGET_DIR".into(), format!("target/{target_dir}")); + + // CLI overrides + for (k, v) in cli_envs { + default_envs.insert(k, v); + } + + run_command(cargo_args, default_envs, extra_args, None); +} + +fn run_command(cargo_args: &[&str], default_envs: HashMap, extra_args: &[String], explain: Option<&str>) { + let mut cmd = Command::new("cargo"); + cmd.args(cargo_args); + cmd.args(extra_args); + + for (key, value) in &default_envs { + cmd.env(key, value); + } + + println!("> Running: cargo {} {}", cargo_args.join(" "), extra_args.join(" ")); + println!("> With envs: {default_envs:?}"); + + let status = match cmd.status() { + Ok(s) => s, + Err(e) => { + eprintln!("Failed to run cargo 
(explain: {explain:?}): {e}"); + exit(1); + } + }; + if !status.success() { + exit(status.code().unwrap_or(1)); + } +} + +fn print_usage_and_exit() -> ! { + eprintln!( + "Usage: xtask {{ + build build in debug mode + run runs executable + build:release build in release mode + run:release runs executable in release mode + build:test build and runs tests + build:qnx_x86_64 build for QNX7.1 target: x86_64-pc-nto-qnx710 + build:qnx_arm build for QNX7.1 target: aarch64-pc-nto-qnx710 + clippy runs clippy + fmt runs rustfmt + fmt:check checks formatting without modifying files + miri runs Miri UB checker (excludes integration_tests, xtask) + coverage runs cargo-llvm-cov (excludes xtask) + check runs fundamental checks, good to run before push + check_lic runs source code license check + + [ENV_VAR=value ...] [-- cargo args...]" + ); + exit(1); +} + +const REQUIRED_HEADER: &str = r#"// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +//"#; + +fn check_license_header() { + let project_dir = match std::env::current_dir() { + Ok(d) => d.join("src"), + Err(e) => { + eprintln!("Failed to get current directory: {e}"); + exit(1); + } + }; + let mut missing_header_files = Vec::new(); + + visit_dirs(&project_dir, &mut missing_header_files); + + if missing_header_files.is_empty() { + println!("All files have the required license header."); + } else { + println!("The following files are missing the required license header:"); + println!("\n{REQUIRED_HEADER}\n"); + for file in missing_header_files { + println!("{}", file.display()); + } + + std::process::exit(-1); + } +} + +fn visit_dirs(dir: &Path, missing_header_files: &mut Vec) { + if dir.is_dir() { + let entries = match fs::read_dir(dir) { + Ok(e) => e, + Err(e) => { + eprintln!("Failed to read directory {}: {e}", dir.display()); + exit(1); + } + }; + for entry in entries { + let entry = match entry { + Ok(e) => e, + Err(e) => { + eprintln!("Failed to get directory entry: {e}"); + exit(1); + } + }; + let path = entry.path(); + if path.is_dir() { + visit_dirs(&path, missing_header_files); + } else if path.extension().is_some_and(|ext| ext == "rs") { + check_file(&path, missing_header_files); + } + } + } +} + +fn check_file(file_path: &Path, missing_header_files: &mut Vec) { + let content = match fs::read_to_string(file_path) { + Ok(c) => c, + Err(e) => { + eprintln!("Failed to read file {}: {e}", file_path.display()); + exit(1); + } + }; + if !content.starts_with(REQUIRED_HEADER) { + missing_header_files.push(file_path.to_path_buf()); + } +} diff --git a/tests/hvac_component.rs b/tests/hvac_component.rs deleted file mode 100644 index 54186a9..0000000 --- a/tests/hvac_component.rs +++ /dev/null @@ -1,200 +0,0 @@ -/* -* Copyright (c) 2025 The Contributors to 
Eclipse OpenSOVD (see CONTRIBUTORS) -* -* See the NOTICE file(s) distributed with this work for additional -* information regarding copyright ownership. -* -* This program and the accompanying materials are made available under the -* terms of the Apache License Version 2.0 which is available at -* https://www.apache.org/licenses/LICENSE-2.0 -* -* SPDX-License-Identifier: Apache-2.0 -*/ - -//! Example only: illustrates how a vehicle component could wire up `fault-lib`. -//! This code is intentionally incomplete and is not meant to be built. - -// Shared ownership (Arc) lets us pass the API handles around -// Duration keeps the debounce/reset numbers readable. -use std::sync::Arc; -use std::time::Duration; - -// --- FAULT-LIB API USAGE PATTERN EXAMPLE --- -// This test demonstrates the recommended usage pattern for the fault-lib API. -// 1. Define static fault descriptors and catalog at compile time. -// 2. At startup, create one Reporter per fault ID (binding config, catalog, and API). -// 3. At runtime, create a mutable FaultRecord from the bound Reporter, update state, and publish. -// 4. Only runtime data is sent; static config is referenced via the Reporter. -use fault_lib::{ - Reporter, // Per-fault binding: one instance per fault ID - api::FaultApi, // Global API handle: owns sink and logger - catalog::FaultCatalog, // Static catalog of all descriptors - config::{DebounceMode, DebouncePolicy, ReporterConfig, ResetPolicy, ResetTrigger}, - fault_descriptor, // Macro for concise descriptor definition - ids::{FaultId, SourceId}, - model::{ - ComplianceTag, FaultLifecycleStage, FaultSeverity, FaultType, KeyValue, LifecyclePhase, - }, - sink::{FaultSink, LogHook, SinkError}, -}; - -/// 1. Define static fault descriptors and catalog at compile time. -/// In a real code base this could be generated so the component and DFM stay in sync about IDs and policies. 
-static HVAC_DESCRIPTORS: &[fault_lib::model::FaultDescriptor] = &[ - // `fault_descriptor!` is a small macro helper that expands to a struct literal. - fault_descriptor! { - id = FaultId::Numeric(0x7001), - name = "CabinTempSensorStuck", - kind = FaultType::Hardware, - severity = FaultSeverity::Warn, - compliance = [ComplianceTag::SafetyCritical], - summary = "Cabin temperature sensor delivered the same sample for >60s", - debounce = DebouncePolicy { - mode: DebounceMode::HoldTime { duration: Duration::from_secs(60) }, - log_throttle: Some(Duration::from_secs(300)), - }, - reset = ResetPolicy { - trigger: ResetTrigger::StableFor(Duration::from_secs(900)), - min_operating_cycles_before_clear: Some(5), - } - }, - fault_descriptor! { - id = FaultId::text_const("hvac.blower.speed_sensor_mismatch"), - name = "BlowerSpeedMismatch", - kind = FaultType::Communication, - severity = FaultSeverity::Error, - compliance = [ComplianceTag::EmissionRelevant], - summary = "Commanded and measured blower speeds diverged beyond tolerance", - debounce = DebouncePolicy { - mode: DebounceMode::HoldTime { duration: Duration::from_secs(60) }, - log_throttle: Some(Duration::from_secs(300)), - }, - reset = ResetPolicy { - trigger: ResetTrigger::StableFor(Duration::from_secs(900)), - min_operating_cycles_before_clear: Some(5), - } - }, -]; - -/// Bundle descriptors with an identifier + version so the DFM can verify compatibility. -static HVAC_CATALOG: FaultCatalog = FaultCatalog::new("hvac", 3, HVAC_DESCRIPTORS); - -/// Minimal log hook to keep the example focused on the API touchpoints. -/// In production, this would forward to a logging backend. -struct StdoutLogHook; - -impl LogHook for StdoutLogHook { - fn on_report(&self, record: &fault_lib::model::FaultRecord) { - println!( - "[fault-log] fault_id={:?} severity={:?} source={}", - record.fault_id, record.severity, record.source - ); - } -} - -/// Dummy sink used for illustration. 
Real code would forward to S-CORE IPC or another transport. -struct VehicleBusSink; - -impl FaultSink for VehicleBusSink { - // In real deployments this is where we would enqueue into IPC to the central manager. - fn publish(&self, record: &fault_lib::model::FaultRecord) -> Result<(), SinkError> { - println!( - "[fault-sink] queued fault_id={:?}", - record.fault_id - ); - Ok(()) - } -} - -/// 2. At startup, create one Reporter per fault ID (binding config, catalog, and API). -/// Each Reporter is bound to a single fault and holds all static config for that fault. -struct DummyApp { - #[allow(dead_code)] - temp_sensor_fault: Reporter, - blower_fault: Reporter, -} - -impl DummyApp { - /// Bind all reporters to their respective fault IDs at startup. - /// This ensures type safety and avoids runtime lookups. - /// It also can ensure that catalogue in app and DFM match. - pub fn new( - reporter_cfg: ReporterConfig, - catalog: &FaultCatalog, - ) -> Self { - Self { - temp_sensor_fault: Reporter::new( - catalog, - reporter_cfg.clone(), - &FaultId::Numeric(0x7001), - ), - blower_fault: Reporter::new( - catalog, - reporter_cfg, - &FaultId::text("hvac.blower.speed_sensor_mismatch"), - ), - } - } - - /// Simulate a control loop step that may raise a fault. - pub fn step(&self) { - self.handle_blower_fault(0.6, 0.9); - } - - /// 3. At runtime, create a mutable FaultRecord from the bound Reporter, update state, and publish. - /// This pattern ensures only runtime data is sent; static config is referenced via the Reporter. 
- #[allow(dead_code)] - fn handle_blower_fault(&self, measured_rpm: f32, commanded_rpm: f32) { - // Create a new record for this fault occurrence - let mut record = self.blower_fault.create_record(); - // Attach runtime environment data - record.add_environment_data("measured_rpm", measured_rpm.to_string()); - record.add_environment_data("commanded_rpm", commanded_rpm.to_string()); - // Mark test result as failed (confirmed after any debounce logic) for this occurrence - record.update_stage(FaultLifecycleStage::Failed); - - // Publish the record via the bound reporter. - // This enqueues the record to the configured FaultSink (IPC) - // and is non-blocking for the caller (does not wait for DFM response). - if let Err(err) = self.blower_fault.publish(&record) { - eprintln!("failed to enqueue blower mismatch fault: {err}"); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - /// Components wire this during init and hold on to the `Reporter`. - #[test] - fn test_hvac_faults_with_dummy_app() { - // 0. Setup: create the global FaultApi (owns sink/logger) - // Initialize singleton FaultApi (sink + logger registered globally) - let _api = FaultApi::new( - Arc::new(VehicleBusSink), - Arc::new(StdoutLogHook), - ); - - // 1. Setup: create the per-component ReporterConfig - let reporter_cfg = ReporterConfig { - source: SourceId { - entity: "HVAC.Controller", - ecu: Some("CCU-SoC-A"), - domain: Some("HVAC"), - sw_component: Some("ClimateManager"), - instance: None, - }, - lifecycle_phase: LifecyclePhase::Running, - default_environment_data: vec![KeyValue { - key: "sw.version", - value: "2024.10.0".into(), - }], - }; - - // 2. Bind all reporters to their respective fault IDs at startup - let dummy_app = DummyApp::new(reporter_cfg, &HVAC_CATALOG); - - // 3. 
Simulate a control loop step that may raise a fault - dummy_app.step(); - } -} From 2fd7cef2db589a5403a83ced4677117d83150fe2 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:27:03 +0100 Subject: [PATCH 2/7] feat(common): add shared types crate for fault management IPC-safe types (IpcDuration, IpcTimestamp), fault descriptors, catalog configuration, debounce/enabling condition config, query protocol definitions, and iceoryx2 service types. --- src/common/BUILD | 54 +++ src/common/Cargo.toml | 21 ++ src/common/src/catalog.rs | 398 +++++++++++++++++++++ src/common/src/config.rs | 64 ++++ src/common/src/debounce.rs | 434 +++++++++++++++++++++++ src/common/src/enabling_condition.rs | 101 ++++++ src/common/src/fault.rs | 505 +++++++++++++++++++++++++++ src/common/src/ids.rs | 195 +++++++++++ src/common/src/ipc_service_name.rs | 65 ++++ src/common/src/ipc_service_type.rs | 15 + src/common/src/lib.rs | 64 ++++ src/common/src/query_protocol.rs | 206 +++++++++++ src/common/src/sink_error.rs | 43 +++ src/common/src/types.rs | 61 ++++ 14 files changed, 2226 insertions(+) create mode 100644 src/common/BUILD create mode 100644 src/common/Cargo.toml create mode 100644 src/common/src/catalog.rs create mode 100644 src/common/src/config.rs create mode 100644 src/common/src/debounce.rs create mode 100644 src/common/src/enabling_condition.rs create mode 100644 src/common/src/fault.rs create mode 100644 src/common/src/ids.rs create mode 100644 src/common/src/ipc_service_name.rs create mode 100644 src/common/src/ipc_service_type.rs create mode 100644 src/common/src/lib.rs create mode 100644 src/common/src/query_protocol.rs create mode 100644 src/common/src/sink_error.rs create mode 100644 src/common/src/types.rs diff --git a/src/common/BUILD b/src/common/BUILD new file mode 100644 index 0000000..ed53156 --- /dev/null +++ b/src/common/BUILD @@ -0,0 +1,54 @@ +# ******************************************************************************* +# Copyright (c) 2025 
Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +filegroup( + name = "common_srcs", + srcs = glob(["src/**/*.rs"]), +) + +rust_library( + name = "common", + srcs = [":common_srcs"], + crate_name = "common", + edition = "2024", + visibility = ["//visibility:public"], + deps = [ + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:sha2", + "@score_fault_lib_crates//:thiserror", + ], +) + +rust_test( + name = "tests", + srcs = [":common_srcs"], + edition = "2024", + deps = [ + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:mockall", + "@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:sha2", + "@score_fault_lib_crates//:thiserror", + ], +) diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml new file mode 100644 index 0000000..40c4f94 --- /dev/null +++ b/src/common/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "common" +version.workspace = true +edition.workspace = true +license-file.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[dependencies] +iceoryx2.workspace = true +iceoryx2-bb-container.workspace = true 
+serde_json.workspace = true +serde = { workspace = true, features = ["derive"] } +log = { workspace = true, features = ["std"] } +sha2.workspace = true +thiserror.workspace = true + +[dev-dependencies] +mockall.workspace = true diff --git a/src/common/src/catalog.rs b/src/common/src/catalog.rs new file mode 100644 index 0000000..4bafb90 --- /dev/null +++ b/src/common/src/catalog.rs @@ -0,0 +1,398 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::fault::*; +use crate::types::LongString; +use alloc::borrow::Cow; +use sha2::{Digest, Sha256}; +use std::{collections::HashMap, fs, path::PathBuf}; + +/// Error type for fault catalog building failures. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum CatalogBuildError { + /// The input string was not valid JSON. + #[error("invalid JSON: {0}")] + InvalidJson(#[from] serde_json::Error), + + /// No configuration source was set before calling `try_build`. + #[error("missing configuration")] + MissingConfig, + + /// An I/O error occurred while reading a JSON file. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// The catalog identifier exceeds the 128-byte IPC limit. + #[error("catalog id too long for IPC: {0}")] + IdTooLong(String), + + /// A source was already configured on this builder. + #[error("builder already configured")] + AlreadyConfigured, + + /// Two descriptors share the same [`FaultId`]. + #[error("duplicate FaultId: {0:?}")] + DuplicateFaultId(FaultId), +} + +type FaultDescriptorsMap = HashMap; +type FaultCatalogHash = Vec; + +/// Runtime representation of a fault catalog. 
+/// +/// A `FaultCatalog` bundles a set of [`FaultDescriptor`]s under a shared +/// identifier and version, together with a SHA-256 hash of the serialised +/// configuration. The hash is used during the DFM handshake to verify +/// that reporter and manager agree on the same catalog revision. +#[derive(Debug)] +pub struct FaultCatalog { + /// Human-readable catalog identifier (e.g. `"hvac"`). + pub id: Cow<'static, str>, + /// Monotonically increasing catalog revision number. + pub version: u64, + descriptors: FaultDescriptorsMap, + config_hash: FaultCatalogHash, +} + +impl FaultCatalog { + /// Create a new `FaultCatalog` from pre-built components. + /// + /// Prefer [`FaultCatalogBuilder`] for constructing catalogs from JSON + /// or [`FaultCatalogConfig`] structs — it handles hashing and + /// duplicate-ID detection automatically. + pub fn new(id: Cow<'static, str>, version: u64, descriptors: FaultDescriptorsMap, config_hash: FaultCatalogHash) -> Self { + Self { + id, + version, + descriptors, + config_hash, + } + } + + /// SHA-256 hash of the canonical JSON representation of this catalog's + /// configuration. Used during the DFM handshake to detect version drift + /// between reporter and manager. + pub fn config_hash(&self) -> &[u8] { + &self.config_hash + } + + /// Try to get the catalog id as a fixed-size IPC-safe `LongString`. + /// + /// Returns `CatalogBuildError::IdTooLong` if the id exceeds 128 bytes. + pub fn try_id(&self) -> Result { + LongString::try_from(self.id.as_bytes()).map_err(|_| CatalogBuildError::IdTooLong(self.id.to_string())) + } + + /// Get the catalog id as a `LongString`. + /// + /// # Panics + /// + /// Panics if the id exceeds 128 bytes. Use [`try_id`](Self::try_id) + /// for fallible access. + #[allow(clippy::expect_used)] + pub fn id(&self) -> LongString { + self.try_id().expect("Fault catalog id too long") + } + + /// Look up a single descriptor by its [`FaultId`]. 
+ /// + /// Returns `None` if the catalog does not contain a descriptor with the + /// given ID. + pub fn descriptor(&self, id: &FaultId) -> Option<&FaultDescriptor> { + self.descriptors.get(id) + } + + /// Return an iterator over all descriptors in this catalog. + /// + /// Iteration order is unspecified (backed by `HashMap`). + pub fn descriptors(&self) -> impl Iterator { + self.descriptors.values() + } + + /// Number of descriptors in this catalog, useful for build-time validation. + pub fn len(&self) -> usize { + self.descriptors.len() + } + + /// Returns `true` if this catalog contains no descriptors. + pub fn is_empty(&self) -> bool { + self.descriptors.is_empty() + } +} + +/// Fault Catalog configuration structure +/// +/// Can be used for code generation of fault catalog configuration. +/// +/// # Fields +/// +/// - `id` (`Cow<'static`) - fault catalog ID . +/// - `version` (`u64`) - the version of the fault catalog. +/// - `faults` (`Vec`) - vector of fault descriptors. +/// +#[derive(Debug, serde::Deserialize, serde::Serialize, Clone)] +pub struct FaultCatalogConfig { + /// Unique identifier for this fault catalog (e.g. `"hvac"`). + pub id: Cow<'static, str>, + /// Monotonically increasing catalog revision number. + pub version: u64, + /// Ordered list of fault descriptors belonging to this catalog. + pub faults: Vec, +} +/// Input source for [`FaultCatalogBuilder`]. +/// +/// Exactly one source must be set before calling +/// [`FaultCatalogBuilder::try_build`]. +#[non_exhaustive] +pub enum FaultCatalogBuilderInput<'a> { + /// No source configured yet. + None, + /// Raw JSON string. + JsonString(&'a str), + /// Path to a JSON file on disk. + JsonFile(PathBuf), + /// Pre-built configuration struct. 
+ ConfigStruct(FaultCatalogConfig), +} + +/// Fault Catalog builder +pub struct FaultCatalogBuilder<'a> { + input: FaultCatalogBuilderInput<'a>, +} + +/// Implementation of the Default trait for the fault catalog builder +/// +/// # Returns +/// +/// - `Self` - FaultCatalogBuilder structure. +/// +impl<'a> Default for FaultCatalogBuilder<'a> { + fn default() -> Self { + Self { + input: FaultCatalogBuilderInput::None, + } + } +} + +impl<'a> FaultCatalogBuilder<'a> { + /// Fault catalog builder constructor + /// + /// # Return Values + /// * FaultCatalogBuilder instance + pub fn new() -> Self { + Self::default() + } + + /// Checks if the builder has been not configured yet. + /// + /// # Errors + /// + /// Returns `CatalogBuildError::AlreadyConfigured` if a source was already set. + fn check_if_not_set(&self) -> Result<(), CatalogBuildError> { + if !matches!(self.input, FaultCatalogBuilderInput::None) { + return Err(CatalogBuildError::AlreadyConfigured); + } + Ok(()) + } + + /// Configure 'FaultCatalog' with given json configuration string. + /// + /// You cannot use this function in case the configuration file has been passed before + /// # Arguments + /// + /// - `mut self` - the builder itself. + /// - `json_string` (`&'a str`) - the fault catalog configuration string in json format + /// + /// # Returns + /// + /// - `Self` - the `FaultCatalogBuilder` instance. + /// # Errors + /// + /// Returns `CatalogBuildError::AlreadyConfigured` if a source was already set. + pub fn json_string(mut self, json_string: &'a str) -> Result { + self.check_if_not_set()?; + self.input = FaultCatalogBuilderInput::JsonString(json_string); + Ok(self) + } + + /// Configure the `FaultCatalog` with the given JSON configuration file. + /// + /// Only one source may be set per builder — calling this after another + /// source method returns an error. + /// + /// # Arguments + /// + /// * `json_file` — path to the `FaultCatalog` JSON configuration file. 
+ /// + /// # Errors + /// + /// Returns [`CatalogBuildError::AlreadyConfigured`] if a source was already set. + pub fn json_file(mut self, json_file: PathBuf) -> Result { + self.check_if_not_set()?; + self.input = FaultCatalogBuilderInput::JsonFile(json_file); + Ok(self) + } + + /// Configure the `FaultCatalog` from a pre-built [`FaultCatalogConfig`]. + /// + /// Only one source may be set per builder — calling this after another + /// source method returns an error. + /// + /// # Errors + /// + /// Returns [`CatalogBuildError::AlreadyConfigured`] if a source was already set. + pub fn cfg_struct(mut self, cfg: FaultCatalogConfig) -> Result { + self.check_if_not_set()?; + self.input = FaultCatalogBuilderInput::ConfigStruct(cfg); + Ok(self) + } + + /// Builds the `FaultCatalog`. + /// + /// # Errors + /// + /// Returns `CatalogBuildError` if configuration is missing, JSON is invalid, + /// or the file cannot be read. + pub fn try_build(self) -> Result { + match self.input { + FaultCatalogBuilderInput::JsonString(json_str) => Self::try_from_json_string(json_str), + FaultCatalogBuilderInput::JsonFile(json_file) => Self::try_from_file(json_file), + FaultCatalogBuilderInput::ConfigStruct(cfg_struct) => Self::from_cfg_struct(cfg_struct), + FaultCatalogBuilderInput::None => Err(CatalogBuildError::MissingConfig), + } + } + + /// Builds the `FaultCatalog`, panicking on error. + /// + /// # Panics + /// + /// Panics if configuration is missing, JSON is invalid, or the file cannot be read. + /// Use [`try_build`](Self::try_build) for the fallible version. + #[allow(clippy::expect_used)] + pub fn build(self) -> FaultCatalog { + self.try_build().expect("Failed to build FaultCatalog") + } + + /// Build a [`FaultCatalog`] from a [`FaultCatalogConfig`]. + /// + /// Computes the SHA-256 config hash, converts the descriptor list into an + /// indexed map, and validates that no duplicate [`FaultId`]s exist. 
+ /// + /// # Errors + /// + /// Returns [`CatalogBuildError::DuplicateFaultId`] if two descriptors + /// share the same ID, or a serialisation error if hashing fails. + fn from_cfg_struct(cfg_struct: FaultCatalogConfig) -> Result { + let hash_sum = Self::calc_config_hash(&cfg_struct)?; + let mut descriptors = FaultDescriptorsMap::new(); + for descriptor in cfg_struct.faults { + let id = descriptor.id.clone(); + if descriptors.contains_key(&id) { + return Err(CatalogBuildError::DuplicateFaultId(id)); + } + descriptors.insert(id, descriptor); + } + Ok(FaultCatalog::new(cfg_struct.id, cfg_struct.version, descriptors, hash_sum)) + } + + /// Fallible version: generates fault catalog from JSON string. + fn try_from_json_string(json: &str) -> Result { + let cfg: FaultCatalogConfig = serde_json::from_str(json)?; + Self::from_cfg_struct(cfg) + } + + /// Fallible version: creates fault catalog from a JSON file. + fn try_from_file(json_path: PathBuf) -> Result { + let cfg_file_txt = fs::read_to_string(json_path)?; + Self::try_from_json_string(&cfg_file_txt) + } + + /// Compute the SHA-256 hash of the canonical JSON serialisation of `cfg`. + /// + /// The canonical form is produced by `serde_json::to_string`, ensuring + /// that two structurally identical configs always yield the same hash. + /// + /// # Errors + /// + /// Returns a [`CatalogBuildError`] if JSON serialisation fails. 
+ fn calc_config_hash(cfg: &FaultCatalogConfig) -> Result, CatalogBuildError> { + let canon = serde_json::to_string(cfg)?; + Ok(Sha256::new().chain_update(canon.as_bytes()).finalize().to_vec()) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + use crate::types::to_static_short_string; + + fn make_descriptor(id: FaultId) -> FaultDescriptor { + FaultDescriptor { + id, + name: to_static_short_string("Test").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + } + } + + #[test] + fn duplicate_fault_id_returns_error() { + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![make_descriptor(FaultId::Numeric(42)), make_descriptor(FaultId::Numeric(42))], + }; + let result = FaultCatalogBuilder::new().cfg_struct(config).unwrap().try_build(); + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), CatalogBuildError::DuplicateFaultId(_))); + } + + #[test] + fn unique_fault_ids_build_successfully() { + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![ + make_descriptor(FaultId::Numeric(1)), + make_descriptor(FaultId::Numeric(2)), + make_descriptor(FaultId::Text(to_static_short_string("fault_a").unwrap())), + ], + }; + let result = FaultCatalogBuilder::new().cfg_struct(config).unwrap().try_build(); + assert!(result.is_ok()); + assert_eq!(result.unwrap().len(), 3); + } + + #[test] + fn numeric_and_text_with_same_value_are_not_duplicates() { + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![ + make_descriptor(FaultId::Numeric(1)), + make_descriptor(FaultId::Text(to_static_short_string("1").unwrap())), + ], + }; + let result = FaultCatalogBuilder::new().cfg_struct(config).unwrap().try_build(); + 
assert!(result.is_ok()); + assert_eq!(result.unwrap().len(), 2); + } +} diff --git a/src/common/src/config.rs b/src/common/src/config.rs new file mode 100644 index 0000000..53b2717 --- /dev/null +++ b/src/common/src/config.rs @@ -0,0 +1,64 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use crate::debounce::IpcDuration; +use crate::{debounce::DebouncePolicy, fault::ComplianceVec, fault::FaultSeverity, types::*}; +use serde::{Deserialize, Serialize}; + +use iceoryx2::prelude::*; +use iceoryx2_bb_container::vector::*; + +/// Reset rules define how and when a latched fault can be cleared. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum ResetTrigger { + /// Clear on next ignition/power cycle count meeting threshold. + /// Uses the "power" operation cycle counter. + PowerCycles(u32), + /// Clear after N named operation cycles without recurrence. + /// `cycle_ref` identifies the counter (e.g., "ignition", "drive"). + OperationCycles { + /// Minimum number of cycles without recurrence before clearing. + min_cycles: u32, + /// Named cycle counter reference (e.g. `"ignition"`, `"drive"`). + cycle_ref: ShortString, + }, + /// Clear when condition absent for a duration. + StableFor(IpcDuration), + /// Manual maintenance/tooling only (e.g., regulatory). + ToolOnly, +} + +/// Policy governing when a latched fault may be cleared. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub struct ResetPolicy { + /// Trigger condition that must be met for the fault to clear. 
+ pub trigger: ResetTrigger, + /// Some regulations require X cycles before clearable from user UI. + pub min_operating_cycles_before_clear: Option, +} + +/// Per-report options provided by the call site when a fault is emitted. +#[derive(Debug, Default, Clone, ZeroCopySend)] +#[repr(C)] +pub struct ReportOptions { + /// Override severity (else descriptor.default_severity). + pub severity: Option, + /// Attach extra metadata key-values (free form). + pub metadata: StaticVec<(ShortString, ShortString), 8>, + /// Override policies dynamically (rare, but useful for debug/A-B). + pub debounce: Option, + /// Override reset policy dynamically. + pub reset: Option, + /// Regulatory/operational flags—extra tags may be added at report time. + pub extra_compliance: ComplianceVec, +} diff --git a/src/common/src/debounce.rs b/src/common/src/debounce.rs new file mode 100644 index 0000000..d450b2b --- /dev/null +++ b/src/common/src/debounce.rs @@ -0,0 +1,434 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use alloc::collections::VecDeque; +use core::time::Duration; +use iceoryx2::prelude::*; +use serde::{Deserialize, Serialize}; +use std::time::Instant; + +/// IPC-safe duration with guaranteed `#[repr(C)]` layout. +/// +/// `std::time::Duration` has no stable memory layout, making it unsuitable +/// for cross-process shared memory. This wrapper is used in all `#[repr(C)]` +/// types that cross IPC boundaries. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub struct IpcDuration { + /// Whole seconds component. + pub secs: u64, + /// Sub-second nanoseconds component (0…999 999 999). 
+ pub nanos: u32, +} + +impl IpcDuration { + /// Maximum valid value for the `nanos` field. + pub const MAX_NANOS: u32 = 999_999_999; + + /// Validates that the nanoseconds field is within the documented range (0..=999_999_999). + /// + /// Note: `Duration::new()` does not panic on out-of-range nanos - it carries + /// excess into seconds. This method enforces the stricter invariant documented + /// on the `nanos` field for IPC trust boundaries. + pub fn validate(&self) -> Result<(), &'static str> { + if self.nanos > Self::MAX_NANOS { + return Err("IpcDuration nanoseconds out of range (max: 999_999_999)"); + } + Ok(()) + } +} + +impl From for IpcDuration { + fn from(d: Duration) -> Self { + Self { + secs: d.as_secs(), + nanos: d.subsec_nanos(), + } + } +} + +impl From for Duration { + fn from(d: IpcDuration) -> Self { + Duration::new(d.secs, d.nanos) + } +} + +/// Debounce descriptions capture how noisy fault sources should be filtered. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum DebounceMode { + /// Require N occurrences within a window to confirm fault Active. + CountWithinWindow { + /// Minimum number of events required within `window`. + min_count: u32, + /// Sliding time window for counting events. + window: IpcDuration, + }, + /// Confirm when signal remains bad for duration (e.g., stuck-at). + HoldTime { + /// Duration the fault condition must persist before confirmation. + duration: IpcDuration, + }, + /// Edge triggered (first occurrence) with cooldown to avoid flapping. + EdgeWithCooldown { + /// Minimum time between successive reports. + cooldown: IpcDuration, + }, +} + +impl DebounceMode { + /// Create a boxed [`Debounce`] implementation matching this mode. 
// Core debounce machinery: the `Debounce` trait and its three concrete
// implementations. Reconstructed from a diff-mangled patch; the
// `DebounceMode::into_debouncer` adapter (returns `Box<dyn Debounce>`)
// lives next to `DebounceMode` and simply maps each variant onto the
// matching constructor below.
use std::collections::VecDeque;
use std::time::{Duration, Instant};

/// Trait for debounce algorithm implementations.
///
/// All implementations must be [`Send`] + [`Sync`] to support use in
/// multi-threaded fault-reporting pipelines (e.g. behind a shared mutex).
pub trait Debounce: Send + Sync {
    /// Called on each fault occurrence. Returns `true` if the event should
    /// be reported (debounce condition satisfied).
    fn on_event(&mut self, now: Instant) -> bool;

    /// Resets the internal state, e.g., after a fault clears.
    fn reset(&mut self, now: Instant);
}

/// Count-within-window debouncer.
///
/// Confirms a fault only after `min_count` events occur within a sliding
/// time window. The internal deque is capped at `min_count` entries.
pub struct CountWithinWindow {
    /// Number of events required inside `window` for confirmation.
    min_count: u32,
    /// Width of the sliding window.
    window: Duration,
    /// Timestamps of the most recent events (never longer than `min_count`).
    occurrences: VecDeque<Instant>,
}

impl CountWithinWindow {
    /// Create a new count-within-window debouncer.
    pub fn new(min_count: u32, window: Duration) -> Self {
        Self {
            min_count,
            window,
            occurrences: VecDeque::new(),
        }
    }
}

impl Debounce for CountWithinWindow {
    fn on_event(&mut self, now: Instant) -> bool {
        // Drop timestamps that have aged out of the sliding window.
        while self.occurrences.front().is_some_and(|&ts| now.duration_since(ts) > self.window) {
            self.occurrences.pop_front();
        }
        self.occurrences.push_back(now);
        // Cap the deque at min_count to prevent unbounded growth.
        // We only need the most recent min_count entries to decide.
        while self.occurrences.len() > self.min_count as usize {
            self.occurrences.pop_front();
        }
        self.occurrences.len() >= self.min_count as usize
    }

    fn reset(&mut self, _now: Instant) {
        self.occurrences.clear();
    }
}

/// Hold-time debouncer.
///
/// Confirms a fault only after the condition persists continuously for
/// the configured duration.
///
/// # Semantics
///
/// - The first call to [`on_event`](Debounce::on_event) starts an internal
///   timer but returns `false` (not yet confirmed).
/// - Subsequent calls return `true` only when the elapsed time since the
///   first event meets or exceeds `duration`.
/// - If [`reset`](Debounce::reset) is called (e.g. the fault condition
///   clears), the timer restarts from zero on the next `on_event`.
///
/// # Edge cases
///
/// - A `duration` of zero confirms on the **second** call (the first
///   call always records the start time and returns `false`).
/// - Calling `reset` and then immediately `on_event` restarts the timer.
pub struct HoldTime {
    /// Required continuous duration before confirmation.
    duration: Duration,
    /// Instant of the first event in the current episode, if any.
    start_time: Option<Instant>,
}

impl HoldTime {
    /// Create a new hold-time debouncer.
    pub fn new(duration: Duration) -> Self {
        Self { duration, start_time: None }
    }
}

impl Debounce for HoldTime {
    fn on_event(&mut self, now: Instant) -> bool {
        match self.start_time {
            None => {
                self.start_time = Some(now);
                false
            }
            Some(start) => now.duration_since(start) >= self.duration,
        }
    }

    fn reset(&mut self, _now: Instant) {
        self.start_time = None;
    }
}

/// Edge-with-cooldown debouncer.
///
/// Reports on the first occurrence, then suppresses further reports
/// until the cooldown period elapses.
pub struct EdgeWithCooldown {
    /// Minimum gap between successive reports.
    cooldown: Duration,
    /// Instant of the last reported event (or last `reset`).
    last_report: Option<Instant>,
}

impl EdgeWithCooldown {
    /// Create a new edge-with-cooldown debouncer.
    pub fn new(cooldown: Duration) -> Self {
        Self { cooldown, last_report: None }
    }
}

impl Debounce for EdgeWithCooldown {
    fn on_event(&mut self, now: Instant) -> bool {
        match self.last_report {
            Some(last) if now.duration_since(last) < self.cooldown => false,
            _ => {
                self.last_report = Some(now);
                true
            }
        }
    }

    /// Reset the edge-with-cooldown debouncer.
    ///
    /// Sets `last_report` to `now`, which means the cooldown window
    /// restarts from this instant. Any `on_event` call arriving before
    /// `now + cooldown` will be suppressed.
    ///
    /// This is the correct behavior when the underlying fault clears:
    /// the reporter should not immediately re-fire on the next edge.
    ///
    /// # Difference from other debouncers
    ///
    /// Unlike [`HoldTime::reset`] (which clears the timer entirely),
    /// this method *anchors* a new cooldown window — the debouncer
    /// does **not** return to the "never reported" initial state.
    fn reset(&mut self, now: Instant) {
        self.last_report = Some(now);
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    #[test]
    fn count_with_window_reports_only_after_min_count_within_window() {
        let now = Instant::now();
        let mut d = CountWithinWindow::new(3, Duration::from_secs(5));
        assert!(!d.on_event(now));
        assert!(!d.on_event(now + Duration::from_secs(1)));
        assert!(d.on_event(now + Duration::from_secs(2)));
        assert!(d.on_event(now + Duration::from_secs(3)));
    }

    #[test]
    fn count_with_window_drops_old_events_outside_window() {
        let mut d = CountWithinWindow::new(3, Duration::from_secs(2));
        let t0 = Instant::now();
        assert!(!d.on_event(t0));
        assert!(!d.on_event(t0 + Duration::from_secs(1)));
        assert!(d.on_event(t0 + Duration::from_secs(1)));
        assert!(!d.on_event(t0 + Duration::from_secs(4)));
        assert_eq!(d.occurrences.len(), 1);
    }

    #[test]
    fn count_with_window_reset_clears_state() {
        let mut d = CountWithinWindow::new(2, Duration::from_secs(3));
        let t0 = Instant::now();
        d.on_event(t0);
        d.on_event(t0 + Duration::from_secs(1));
        assert!(d.on_event(t0 + Duration::from_secs(2)));
        d.reset(t0 + Duration::from_secs(3));
        assert!(!d.on_event(t0 + Duration::from_secs(4)));
    }

    #[test]
    fn holdtime_requires_continuous_duration_before_report() {
        let mut d = HoldTime::new(Duration::from_secs(5));
        let t0 = Instant::now();
        assert!(!d.on_event(t0));
        assert!(!d.on_event(t0 + Duration::from_secs(3)));
        assert!(d.on_event(t0 + Duration::from_secs(6)));
    }

    #[test]
    fn holdtime_reset_resets_timer() {
        let mut d = HoldTime::new(Duration::from_secs(5));
        let t0 = Instant::now();
        d.on_event(t0);
        d.on_event(t0 + Duration::from_secs(4));
        d.reset(t0 + Duration::from_secs(5));
        assert!(!d.on_event(t0 + Duration::from_secs(6)));
    }
}
// Remaining debounce unit tests, recovered from the mangled diff and
// wrapped in their own test module so the fragment is self-contained.
// Stripped generic arguments (`size_of::<IpcDuration>()` etc.) restored
// from the asserted layout (two fields: u64 + u32, repr(C) → size 16, align 8).
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod mode_and_duration_tests {
    use super::*;
    use core::time::Duration;
    use std::time::Instant;

    #[test]
    fn edge_with_cooldown_reports_first_then_suppresses_during_cooldown() {
        let mut d = EdgeWithCooldown::new(Duration::from_secs(5));
        let t0 = Instant::now();
        assert!(d.on_event(t0));
        assert!(!d.on_event(t0 + Duration::from_secs(2)));
        assert!(d.on_event(t0 + Duration::from_secs(6)));
    }

    #[test]
    fn edge_with_cooldown_reset_forces_new_last_report() {
        let mut d = EdgeWithCooldown::new(Duration::from_secs(5));
        let t0 = Instant::now();
        d.on_event(t0);
        d.reset(t0 + Duration::from_secs(2));
        assert!(!d.on_event(t0 + Duration::from_secs(4)));
        assert!(d.on_event(t0 + Duration::from_secs(8)));
    }

    #[test]
    fn debounce_mode_creates_proper_implementations() {
        let d1 = DebounceMode::CountWithinWindow {
            min_count: 2,
            window: Duration::from_secs(3).into(),
        }
        .into_debouncer();
        let d2 = DebounceMode::HoldTime {
            duration: Duration::from_secs(1).into(),
        }
        .into_debouncer();
        let d3 = DebounceMode::EdgeWithCooldown {
            cooldown: Duration::from_secs(10).into(),
        }
        .into_debouncer();

        // Smoke-test: each boxed debouncer accepts events and resets cleanly.
        let now = Instant::now();
        for mut d in [d1, d2, d3] {
            d.on_event(now);
            d.reset(now);
        }
    }

    #[test]
    fn debounce_policy_derive_traits_work() {
        let p1 = DebouncePolicy {
            mode: DebounceMode::HoldTime {
                duration: Duration::from_secs(2).into(),
            },
            log_throttle: Some(Duration::from_secs(10).into()),
        };
        let p2 = p1.clone();
        assert_eq!(p1, p2);
        assert!(format!("{p1:?}").contains("HoldTime"));
    }

    #[test]
    fn ipc_duration_roundtrip() {
        let original = Duration::new(42, 999_999_999);
        let ipc: IpcDuration = original.into();
        let back: Duration = ipc.into();
        assert_eq!(original, back);
    }

    #[test]
    fn ipc_duration_zero() {
        let ipc: IpcDuration = Duration::ZERO.into();
        assert_eq!(ipc.secs, 0);
        assert_eq!(ipc.nanos, 0);
        let back: Duration = ipc.into();
        assert_eq!(back, Duration::ZERO);
    }

    #[test]
    fn ipc_duration_max_nanos() {
        let original = Duration::new(u64::MAX, 999_999_999);
        let ipc: IpcDuration = original.into();
        assert_eq!(ipc.secs, u64::MAX);
        assert_eq!(ipc.nanos, 999_999_999);
    }

    #[test]
    fn ipc_duration_size_and_alignment() {
        assert_eq!(core::mem::size_of::<IpcDuration>(), 16);
        assert_eq!(core::mem::align_of::<IpcDuration>(), 8);
    }

    #[test]
    fn count_with_window_deque_capped_at_min_count() {
        let mut d = CountWithinWindow::new(3, Duration::from_secs(60));
        let t0 = Instant::now();
        // Push 10 events — all within the window
        for i in 0..10u32 {
            d.on_event(t0 + Duration::from_millis(i as u64));
        }
        // Deque should never grow beyond min_count (3)
        assert!(
            d.occurrences.len() <= 3,
            "Deque should be capped at min_count, got: {}",
            d.occurrences.len()
        );
    }

    #[test]
    fn count_with_window_cap_preserves_correctness() {
        let mut d = CountWithinWindow::new(3, Duration::from_secs(5));
        let t0 = Instant::now();
        // First 2 events: not enough
        assert!(!d.on_event(t0));
        assert!(!d.on_event(t0 + Duration::from_secs(1)));
        // Third event: min_count reached
        assert!(d.on_event(t0 + Duration::from_secs(2)));
        // Many more events, deque stays bounded
        for i in 3..100u32 {
            assert!(d.on_event(t0 + Duration::from_millis(2000 + i as u64)));
        }
        assert!(d.occurrences.len() <= 3);
    }
}
When a condition is [`Active`](EnablingConditionStatus::Active), +//! the associated fault detection runs normally. When [`Inactive`](EnablingConditionStatus::Inactive), +//! fault detection is suspended. +//! +//! See `docs/puml/new_enable_condition.puml` and `docs/puml/enable_condition_ntf.puml` +//! for the design-level sequence diagrams. + +use crate::types::ShortString; +use iceoryx2::prelude::ZeroCopySend; +use serde::{Deserialize, Serialize}; + +/// Status of an enabling condition. +/// +/// Enabling conditions gate fault detection: when a condition is `Active`, +/// the associated fault monitors may report faults. When `Inactive`, +/// fault detection for dependent monitors is suspended. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum EnablingConditionStatus { + /// Condition is fulfilled — fault detection is enabled. + Active, + /// Condition is not fulfilled — fault detection is suspended. + Inactive, +} + +/// IPC notification broadcast from DFM to FaultLib instances when an +/// enabling condition status changes. +/// +/// Published on the `dfm/enabling_condition/notification` IPC channel. +/// FaultLib subscribers use this to update local FaultMonitor state and +/// invoke registered callbacks. +#[derive(Debug, Clone, PartialEq, Eq, ZeroCopySend)] +#[repr(C)] +pub struct EnablingConditionNotification { + /// Identifier of the enabling condition (matches the entity used during registration). + pub id: ShortString, + /// New status of the enabling condition. 
+ pub status: EnablingConditionStatus, +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + #[test] + fn status_copy_semantics() { + let a = EnablingConditionStatus::Active; + let b = a; + assert_eq!(a, b); + } + + #[test] + fn status_all_variants() { + let statuses = [EnablingConditionStatus::Active, EnablingConditionStatus::Inactive]; + assert_eq!(statuses.len(), 2); + } + + #[test] + fn status_equality() { + assert_eq!(EnablingConditionStatus::Active, EnablingConditionStatus::Active); + assert_ne!(EnablingConditionStatus::Active, EnablingConditionStatus::Inactive); + } + + #[test] + fn notification_construction() { + let id = ShortString::try_from("vehicle.speed".as_bytes()).unwrap(); + let ntf = EnablingConditionNotification { + id, + status: EnablingConditionStatus::Active, + }; + assert_eq!(ntf.id.to_string(), "vehicle.speed"); + assert_eq!(ntf.status, EnablingConditionStatus::Active); + } + + #[test] + fn notification_clone() { + let id = ShortString::try_from("engine.running".as_bytes()).unwrap(); + let ntf = EnablingConditionNotification { + id, + status: EnablingConditionStatus::Inactive, + }; + let cloned = ntf.clone(); + assert_eq!(cloned.status, EnablingConditionStatus::Inactive); + } +} diff --git a/src/common/src/fault.rs b/src/common/src/fault.rs new file mode 100644 index 0000000..066d685 --- /dev/null +++ b/src/common/src/fault.rs @@ -0,0 +1,505 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::ResetPolicy;
+use crate::debounce::DebounceMode;
+use crate::ids::*;
+use crate::types::*;
+use iceoryx2::prelude::ZeroCopySend;
+use iceoryx2_bb_container::vector::StaticVec;
+use serde::{Deserialize, Serialize};
+
+/// Fixed-capacity vector of compliance tags (max 8).
+pub type ComplianceVec = StaticVec<ComplianceTag, 8>;
+
+/// Unique identifier for a fault.
+///
+/// Three representations are supported to cover different use cases:
+/// numeric codes for DTC-like systems, human-readable text, and UUIDs
+/// for globally unique identification.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, ZeroCopySend)]
+#[repr(C)]
+pub enum FaultId {
+    /// Numeric identifier (e.g. DTC-like `0x7001`).
+    Numeric(u32),
+    /// Human-readable symbolic identifier (e.g. `"hvac.blower.mismatch"`).
+    Text(ShortString),
+    /// 128-bit UUID for globally unique identification.
+    Uuid([u8; 16]),
+}
+
+/// Canonical fault type buckets used for analytics and tooling.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)]
+#[repr(C)]
+pub enum FaultType {
+    /// Hardware fault (sensor, actuator, etc.).
+    Hardware,
+    /// Software fault (assertion, logic error, etc.).
+    Software,
+    /// Communication fault (bus timeout, CRC mismatch, etc.).
+    Communication,
+    /// Configuration fault (invalid parameter, schema mismatch, etc.).
+    Configuration,
+    /// Timing fault (deadline miss, watchdog, etc.).
+    Timing,
+    /// Power-related fault (undervoltage, brownout, etc.).
+    Power,
+    /// Escape hatch for domain-specific groupings until the enum grows.
+    Custom(ShortString),
+}
+
+/// Align severities to DLT-like levels, stable for logging & UI filters.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum FaultSeverity { + /// Finest-grained diagnostic output. + Trace, + /// Diagnostic output useful during development. + Debug, + /// Informational event (no error). + Info, + /// Non-critical issue that may require attention. + Warn, + /// Significant error requiring action. + Error, + /// Unrecoverable failure. + Fatal, +} + +/// Compliance/regulatory tags drive escalation, retention, and workflow. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum ComplianceTag { + /// Fault is relevant for emissions regulation (e.g. OBD-II). + EmissionRelevant, + /// Fault relates to a safety-critical function (e.g. ISO 26262). + SafetyCritical, + /// Fault has security implications. + SecurityRelevant, + /// Fault data must be retained for legal/regulatory purposes. + LegalHold, +} + +/// Lifecycle phase of the reporting component/system (for policy gating). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum LifecyclePhase { + /// System initialising. + Init, + /// Normal operation. + Running, + /// Entering low-power / sleep state. + Suspend, + /// Waking from suspend. + Resume, + /// Orderly shutdown in progress. + Shutdown, +} + +/// State of a fault’s lifecycle. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub enum LifecycleStage { + /// Test not executed yet for this reporting window. + NotTested, + /// Initial failure observed but still within debounce/pending window. + PreFailed, + /// Confirmed failure (debounce satisfied / threshold met). + Failed, + /// Transitioning back to healthy; stability window accumulating. + PrePassed, + /// Test executed and passed (healthy condition). 
+ Passed, +} + +impl LifecycleStage { + /// Check whether a transition from `self` to `to` is valid per the + /// ISO 14229 / DFM fault lifecycle state machine. + /// + /// Valid transitions: + /// ```text + /// NotTested → PreFailed | PrePassed | Failed | Passed + /// PreFailed → Failed | PrePassed | NotTested + /// Failed → PrePassed | Passed | NotTested + /// PrePassed → Passed | PreFailed | NotTested + /// Passed → PreFailed | Failed | NotTested + /// ``` + /// + /// Self-transitions (e.g. Failed → Failed) are allowed as no-ops. + pub fn is_valid_transition(&self, to: &LifecycleStage) -> bool { + if self == to { + return true; + } + use LifecycleStage::*; + matches!( + (self, to), + (NotTested, PreFailed | PrePassed | Failed | Passed) + | (PreFailed, Failed | PrePassed | NotTested) + | (Failed, PrePassed | Passed | NotTested) + | (PrePassed, Passed | PreFailed | NotTested) + | (Passed, PreFailed | Failed | NotTested) + ) + } +} + +/// Immutable, compile-time describer of a fault type (identity + defaults). +/// +/// # Debounce/Reset Fields +/// +/// - `reporter_side_debounce`: Applied before IPC send, reduces network traffic +/// - `manager_side_debounce`: Applied at DFM, enables multi-source aggregation +/// (**reserved for future implementation**) +/// - `reporter_side_reset`: Clears fault after condition passes for duration +/// (**reserved for future implementation**) +/// - `manager_side_reset`: Clears fault after global aging policy +/// (**reserved for future implementation**) +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FaultDescriptor { + /// Unique fault identifier. + pub id: FaultId, + + /// Short display name for the fault. + pub name: ShortString, + /// Human-readable summary (used as SOVD symptom field). + pub summary: Option, + + /// Fault category bucket (hardware, software, comms, etc.). + pub category: FaultType, + /// Default severity level. + pub severity: FaultSeverity, + /// Regulatory/compliance tags. 
+    pub compliance: ComplianceVec,
+
+    /// Reporter-side debounce configuration.
+    /// Applied in `Reporter::publish()` to filter events before IPC transport.
+    pub reporter_side_debounce: Option<DebounceMode>,
+    /// Reporter-side reset/aging configuration.
+    /// **Status: Reserved for future implementation.**
+    pub reporter_side_reset: Option<ResetPolicy>,
+    /// Manager-side debounce configuration.
+    /// **Status: Reserved for future implementation.**
+    pub manager_side_debounce: Option<DebounceMode>,
+    /// Manager-side reset/aging configuration.
+    /// When set, `confirmed_dtc` stays latched after the fault passes until
+    /// the aging policy trigger (power cycles, operation cycles, or time) is met.
+    pub manager_side_reset: Option<ResetPolicy>,
+}
+
+/// IPC-safe timestamp with epoch-relative seconds and nanoseconds.
+///
+/// `std::time::SystemTime` has no stable memory layout, so this type
+/// is used for all timestamps that cross IPC boundaries.
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, ZeroCopySend)]
+#[repr(C)]
+pub struct IpcTimestamp {
+    /// Seconds elapsed since the Unix epoch.
+    pub seconds_since_epoch: u64,
+    /// Sub-second nanoseconds component (0…999 999 999).
+    pub nanoseconds: u32,
+}
+
+impl IpcTimestamp {
+    /// Maximum valid value for the `nanoseconds` field.
+    pub const MAX_NANOS: u32 = 999_999_999;
+
+    /// Create a new `IpcTimestamp` with validation.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err` if `nanoseconds` exceeds 999,999,999.
+    pub fn new(seconds_since_epoch: u64, nanoseconds: u32) -> Result<Self, IpcTimestampError> {
+        if nanoseconds > Self::MAX_NANOS {
+            return Err(IpcTimestampError::NanosecondsOutOfRange(nanoseconds));
+        }
+        Ok(Self {
+            seconds_since_epoch,
+            nanoseconds,
+        })
+    }
+}
+
+/// Error returned when constructing an [`IpcTimestamp`] with invalid fields.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+#[non_exhaustive]
+pub enum IpcTimestampError {
+    /// Nanoseconds value exceeded the valid range.
+ #[error("nanoseconds out of range: {0} (max: 999_999_999)")] + NanosecondsOutOfRange(u32), +} + +/// Concrete record produced on each report() call, also logged. +#[derive(Debug, Clone, PartialEq, ZeroCopySend)] +#[repr(C)] +pub struct FaultRecord { + /// Fault identifier linking this record to its descriptor. + pub id: FaultId, + /// Timestamp of the report. + pub time: IpcTimestamp, + /// Identity of the reporting component. + pub source: SourceId, + /// Current lifecycle phase of the reporter. + pub lifecycle_phase: LifecyclePhase, + /// Current lifecycle stage of this fault occurrence. + pub lifecycle_stage: LifecycleStage, + /// Free-form key-value environment data. + pub env_data: MetadataVec, +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + use std::collections::HashSet; + + // ========== FaultId Tests ========== + + #[test] + fn fault_id_numeric_construction() { + let id = FaultId::Numeric(42); + if let FaultId::Numeric(n) = id { + assert_eq!(n, 42); + } else { + panic!("Expected Numeric variant"); + } + } + + #[test] + fn fault_id_text_construction() { + let id = FaultId::Text(ShortString::try_from("test_fault").unwrap()); + if let FaultId::Text(ref s) = id { + assert_eq!(s.to_string(), "test_fault"); + } else { + panic!("Expected Text variant"); + } + } + + #[test] + fn fault_id_uuid_construction() { + let uuid = [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let id = FaultId::Uuid(uuid); + if let FaultId::Uuid(u) = id { + assert_eq!(u, uuid); + } else { + panic!("Expected Uuid variant"); + } + } + + #[test] + fn fault_id_equality() { + let a = FaultId::Numeric(1); + let b = FaultId::Numeric(1); + let c = FaultId::Numeric(2); + assert_eq!(a, b); + assert_ne!(a, c); + } + + #[test] + fn fault_id_ordering() { + let a = FaultId::Numeric(1); + let b = FaultId::Numeric(2); + assert!(a < b); + } + + #[test] + fn fault_id_hash_works() { + let mut set = HashSet::new(); + 
set.insert(FaultId::Numeric(1)); + set.insert(FaultId::Numeric(2)); + set.insert(FaultId::Numeric(1)); // Duplicate + assert_eq!(set.len(), 2); + } + + #[test] + fn fault_id_clone() { + let a = FaultId::Numeric(42); + let b = a.clone(); + assert_eq!(a, b); + } + + // ========== FaultSeverity Tests ========== + + #[test] + fn fault_severity_copy() { + let a = FaultSeverity::Error; + let b = a; // Copy + assert_eq!(a, b); + } + + #[test] + fn fault_severity_all_variants() { + let severities = [ + FaultSeverity::Trace, + FaultSeverity::Debug, + FaultSeverity::Info, + FaultSeverity::Warn, + FaultSeverity::Error, + FaultSeverity::Fatal, + ]; + assert_eq!(severities.len(), 6); + } + + // ========== LifecycleStage Tests ========== + + #[test] + fn lifecycle_stage_copy() { + let a = LifecycleStage::Failed; + let b = a; + assert_eq!(a, b); + } + + #[test] + fn lifecycle_stage_all_variants() { + let stages = [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ]; + assert_eq!(stages.len(), 5); + } + + // ========== LifecycleStage::is_valid_transition Tests ========== + + #[test] + fn lifecycle_self_transitions_are_valid() { + for stage in [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ] { + assert!(stage.is_valid_transition(&stage), "{stage:?} → {stage:?} should be valid"); + } + } + + #[test] + fn lifecycle_valid_transitions() { + use LifecycleStage::*; + let valid = [ + (NotTested, PreFailed), + (NotTested, PrePassed), + (NotTested, Failed), + (NotTested, Passed), + (PreFailed, Failed), + (PreFailed, PrePassed), + (PreFailed, NotTested), + (Failed, PrePassed), + (Failed, Passed), + (Failed, NotTested), + (PrePassed, Passed), + (PrePassed, PreFailed), + (PrePassed, NotTested), + (Passed, PreFailed), + (Passed, Failed), + (Passed, NotTested), + ]; + for (from, to) in valid { + 
assert!(from.is_valid_transition(&to), "{from:?} → {to:?} should be valid"); + } + } + + #[test] + fn lifecycle_invalid_transitions() { + use LifecycleStage::*; + let invalid = [(PreFailed, Passed), (Failed, PreFailed), (PrePassed, Failed), (Passed, PrePassed)]; + for (from, to) in invalid { + assert!(!from.is_valid_transition(&to), "{from:?} → {to:?} should be invalid"); + } + } + + // ========== IpcTimestamp Tests ========== + + #[test] + fn ipc_timestamp_default_is_zero() { + let ts = IpcTimestamp::default(); + assert_eq!(ts.seconds_since_epoch, 0); + assert_eq!(ts.nanoseconds, 0); + } + + #[test] + fn ipc_timestamp_construction() { + let ts = IpcTimestamp { + seconds_since_epoch: 1705312200, + nanoseconds: 123456789, + }; + assert_eq!(ts.seconds_since_epoch, 1705312200); + assert_eq!(ts.nanoseconds, 123456789); + } + + // ========== IpcTimestamp::new validation ========== + + #[test] + fn ipc_timestamp_new_zero_nanos_ok() { + let ts = IpcTimestamp::new(0, 0); + assert!(ts.is_ok()); + let ts = ts.unwrap(); + assert_eq!(ts.seconds_since_epoch, 0); + assert_eq!(ts.nanoseconds, 0); + } + + #[test] + fn ipc_timestamp_new_max_nanos_ok() { + let ts = IpcTimestamp::new(42, 999_999_999); + assert!(ts.is_ok()); + let ts = ts.unwrap(); + assert_eq!(ts.seconds_since_epoch, 42); + assert_eq!(ts.nanoseconds, 999_999_999); + } + + #[test] + fn ipc_timestamp_new_nanos_overflow_err() { + let ts = IpcTimestamp::new(0, 1_000_000_000); + assert!(ts.is_err()); + assert_eq!(ts.unwrap_err(), super::IpcTimestampError::NanosecondsOutOfRange(1_000_000_000)); + } + + #[test] + fn ipc_timestamp_new_max_u32_nanos_err() { + let ts = IpcTimestamp::new(0, u32::MAX); + assert!(ts.is_err()); + } + + #[test] + fn ipc_timestamp_error_display() { + let err = super::IpcTimestampError::NanosecondsOutOfRange(1_000_000_000); + let msg = format!("{err}"); + assert!(msg.contains("nanoseconds out of range")); + assert!(msg.contains("1000000000")); + } + + // ========== ComplianceTag Tests ========== + + 
#[test] + fn compliance_tag_all_variants() { + let tags = [ + ComplianceTag::EmissionRelevant, + ComplianceTag::SafetyCritical, + ComplianceTag::SecurityRelevant, + ComplianceTag::LegalHold, + ]; + assert_eq!(tags.len(), 4); + } + + // ========== FaultType Tests ========== + + #[test] + fn fault_type_custom() { + let custom = FaultType::Custom(ShortString::try_from("domain_specific").unwrap()); + if let FaultType::Custom(ref s) = custom { + assert_eq!(s.to_string(), "domain_specific"); + } else { + panic!("Expected Custom variant"); + } + } +} diff --git a/src/common/src/ids.rs b/src/common/src/ids.rs new file mode 100644 index 0000000..6296456 --- /dev/null +++ b/src/common/src/ids.rs @@ -0,0 +1,195 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use crate::types::*; +use core::fmt; +use iceoryx2::prelude::ZeroCopySend; +use serde::{Deserialize, Serialize}; + +// Lightweight identifiers that keep fault attribution consistent across the fleet. + +/// Identity of the component reporting a fault. +/// +/// Encoded as a set of optional tags that together uniquely identify a +/// reporting entity within a vehicle. All fields use IPC-safe fixed-size +/// strings. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, ZeroCopySend)] +#[repr(C)] +pub struct SourceId { + /// Logical entity name (e.g. `"ADAS.Perception"`, `"HVAC"`). + pub entity: ShortString, + /// ECU identifier (e.g. `"ECU-A"`). + pub ecu: Option, + /// Domain grouping (e.g. `"ADAS"`, `"IVI"`). + pub domain: Option, + /// Software component name within the entity. + pub sw_component: Option, + /// Instance discriminator when multiple replicas exist. 
+ pub instance: Option, +} + +impl fmt::Display for SourceId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.entity)?; + let tags: &[(&str, &Option)] = &[ + ("ecu", &self.ecu), + ("dom", &self.domain), + ("comp", &self.sw_component), + ("inst", &self.instance), + ]; + for (label, value) in tags { + if let Some(v) = value { + write!(f, " {label}:{v}")?; + } + } + Ok(()) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + fn make_short(s: &str) -> ShortString { + ShortString::try_from(s.as_bytes()).unwrap() + } + + fn full_source_id() -> SourceId { + SourceId { + entity: make_short("HVAC"), + ecu: Some(make_short("ECU-A")), + domain: Some(make_short("Climate")), + sw_component: Some(make_short("Compressor")), + instance: Some(make_short("0")), + } + } + + fn minimal_source_id() -> SourceId { + SourceId { + entity: make_short("ADAS"), + ecu: None, + domain: None, + sw_component: None, + instance: None, + } + } + + #[test] + fn construction_all_fields() { + let sid = full_source_id(); + assert_eq!(sid.entity.to_string(), "HVAC"); + assert_eq!(sid.ecu.unwrap().to_string(), "ECU-A"); + assert_eq!(sid.domain.unwrap().to_string(), "Climate"); + assert_eq!(sid.sw_component.unwrap().to_string(), "Compressor"); + assert_eq!(sid.instance.unwrap().to_string(), "0"); + } + + #[test] + fn construction_entity_only() { + let sid = minimal_source_id(); + assert_eq!(sid.entity.to_string(), "ADAS"); + assert!(sid.ecu.is_none()); + assert!(sid.domain.is_none()); + assert!(sid.sw_component.is_none()); + assert!(sid.instance.is_none()); + } + + #[test] + fn display_all_fields_present() { + let sid = full_source_id(); + let out = sid.to_string(); + assert_eq!(out, "HVAC ecu:ECU-A dom:Climate comp:Compressor inst:0"); + } + + #[test] + fn display_entity_only_hides_none() { + let sid = minimal_source_id(); + let out = sid.to_string(); + assert_eq!(out, "ADAS"); + } + + #[test] + fn 
display_partial_fields() { + let sid = SourceId { + entity: make_short("IVI"), + ecu: None, + domain: Some(make_short("Infotainment")), + sw_component: None, + instance: Some(make_short("1")), + }; + let out = sid.to_string(); + assert_eq!(out, "IVI dom:Infotainment inst:1"); + } + + #[test] + fn partial_eq_identical() { + let a = full_source_id(); + let b = full_source_id(); + assert_eq!(a, b); + } + + #[test] + fn partial_eq_different_entity() { + let a = minimal_source_id(); + let mut b = minimal_source_id(); + b.entity = make_short("Different"); + assert_ne!(a, b); + } + + #[test] + fn clone_produces_equal_value() { + let a = full_source_id(); + let b = a.clone(); + assert_eq!(a, b); + } + + #[test] + fn empty_string_entity() { + let sid = SourceId { + entity: ShortString::new(), + ecu: None, + domain: None, + sw_component: None, + instance: None, + }; + assert_eq!(sid.to_string(), ""); + } + + #[test] + fn hash_consistency() { + use core::hash::{Hash, Hasher}; + use std::collections::hash_map::DefaultHasher; + let a = full_source_id(); + let b = full_source_id(); + let mut ha = DefaultHasher::new(); + let mut hb = DefaultHasher::new(); + a.hash(&mut ha); + b.hash(&mut hb); + assert_eq!(ha.finish(), hb.finish()); + } + + #[test] + fn serde_roundtrip_json() { + let original = full_source_id(); + let json = serde_json::to_string(&original).expect("serialize"); + let restored: SourceId = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(original, restored); + } + + #[test] + fn serde_roundtrip_minimal() { + let original = minimal_source_id(); + let json = serde_json::to_string(&original).expect("serialize"); + let restored: SourceId = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(original, restored); + } +} diff --git a/src/common/src/ipc_service_name.rs b/src/common/src/ipc_service_name.rs new file mode 100644 index 0000000..bae5e8f --- /dev/null +++ b/src/common/src/ipc_service_name.rs @@ -0,0 +1,65 @@ +// Copyright (c) 2026 
Contributors to the Eclipse Foundation
+//
+// See the NOTICE file(s) distributed with this work for additional
+// information regarding copyright ownership.
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! IPC service name constants for iceoryx2 publish-subscribe channels.
+//!
+//! # Naming convention
+//!
+//! All service names use a hierarchical slash-separated format:
+//!
+//! ```text
+//! <component>/<purpose>[/<sub-channel>]
+//! ```
+//!
+//! - **`<component>`** — logical component, e.g. `dfm` (Diagnostic Fault Manager).
+//! - **`<purpose>`** — communication purpose, e.g. `event`, `enabling_condition`.
+//! - **`<sub-channel>`** — optional sub-channel, e.g. `hash/response`, `notification`.
+//!
+//! # Constraints
+//!
+//! - Must be valid iceoryx2 [`ServiceName`](iceoryx2::prelude::ServiceName) values.
+//! - Maximum length is 255 bytes (iceoryx2 limit).
+//! - Characters allowed: alphanumeric, `/`, `_`, `-`, `.`.
+//! - Must not start or end with `/`.
+//!
+//! # Examples
+//!
+//! | Constant | Value | Direction |
+//! |----------|-------|-----------|
+//! | `DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME` | `dfm/event` | reporter → DFM |
+//! | `DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME` | `dfm/event/hash/response` | DFM → reporter |
+//! | `ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME` | `dfm/enabling_condition/notification` | DFM → reporters |
+
+/// Iceoryx2 service name for the main diagnostic-event channel (reporter → DFM).
+///
+/// Reporters publish [`DiagnosticEvent`](crate::types::DiagnosticEvent) messages on this
+/// channel. The Diagnostic Fault Manager subscribes and processes them.
+pub const DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME: &str = "dfm/event";
+
+/// Iceoryx2 service name for hash-check responses (DFM → reporter).
+/// +/// After a reporter registers, the DFM replies on this channel with a +/// hash-check response confirming catalog consistency. +pub const DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME: &str = "dfm/event/hash/response"; + +/// Iceoryx2 service name for enabling-condition notifications (DFM → reporters). +/// +/// The DFM publishes [`EnablingConditionNotification`](crate::EnablingConditionNotification) +/// messages whenever an enabling condition changes state. +pub const ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME: &str = "dfm/enabling_condition/notification"; + +/// Iceoryx2 service name for DFM query request-response (external tool -> DFM). +/// +/// Used by the iceoryx2 native request-response API. External diagnostic +/// tools send [`DfmQueryRequest`](crate::query_protocol::DfmQueryRequest) +/// and receive [`DfmQueryResponse`](crate::query_protocol::DfmQueryResponse). +pub const DFM_QUERY_SERVICE_NAME: &str = "dfm/query"; diff --git a/src/common/src/ipc_service_type.rs b/src/common/src/ipc_service_type.rs new file mode 100644 index 0000000..f28c71d --- /dev/null +++ b/src/common/src/ipc_service_type.rs @@ -0,0 +1,15 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use iceoryx2::prelude::*; + +/// Iceoryx2 service flavour used for all fault-lib IPC channels. 
+pub type ServiceType = ipc_threadsafe::Service; diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs new file mode 100644 index 0000000..c6d95d2 --- /dev/null +++ b/src/common/src/lib.rs @@ -0,0 +1,64 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Shared types and utilities for the fault-lib ecosystem. +//! +//! This crate provides the foundational types used by both the reporter side +//! (`fault_lib`) and the Diagnostic Fault Manager side (`dfm_lib`): +//! +//! - **Fault model** — [`FaultId`], [`FaultDescriptor`](fault::FaultDescriptor), +//! [`FaultRecord`](fault::FaultRecord), severity levels, lifecycle stages, +//! and compliance tags (see [`fault`]). +//! - **Catalog** — [`FaultCatalog`](catalog::FaultCatalog) and its builder for +//! loading fault definitions from JSON or code. +//! - **Debounce** — IPC-safe duration type and debounce strategies +//! (`CountWithinWindow`, `HoldTime`, `EdgeWithCooldown`) in [`debounce`]. +//! - **Configuration** — reset/aging policies and per-report options +//! ([`config`]). +//! - **IPC plumbing** — fixed-size string types, service names, and the +//! `DiagnosticEvent` envelope shared over iceoryx2. +//! +//! All public `#[repr(C)]` types in this crate implement `ZeroCopySend` so +//! they can be transferred through iceoryx2 shared-memory channels. + +#![warn(missing_docs)] + +extern crate alloc; + +/// Fault catalog construction and management. +pub mod catalog; +/// Reset/aging policies and per-report runtime options. +pub mod config; +/// Debounce strategies and IPC-safe duration type. +pub mod debounce; +/// Enabling condition types for fault detection gating. 
+pub mod enabling_condition; +/// Core fault model: IDs, descriptors, records, severities, lifecycle. +pub mod fault; +/// Source identification types for fleet-wide fault attribution. +pub mod ids; +/// Well-known iceoryx2 service name constants. +pub mod ipc_service_name; +/// IPC service type alias for iceoryx2. +pub mod ipc_service_type; +/// IPC wire types for DFM query/clear request-response protocol. +pub mod query_protocol; +/// Error types returned by fault sink implementations. +pub mod sink_error; +/// Fixed-size string types, metadata vectors, and the `DiagnosticEvent` envelope. +pub mod types; + +pub use config::{ReportOptions, ResetPolicy}; +pub use enabling_condition::{EnablingConditionNotification, EnablingConditionStatus}; +pub use fault::FaultId; +pub use ids::SourceId; +pub use types::{to_static_long_string, to_static_short_string}; diff --git a/src/common/src/query_protocol.rs b/src/common/src/query_protocol.rs new file mode 100644 index 0000000..96955a3 --- /dev/null +++ b/src/common/src/query_protocol.rs @@ -0,0 +1,206 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! IPC wire types for DFM query/clear request-response protocol. +//! +//! These types are `#[repr(C)]` + [`ZeroCopySend`] for iceoryx2 shared-memory +//! transport. [`IpcSovdFault`] is the IPC-safe equivalent of +//! `SovdFault` (in dfm_lib) with fixed-size fields. +//! +//! # Lossy conversion +//! +//! `SovdFault` -> `IpcSovdFault` is lossy: +//! - `String` fields truncated to `ShortString` (64B) or `LongString` (128B) +//! - `HashMap status` omitted (reconstructable from bool fields) +//! 
- `Option schema` omitted (not used in runtime query flow) +//! - `Option` -> `bool` (None -> false) + +use crate::types::{LongString, ShortString}; +use iceoryx2::prelude::ZeroCopySend; +use iceoryx2_bb_container::vector::StaticVec; + +/// Maximum number of faults in a single IPC response. +pub const MAX_FAULTS_PER_RESPONSE: usize = 64; + +/// IPC-safe fault representation with fixed-size fields. +/// +/// Converted from/to `SovdFault` via conversion functions in dfm_lib. +/// See module-level docs for lossy conversion details. +#[derive(Debug, Clone, ZeroCopySend)] +#[repr(C)] +pub struct IpcSovdFault { + // --- Core identification --- + /// Fault code (e.g., "0x1001", "fault_a"). Truncated to 64B. + pub code: ShortString, + /// Human-readable display code. Truncated to 64B. + pub display_code: ShortString, + /// Fault scope (e.g., "ecu"). Truncated to 64B. + pub scope: ShortString, + /// Fault name. Truncated to 64B. + pub fault_name: ShortString, + /// Translation key. Truncated to 64B. + pub fault_translation_id: ShortString, + /// Severity level. + pub severity: u32, + + // --- DTC status flags (flattened from SovdFaultStatus) --- + /// ISO 14229 status mask byte. + pub status_mask: u8, + /// UDS bit 0: testFailed. + pub test_failed: bool, + /// UDS bit 1: testFailedThisOperationCycle. + pub test_failed_this_operation_cycle: bool, + /// UDS bit 2: pendingDTC. + pub pending_dtc: bool, + /// UDS bit 3: confirmedDTC. + pub confirmed_dtc: bool, + /// UDS bit 4: testNotCompletedSinceLastClear. + pub test_not_completed_since_last_clear: bool, + /// UDS bit 5: testFailedSinceLastClear. + pub test_failed_since_last_clear: bool, + /// UDS bit 6: testNotCompletedThisOperationCycle. + pub test_not_completed_this_operation_cycle: bool, + /// UDS bit 7: warningIndicatorRequested. + pub warning_indicator_requested: bool, + + // --- Counters & timestamps --- + /// Number of occurrences. + pub occurrence_counter: u32, + /// Aging cycles since last occurrence. 
+    pub aging_counter: u32,
+    /// Number of heals/resets.
+    pub healing_counter: u32,
+    /// Unix timestamp (secs) of first occurrence. 0 = not set.
+    pub first_occurrence_secs: u64,
+    /// Unix timestamp (secs) of last occurrence. 0 = not set.
+    pub last_occurrence_secs: u64,
+
+    // --- Optional fields (has_* pattern for Option encoding) ---
+    /// Symptom description. Truncated to 128B. Check `has_symptom`.
+    pub symptom: LongString,
+    /// Whether `symptom` is populated (encodes `Option::Some`).
+    pub has_symptom: bool,
+    /// Symptom translation key. Check `has_symptom_translation_id`.
+    pub symptom_translation_id: ShortString,
+    /// Whether `symptom_translation_id` is populated.
+    pub has_symptom_translation_id: bool,
+}
+
+/// IPC-safe environment data: up to 8 key-value pairs.
+pub type IpcEnvData = StaticVec<(ShortString, ShortString), 8>;
+
+/// Response payload for `GetAllFaults`.
+#[derive(Debug, Clone, ZeroCopySend)]
+#[repr(C)]
+pub struct IpcFaultListResponse {
+    /// Faults in this response (up to [`MAX_FAULTS_PER_RESPONSE`]).
+    pub faults: StaticVec<IpcSovdFault, MAX_FAULTS_PER_RESPONSE>,
+    /// Total number of faults in the catalog (may exceed `faults.len()`).
+    pub total_count: u32,
+}
+
+/// Request variants for the DFM query service.
+#[derive(Debug, Clone, ZeroCopySend)]
+#[repr(C)]
+pub enum DfmQueryRequest {
+    /// List all faults for entity at `path`.
+    GetAllFaults(LongString),
+    /// Get single fault: `(path, fault_code)`.
+    GetFault(LongString, ShortString),
+    /// Delete all fault state for entity at `path` (removes from storage entirely).
+    /// Note: this is SOVD DeleteFault, not UDS $14 ClearDiagnosticInformation.
+    // TODO: Add ClearDtc/ClearSingleDtc variants for ISO 14229 UDS $14 compliance
+    DeleteAllFaults(LongString),
+    /// Delete single fault state: `(path, fault_code)` (removes from storage entirely).
+    /// Note: this is SOVD DeleteFault, not UDS $14 ClearDiagnosticInformation.
+    DeleteFault(LongString, ShortString),
+}
+
+/// Error variants returned over IPC.
+#[derive(Debug, Clone, ZeroCopySend)] +#[repr(C)] +pub enum DfmQueryError { + /// Invalid path or argument. + BadArgument, + /// Fault not found. + NotFound, + /// Storage backend error (message truncated to 64B). + StorageError(ShortString), +} + +/// Response variants for the DFM query service. +/// +/// Large variant size disparity (FaultList vs Ok) is intentional: +/// boxing is not possible for IPC types crossing shared-memory boundaries. +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone, ZeroCopySend)] +#[repr(C)] +pub enum DfmQueryResponse { + /// List of faults (response to `GetAllFaults`). + FaultList(IpcFaultListResponse), + /// Single fault with env data (response to `GetFault`). + SingleFault(IpcSovdFault, IpcEnvData), + /// Success (response to `DeleteAllFaults` / `DeleteFault`). + Ok, + /// Error response. + Error(DfmQueryError), +} + +#[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] +mod tests { + use super::*; + use iceoryx2_bb_container::vector::Vector; + + #[test] + fn ipc_sovd_fault_is_zero_copy_send() { + // Compile-time check: ZeroCopySend is required for iceoryx2 shared memory. 
+        fn assert_zero_copy_send<T: ZeroCopySend>() {}
+        assert_zero_copy_send::<IpcSovdFault>();
+        assert_zero_copy_send::<IpcFaultListResponse>();
+        assert_zero_copy_send::<DfmQueryRequest>();
+        assert_zero_copy_send::<DfmQueryResponse>();
+    }
+
+    #[test]
+    fn ipc_fault_list_response_capacity() {
+        let response = IpcFaultListResponse {
+            faults: StaticVec::new(),
+            total_count: 100,
+        };
+        assert_eq!(response.faults.capacity(), MAX_FAULTS_PER_RESPONSE);
+        assert_eq!(response.faults.len(), 0);
+        assert_eq!(response.total_count, 100);
+    }
+
+    #[test]
+    fn dfm_query_request_variants_constructible() {
+        let _req1 = DfmQueryRequest::GetAllFaults(LongString::from_str_truncated("hvac").unwrap());
+        let _req2 = DfmQueryRequest::GetFault(LongString::from_str_truncated("hvac").unwrap(), ShortString::try_from("0x7001").unwrap());
+        let _req3 = DfmQueryRequest::DeleteAllFaults(LongString::from_str_truncated("hvac").unwrap());
+        let _req4 = DfmQueryRequest::DeleteFault(LongString::from_str_truncated("hvac").unwrap(), ShortString::try_from("0x7001").unwrap());
+    }
+
+    #[test]
+    fn dfm_query_response_variants_constructible() {
+        let _ok = DfmQueryResponse::Ok;
+        let _err_bad = DfmQueryResponse::Error(DfmQueryError::BadArgument);
+        let _err_nf = DfmQueryResponse::Error(DfmQueryError::NotFound);
+        let _err_stor = DfmQueryResponse::Error(DfmQueryError::StorageError(ShortString::try_from("disk full").unwrap()));
+    }
+}
diff --git a/src/common/src/sink_error.rs b/src/common/src/sink_error.rs
new file mode 100644
index 0000000..e2e63d1
--- /dev/null
+++ b/src/common/src/sink_error.rs
@@ -0,0 +1,43 @@
+// Copyright (c) 2026 Contributors to the Eclipse Foundation
+//
+// See the NOTICE file(s) distributed with this work for additional
+// information regarding copyright ownership.
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use alloc::borrow::Cow;
+
+/// Errors that may occur when publishing a fault record through a sink.
+#[derive(thiserror::Error, Debug, PartialEq, Eq, Clone)] +#[non_exhaustive] +pub enum SinkError { + /// The IPC transport is not available (e.g. DFM not running). + #[error("transport unavailable")] + TransportDown, + /// The event was dropped because the publish rate exceeded a limit. + #[error("rate limited")] + RateLimited, + /// The caller lacks permission to publish on this channel. + #[error("permission denied")] + PermissionDenied, + /// The fault descriptor is invalid or refers to an unknown fault. + #[error("invalid descriptor: {0}")] + BadDescriptor(Cow<'static, str>), + /// Catch-all for errors not covered by specific variants. + #[error("other: {0}")] + Other(Cow<'static, str>), + /// The iceoryx2 service name could not be created. + #[error("invalid service name")] + InvalidServiceName, + /// The operation timed out. + #[error("timeout")] + Timeout, + /// The internal send queue is full. + #[error("queue full")] + QueueFull, +} diff --git a/src/common/src/types.rs b/src/common/src/types.rs new file mode 100644 index 0000000..6f845bc --- /dev/null +++ b/src/common/src/types.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::enabling_condition::EnablingConditionStatus; +use crate::fault::FaultRecord; +use iceoryx2::prelude::ZeroCopySend; +use iceoryx2_bb_container::string::{StaticString, StringModificationError}; +use iceoryx2_bb_container::vector::StaticVec; + +/// IPC-safe fixed-size string (64 bytes). +pub type ShortString = StaticString<64>; +/// Capacity of [`LongString`] — the const-generic bound passed to `StaticString`. 
+pub const LONG_STRING_CAPACITY: usize = 128;
+/// IPC-safe fixed-size string (128 bytes).
+pub type LongString = StaticString<LONG_STRING_CAPACITY>;
+/// Fixed-capacity key-value metadata vector (max 8 entries).
+pub type MetadataVec = StaticVec<(ShortString, ShortString), 8>;
+/// Fixed-capacity vector for SHA-256 hash bytes (32 bytes).
+pub type Sha256Vec = StaticVec<u8, 32>;
+
+/// IPC envelope that carries different diagnostic event types over a
+/// single iceoryx2 channel.
+///
+/// Size disparity: `Fault` (~1KB) vs `EnablingConditionRegister` (~64B).
+/// Boxing the large variant is NOT possible because this type crosses IPC
+/// boundaries via iceoryx2 shared memory, which requires `#[repr(C)]` layout
+/// and `ZeroCopySend`. `Box` is a heap pointer — not valid across processes.
+#[allow(clippy::large_enum_variant)]
+#[derive(Debug, Clone, ZeroCopySend)]
+#[repr(C)]
+pub enum DiagnosticEvent {
+    /// Catalog hash for handshake verification.
+    Hash((LongString, Sha256Vec)),
+    /// Fault record with its catalog identifier.
+    Fault((LongString, FaultRecord)),
+    /// Register a new enabling condition with the DFM.
+    /// Payload: SOVD entity name used as the condition identifier.
+    EnablingConditionRegister(ShortString),
+    /// Report an enabling condition status change to the DFM.
+    /// Payload: (condition id, new status).
+    EnablingConditionStatusChange((ShortString, EnablingConditionStatus)),
+}
+
+/// Convert a byte-like value into a [`ShortString`] (64-byte static string).
+pub fn to_static_short_string<T: AsRef<[u8]>>(input: T) -> Result<ShortString, StringModificationError> {
+    StaticString::try_from(input.as_ref())
+}
+
+/// Convert a byte-like value into a [`LongString`] (128-byte static string).
+pub fn to_static_long_string<T: AsRef<[u8]>>(input: T) -> Result<LongString, StringModificationError> {
+    StaticString::try_from(input.as_ref())
+}

From 4757402f9e83983c873b69c4a0f0d936b2194f16 Mon Sep 17 00:00:00 2001
From: Bartosz Burda
Date: Wed, 25 Feb 2026 15:27:41 +0100
Subject: [PATCH 3/7] feat(fault_lib): implement reporter-side fault management

Fault reporter API, IPC worker with exponential backoff retry, fault
catalog validation, enabling condition management, and FaultManagerSink
for iceoryx2 transport.
---
 src/fault_lib/BUILD | 113 ++
 src/fault_lib/Cargo.toml | 36 +
 .../examples/catalog_and_reporter.rs | 90 +
 src/fault_lib/examples/config_1.json | 54 +
 src/fault_lib/examples/tst_app.rs | 90 +
 src/fault_lib/src/api.rs | 300 +++
 src/fault_lib/src/catalog.rs | 215 +++
 src/fault_lib/src/enabling_condition.rs | 635 ++++++
 src/fault_lib/src/fault_manager_sink.rs | 633 ++++++
 src/fault_lib/src/ipc_worker.rs | 774 ++++++++
 src/fault_lib/src/lib.rs | 62 +
 src/fault_lib/src/reporter.rs | 1715 +++++++++++++++++
 src/fault_lib/src/sink.rs | 69 +
 src/fault_lib/src/test_utils.rs | 268 +++
 src/fault_lib/src/utils.rs | 95 +
 .../tests/data/hvac_fault_catalog.json | 53 +
 .../tests/data/ivi_fault_catalog.json | 54 +
 17 files changed, 5256 insertions(+)
 create mode 100644 src/fault_lib/BUILD
 create mode 100644 src/fault_lib/Cargo.toml
 create mode 100644 src/fault_lib/examples/catalog_and_reporter.rs
 create mode 100644 src/fault_lib/examples/config_1.json
 create mode 100644 src/fault_lib/examples/tst_app.rs
 create mode 100644 src/fault_lib/src/api.rs
 create mode 100644 src/fault_lib/src/catalog.rs
 create mode 100644 src/fault_lib/src/enabling_condition.rs
 create mode 100644 src/fault_lib/src/fault_manager_sink.rs
 create mode 100644 src/fault_lib/src/ipc_worker.rs
 create mode 100644 src/fault_lib/src/lib.rs
 create mode 100644 src/fault_lib/src/reporter.rs
 create mode 100644 src/fault_lib/src/sink.rs
 create mode 100644 src/fault_lib/src/test_utils.rs
 create mode 100644 src/fault_lib/src/utils.rs
 create mode 100644 
src/fault_lib/tests/data/hvac_fault_catalog.json create mode 100644 src/fault_lib/tests/data/ivi_fault_catalog.json diff --git a/src/fault_lib/BUILD b/src/fault_lib/BUILD new file mode 100644 index 0000000..23bc28b --- /dev/null +++ b/src/fault_lib/BUILD @@ -0,0 +1,113 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test") + +filegroup( + name = "fault_lib_srcs", + srcs = glob(["src/**/*.rs"]), +) + +rust_library( + name = "fault_lib", + srcs = [":fault_lib_srcs"], + crate_name = "fault_lib", + edition = "2024", + visibility = ["//visibility:public"], + deps = [ + "//src/common", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:sha2", + "@score_fault_lib_crates//:thiserror", + ], +) + +# Library variant with test utilities exposed (for example binaries). 
+rust_library( + name = "fault_lib_testutils", + srcs = [":fault_lib_srcs"], + crate_features = ["testutils"], + crate_name = "fault_lib", + edition = "2024", + deps = [ + "//src/common", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:sha2", + "@score_fault_lib_crates//:thiserror", + ], +) + +# IPC integration tests are guarded by `#[serial(ipc)]` from the serial_test +# crate, ensuring tests that use real iceoryx2 shared-memory resources never +# run concurrently. No global RUST_TEST_THREADS=1 is needed. +rust_test( + name = "tests", + srcs = [":fault_lib_srcs"], + data = glob(["tests/data/**"]), + edition = "2024", + env = { + "CARGO_MANIFEST_DIR": "src/fault_lib", + }, + deps = [ + "//src/common", + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:mockall", + "@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:serial_test", + "@score_fault_lib_crates//:sha2", + "@score_fault_lib_crates//:thiserror", + ], +) + +rust_binary( + name = "tst_app", + srcs = ["examples/tst_app.rs"], + edition = "2024", + deps = [ + ":fault_lib", + "//src/common", + "@score_fault_lib_crates//:clap", + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + ], +) + +rust_binary( + name = "catalog_and_reporter", + srcs = ["examples/catalog_and_reporter.rs"], + edition = "2024", + deps = [ + ":fault_lib_testutils", + "//src/common", + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + 
"@score_fault_lib_crates//:serde", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:sha2", + ], +) diff --git a/src/fault_lib/Cargo.toml b/src/fault_lib/Cargo.toml new file mode 100644 index 0000000..8056d62 --- /dev/null +++ b/src/fault_lib/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "fault_lib" +version.workspace = true +edition.workspace = true +license-file.workspace = true +readme.workspace = true + +[features] +default = [] +testutils = [] + +[lints] +workspace = true + +[dependencies] +common = { path = "../common" } +iceoryx2.workspace = true +iceoryx2-bb-container.workspace = true +log = { workspace = true, features = ["std"] } +serde_json.workspace = true +serde = { workspace = true, features = ["derive"] } +sha2.workspace = true +thiserror.workspace = true + +[dev-dependencies] +clap = { version = "4.5.53", features = ["derive"] } +env_logger.workspace = true +mockall.workspace = true +serial_test.workspace = true + +[[example]] +name = "tst_app" +required-features = ["testutils"] + +[[example]] +name = "catalog_and_reporter" diff --git a/src/fault_lib/examples/catalog_and_reporter.rs b/src/fault_lib/examples/catalog_and_reporter.rs new file mode 100644 index 0000000..da19dcf --- /dev/null +++ b/src/fault_lib/examples/catalog_and_reporter.rs @@ -0,0 +1,90 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use common::{ + SourceId, + fault::{FaultId, LifecyclePhase, LifecycleStage}, + types::*, +}; +use fault_lib::{ + FaultApi, + catalog::FaultCatalogBuilder, + reporter::{Reporter, ReporterApi, ReporterConfig}, +}; +use std::thread; + +fn main() { + let json = std::fs::read_to_string("src/fault_lib/tests/data/hvac_fault_catalog.json").expect("catalog file"); + let _api = FaultApi::new(FaultCatalogBuilder::new().json_string(&json).expect("builder config").build()); + + let t1 = thread::spawn(move || { + let source = SourceId { + entity: to_static_short_string("entity1").unwrap(), + ecu: Some(to_static_short_string("ecu").unwrap()), + domain: Some(to_static_short_string("domain").unwrap()), + sw_component: Some(to_static_short_string("component1").unwrap()), + instance: Some(to_static_short_string("1").unwrap()), + }; + let config = ReporterConfig { + source, + lifecycle_phase: LifecyclePhase::Running, + default_env_data: MetadataVec::try_from( + &[ + (to_static_short_string("k1").unwrap(), to_static_short_string("v1").unwrap()), + (to_static_short_string("k2").unwrap(), to_static_short_string("v2").unwrap()), + ][..], + ) + .unwrap(), + }; + + let mut reporter = Reporter::new(&FaultId::Numeric(0x7001), config).expect("get_descriptor failed"); + + let record = reporter.create_record(LifecycleStage::Passed); + + let _ = reporter.publish("test/path", record); + }); + + let t2 = thread::spawn(move || { + let source = SourceId { + entity: to_static_short_string("entity2").unwrap(), + ecu: Some(to_static_short_string("ecu").unwrap()), + domain: Some(to_static_short_string("domain").unwrap()), + sw_component: Some(to_static_short_string("component2").unwrap()), + instance: Some(to_static_short_string("2").unwrap()), + }; + 
let config = ReporterConfig { + source, + lifecycle_phase: LifecyclePhase::Running, + default_env_data: MetadataVec::try_from( + &[ + (to_static_short_string("k1").unwrap(), to_static_short_string("v1").unwrap()), + (to_static_short_string("k2").unwrap(), to_static_short_string("v2").unwrap()), + ][..], + ) + .unwrap(), + }; + + let mut reporter = Reporter::new( + &FaultId::Text(to_static_short_string("hvac.blower.speed_sensor_mismatch").unwrap()), + config, + ) + .expect("get_descriptor failed"); + + let record = reporter.create_record(LifecycleStage::Passed); + + let _ = reporter.publish("test/path", record); + }); + + let _ = t1.join(); + let _ = t2.join(); +} diff --git a/src/fault_lib/examples/config_1.json b/src/fault_lib/examples/config_1.json new file mode 100644 index 0000000..9bc7877 --- /dev/null +++ b/src/fault_lib/examples/config_1.json @@ -0,0 +1,54 @@ +{ + "id": "test_app1", + "version": 1, + "faults": [ + { + "id": { + "Text": "d1" + }, + "name": "Descriptor 1", + "summary": null, + "category": "Software", + "severity": "Debug", + "compliance": [ + "EmissionRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + }, + { + "id": { + "Text": "d2" + }, + "name": "Descriptor 2", + "summary": "Human-readable summary", + "category": "Configuration", + "severity": "Warn", + "compliance": [ + "SecurityRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "manager_side_reset": null + } + ] +} \ No newline at end of file diff --git a/src/fault_lib/examples/tst_app.rs b/src/fault_lib/examples/tst_app.rs new file mode 100644 index 0000000..70835a9 --- /dev/null +++ b/src/fault_lib/examples/tst_app.rs @@ -0,0 +1,90 
@@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use clap::Parser; +use common::fault::*; +use common::types::MetadataVec; +use env_logger::Env; +use fault_lib::FaultApi; +use fault_lib::catalog::FaultCatalogBuilder; + +use fault_lib::reporter::Reporter; +use fault_lib::reporter::ReporterApi; +use fault_lib::reporter::ReporterConfig; +use fault_lib::utils::to_static_short_string; + +use log::*; +use std::path::PathBuf; +use std::thread; +use std::time::Duration; + +/// Command line arguments +#[derive(Parser, Debug)] +#[command(version, about, long_about= None)] +struct Args { + /// path to fault catalog json file + #[arg(short, long)] + config_file: PathBuf, +} + +fn main() { + let args = Args::parse(); + + let env = Env::default().filter_or("RUST_LOG", "debug"); + env_logger::init_from_env(env); + info!("Start Basic fault library example"); + // Create the FaultLib API object. We have to create it before any Fault API can be used + // and keep it on stack until end of the program. 
No need to hand it over somewhere + let _api = FaultApi::new(FaultCatalogBuilder::new().json_file(args.config_file).expect("builder config").build()); + + // here you can use any public api from fault-api + playground(); + info!("End Basic fault library example"); +} + +fn playground() { + let config = ReporterConfig { + source: common::ids::SourceId { + entity: to_static_short_string("source").unwrap(), + ecu: Some(common::types::ShortString::from_bytes("ECU-A".as_bytes()).unwrap()), + domain: Some(to_static_short_string("ADAS").unwrap()), + sw_component: Some(to_static_short_string("Perception").unwrap()), + instance: Some(to_static_short_string("0").unwrap()), + }, + lifecycle_phase: LifecyclePhase::Running, + default_env_data: MetadataVec::new(), + }; + + let sovd_path = FaultApi::get_fault_catalog().id.to_string(); + let mut faults = Vec::new(); + + for desc in FaultApi::get_fault_catalog().descriptors() { + faults.push(desc.id.clone()); + } + + let mut reporters = Vec::new(); + + for fault in faults { + reporters.push(Reporter::new(&fault, config.clone()).expect("get_descriptor failed")); + } + + for x in 0..20 { + debug!("Loop {x}"); + + for reporter in reporters.iter_mut() { + let stage = if (x % 2) == 0 { LifecycleStage::Passed } else { LifecycleStage::Failed }; + reporter.publish(&sovd_path, reporter.create_record(stage)).expect("publish failed"); + } + thread::sleep(Duration::from_millis(200)); + } +} diff --git a/src/fault_lib/src/api.rs b/src/fault_lib/src/api.rs new file mode 100644 index 0000000..f5eda06 --- /dev/null +++ b/src/fault_lib/src/api.rs @@ -0,0 +1,300 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::enabling_condition::{EnablingCondition, EnablingConditionCallback, EnablingConditionError, EnablingConditionManager, FaultMonitor};
+use crate::fault_manager_sink::SinkInitError;
+use crate::{FaultSinkApi, LogHook, catalog::FaultCatalog, fault_manager_sink::FaultManagerSink};
+use alloc::sync::{Arc, Weak};
+use common::enabling_condition::EnablingConditionStatus;
+use common::sink_error::SinkError;
+use std::sync::OnceLock;
+
+/// Consolidated global state — single OnceLock prevents partial initialization.
+struct FaultApiState {
+    sink: Weak<dyn FaultSinkApi>,
+    catalog: Weak<FaultCatalog>,
+    ec_manager: Weak<EnablingConditionManager>,
+}
+
+static STATE: OnceLock<FaultApiState> = OnceLock::new();
+static LOG_HOOK: OnceLock<Arc<dyn LogHook>> = OnceLock::new();
+/// Serializes the entire `try_new` sequence (hash check + STATE.set) to
+/// prevent a TOCTOU race where two threads could both pass the hash check
+/// against different catalogs before either commits to STATE.
+static INIT_GUARD: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+/// Error type for FaultApi initialization failures.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum InitError {
+    #[error("FaultApi already initialized")]
+    AlreadyInitialized,
+
+    #[error("catalog hash verification failed: {0}")]
+    CatalogVerification(#[from] SinkError),
+
+    #[error("IPC sink initialization failed: {0}")]
+    SinkInit(#[from] SinkInitError),
+}
+
+/// FaultApi is the long-lived handle that wires a sink and logger together.
+#[derive(Clone)]
+pub struct FaultApi {
+    _fault_sink: Arc<dyn FaultSinkApi>,
+    _fault_catalog: Arc<FaultCatalog>,
+    _ec_manager: Arc<EnablingConditionManager>,
+}
+
+impl FaultApi {
+    /// Initialize the FaultApi singleton.
+    ///
+    /// # Errors
+    ///
+    /// Returns `InitError::AlreadyInitialized` if called more than once.
+    /// Returns `InitError::CatalogVerification` if catalog hash check fails.
+    pub fn try_new(catalog: FaultCatalog) -> Result<Self, InitError> {
+        let _guard = INIT_GUARD.lock().unwrap_or_else(|e| e.into_inner());
+
+        // Fast path: already initialized.
+        if STATE.get().is_some() {
+            return Err(InitError::AlreadyInitialized);
+        }
+
+        let catalog = Arc::new(catalog);
+
+        // EC manager created first (without sink - circular dependency).
+        // The IpcWorker receives a valid Weak<EnablingConditionManager> so
+        // it can forward DFM notifications to local monitors.
+        let ec_manager = Arc::new(EnablingConditionManager::new());
+
+        // Sink created with valid Weak<EnablingConditionManager> to EC manager.
+        let concrete_sink = FaultManagerSink::with_ec_manager(Arc::downgrade(&ec_manager))?;
+
+        // Validate catalog hash BEFORE committing to global state.
+        // On failure the caller can retry with different input.
+        if !concrete_sink.check_catalog_hash(&catalog)? {
+            return Err(InitError::CatalogVerification(SinkError::Other("catalog hash mismatch with DFM".into())));
+        }
+
+        let sink: Arc<dyn FaultSinkApi> = Arc::new(concrete_sink);
+
+        // Wire sink back to EC manager (resolves circular dependency).
+        ec_manager.set_sink(Arc::downgrade(&sink));
+
+        // Atomic commit - either all three refs are stored or none.
+        // The INIT_GUARD ensures no other thread can race between the
+        // hash check above and this STATE.set() call.
+        STATE
+            .set(FaultApiState {
+                sink: Arc::downgrade(&sink),
+                catalog: Arc::downgrade(&catalog),
+                ec_manager: Arc::downgrade(&ec_manager),
+            })
+            .map_err(|_| InitError::AlreadyInitialized)?;
+
+        Ok(FaultApi {
+            _fault_sink: sink,
+            _fault_catalog: catalog,
+            _ec_manager: ec_manager,
+        })
+    }
+
+    /// Initialize the FaultApi singleton, panicking on error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if FaultApi is already initialized or catalog verification fails.
+    /// Use [`try_new`](Self::try_new) for the fallible version.
+    #[allow(clippy::expect_used)]
+    pub fn new(catalog: FaultCatalog) -> FaultApi {
+        Self::try_new(catalog).expect("FaultApi initialization failed")
+    }
+
+    /// Get the fault sink, if FaultApi is initialized and not dropped.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SinkError::Other` if FaultApi was never initialized or has been dropped.
+    pub(crate) fn try_get_fault_sink() -> Result<Arc<dyn FaultSinkApi>, SinkError> {
+        STATE
+            .get()
+            .ok_or(SinkError::Other(alloc::borrow::Cow::Borrowed("FaultApi not initialized")))?
+            .sink
+            .upgrade()
+            .ok_or(SinkError::Other(alloc::borrow::Cow::Borrowed("FaultApi has been dropped")))
+    }
+
+    /// Get the fault sink. Panics if FaultApi is not initialized or has been dropped.
+    ///
+    /// Use [`try_get_fault_sink`](Self::try_get_fault_sink) for the fallible version.
+    #[allow(dead_code, clippy::expect_used)]
+    pub(crate) fn get_fault_sink() -> Arc<dyn FaultSinkApi> {
+        Self::try_get_fault_sink().expect("FaultApi not initialized or dropped")
+    }
+
+    /// Get the fault catalog, if FaultApi is initialized and not dropped.
+    pub fn try_get_fault_catalog() -> Option<Arc<FaultCatalog>> {
+        STATE.get().and_then(|s| s.catalog.upgrade())
+    }
+
+    /// Get the fault catalog. Panics if FaultApi is not initialized or has been dropped.
+    ///
+    /// Use [`try_get_fault_catalog`](Self::try_get_fault_catalog) for the fallible version.
+    #[allow(clippy::expect_used)]
+    pub fn get_fault_catalog() -> Arc<FaultCatalog> {
+        Self::try_get_fault_catalog().expect("FaultApi not initialized or dropped")
+    }
+
+    /// Register a log hook for fault reporting observability.
+    ///
+    /// Can only be called once. Call before creating Reporters so they
+    /// pick up the hook during construction.
+    ///
+    /// # Errors
+    ///
+    /// Returns the hook back if one was already registered.
+    pub fn set_log_hook(hook: Arc<dyn LogHook>) -> Result<(), Arc<dyn LogHook>> {
+        LOG_HOOK.set(hook)
+    }
+
+    /// Try to get the registered log hook.
+    ///
+    /// Returns `None` if no hook was registered via [`set_log_hook`](Self::set_log_hook).
+    pub fn try_get_log_hook() -> Option<Arc<dyn LogHook>> {
+        LOG_HOOK.get().cloned()
+    }
+
+    // ========================================================================
+    // Enabling Conditions API
+    // ========================================================================
+
+    /// Get the enabling condition manager, if FaultApi is initialized and not dropped.
+    pub(crate) fn try_get_ec_manager() -> Option<Arc<EnablingConditionManager>> {
+        STATE.get().and_then(|s| s.ec_manager.upgrade())
+    }
+
+    /// Register a new enabling condition provider.
+    ///
+    /// Returns a handle that the caller uses to report status changes.
+    /// The condition is registered with the DFM via IPC and starts in
+    /// [`Inactive`](EnablingConditionStatus::Inactive) state.
+    ///
+    /// # Errors
+    ///
+    /// - `EnablingConditionError::AlreadyRegistered` if the entity is already registered.
+    /// - `EnablingConditionError::NotInitialized` if FaultApi was not initialized.
+    /// - `EnablingConditionError::EntityTooLong` if the entity name exceeds IPC limits.
+    pub fn get_enabling_condition(entity: &str) -> Result<EnablingCondition, EnablingConditionError> {
+        let manager = Self::try_get_ec_manager().ok_or(EnablingConditionError::NotInitialized)?;
+        manager.register_condition(entity)
+    }
+
+    /// Create a fault monitor that watches one or more enabling conditions.
+    ///
+    /// When any of the specified conditions changes status, the callback
+    /// is invoked with the condition ID and new status.
+    ///
+    /// The monitor automatically unregisters when dropped.
+    ///
+    /// # Errors
+    ///
+    /// - `EnablingConditionError::NotInitialized` if FaultApi was not initialized.
+    pub fn create_fault_monitor(condition_ids: &[&str], callback: impl EnablingConditionCallback) -> Result<FaultMonitor, EnablingConditionError> {
+        let manager = Self::try_get_ec_manager().ok_or(EnablingConditionError::NotInitialized)?;
+        let ids: Vec<String> = condition_ids.iter().map(|s| s.to_string()).collect();
+        manager.register_monitor(ids, Arc::new(callback))
+    }
+
+    /// Get the current status of a registered enabling condition.
+    ///
+    /// Returns `None` if FaultApi is not initialized or the condition
+    /// is not registered.
+    pub fn get_enabling_condition_status(entity: &str) -> Option<EnablingConditionStatus> {
+        Self::try_get_ec_manager()?.get_status(entity)
+    }
+}
+
+// ============================================================================
+// Unit tests — cover the pre-initialisation error paths that do NOT
+// require a live iceoryx2 daemon.
+// ============================================================================
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::format;
+
+    // ---- try_get_fault_sink (before init) ----
+
+    #[test]
+    fn try_get_fault_sink_before_init_returns_error() {
+        // STATE is process-global, so this only works reliably if no other
+        // test has successfully initialised FaultApi. Because the real
+        // init path requires iceoryx2, the singleton is always empty in
+        // plain `cargo test`.
+        let result = FaultApi::try_get_fault_sink();
+        assert!(result.is_err());
+    }
+
+    // ---- try_get_fault_catalog (before init) ----
+
+    #[test]
+    fn try_get_fault_catalog_before_init_returns_none() {
+        assert!(FaultApi::try_get_fault_catalog().is_none());
+    }
+
+    // ---- try_get_ec_manager (before init) ----
+
+    #[test]
+    fn try_get_ec_manager_before_init_returns_none() {
+        assert!(FaultApi::try_get_ec_manager().is_none());
+    }
+
+    // ---- set_log_hook / try_get_log_hook ----
+
+    #[test]
+    fn try_get_log_hook_returns_none_when_unset() {
+        // LOG_HOOK is also process-global. If this is the first test to
+        // run, it will be empty. If another test already set it, skip.
+        // We cannot reset a OnceLock, so this is best-effort.
+        let _hook = FaultApi::try_get_log_hook();
+        // Just verify it does not panic.
+ } + + // ---- InitError Display ---- + + #[test] + fn init_error_already_initialized_display() { + let err = InitError::AlreadyInitialized; + assert_eq!(format!("{err}"), "FaultApi already initialized"); + } + + #[test] + fn init_error_catalog_verification_display() { + let err = InitError::CatalogVerification(SinkError::TransportDown); + let msg = format!("{err}"); + assert!(msg.contains("catalog hash verification failed")); + } + + // ---- Enabling condition error paths ---- + + #[test] + fn get_enabling_condition_before_init_returns_error() { + let result = FaultApi::get_enabling_condition("test_ec"); + assert!(matches!(result, Err(EnablingConditionError::NotInitialized))); + } + + #[test] + fn get_enabling_condition_status_before_init_returns_none() { + assert!(FaultApi::get_enabling_condition_status("anything").is_none()); + } +} diff --git a/src/fault_lib/src/catalog.rs b/src/fault_lib/src/catalog.rs new file mode 100644 index 0000000..45dfc64 --- /dev/null +++ b/src/fault_lib/src/catalog.rs @@ -0,0 +1,215 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +// Re-export catalog types from common crate. +// FaultCatalog now lives in common so that dfm_lib can use it +// without depending on fault_lib. 
+pub use common::catalog::*; + +#[cfg(test)] +#[cfg(not(miri))] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] +mod tests { + use super::*; + use common::debounce::DebounceMode; + use common::fault::*; + use common::types::{to_static_long_string, to_static_short_string}; + use iceoryx2_bb_container::vector::Vector; + use std::path::PathBuf; + use std::time::Duration; + + /// Resolves test data file paths for both Cargo and Bazel test environments. + /// Cargo runs from crate root, Bazel uses CARGO_MANIFEST_DIR env var. + fn test_data_path(relative_path: &str) -> PathBuf { + // First try CARGO_MANIFEST_DIR (works for both Cargo and Bazel with env set) + if let Ok(manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") { + let path = PathBuf::from(&manifest_dir).join(relative_path); + if path.exists() { + return path; + } + } + // Fallback: relative path (Cargo default behavior) + PathBuf::from(relative_path) + } + + /// Test helper function - creates the test fault catalog configuration structure + /// + /// # Attention Any change in this function shall also be reflected in the `./tests/ivi_fault_catalog.json` file + /// + /// # Returns + /// + /// - `FaultCatalogConfig` - fault catalog test configuration. 
+ /// + fn create_config() -> FaultCatalogConfig { + FaultCatalogConfig { + id: "ivi".into(), + version: 1, + faults: create_descriptors(), + } + } + + /// Creates test fault descriptors + /// + /// # Attention when you change something in the returned descriptors, please edit also + /// `../tests/ivi_fault_catalog.json` file adequately + /// + /// # Returns + /// + /// - `Vec` - vector of test fault descriptors + fn create_descriptors() -> Vec { + use common::fault::*; + + let mut d1_compliance = ComplianceVec::new(); + let _ = d1_compliance.push(ComplianceTag::EmissionRelevant); + let _ = d1_compliance.push(ComplianceTag::SafetyCritical); + + let mut d2_compliance = ComplianceVec::new(); + let _ = d2_compliance.push(ComplianceTag::SecurityRelevant); + let _ = d2_compliance.push(ComplianceTag::SafetyCritical); + + vec![ + FaultDescriptor { + id: FaultId::Text(to_static_short_string("d1").unwrap()), + + name: to_static_short_string("Descriptor 1").unwrap(), + summary: None, + + category: FaultType::Software, + severity: FaultSeverity::Debug, + compliance: ComplianceVec::try_from(&[ComplianceTag::EmissionRelevant, ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Text(to_static_short_string("d2").unwrap()), + + name: to_static_short_string("Descriptor 2").unwrap(), + summary: Some(to_static_long_string("Human-readable summary").unwrap()), + + category: FaultType::Configuration, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::try_from(&[ComplianceTag::SecurityRelevant, ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + 
manager_side_reset: None, + }, + ] + } + + #[test] + fn from_config() { + let cfg = create_config(); + + let catalog = FaultCatalogBuilder::new().cfg_struct(cfg.clone()).unwrap().build(); + + let d1 = catalog + .descriptor(&FaultId::Text(to_static_short_string("d1").unwrap())) + .expect("get_descriptor failed"); + let d2 = catalog + .descriptor(&FaultId::Text(to_static_short_string("d2").unwrap())) + .expect("get_descriptor failed"); + + assert_eq!(*d1, cfg.faults[0]); + assert_eq!(*d2, cfg.faults[1]); + } + + #[test] + fn empty_config() { + let cfg = FaultCatalogConfig { + id: "".into(), + version: 7, + faults: Vec::new(), + }; + + let catalog = FaultCatalogBuilder::new().cfg_struct(cfg.clone()).unwrap().build(); + let d1 = catalog.descriptor(&FaultId::Text(to_static_short_string("d1").unwrap())); + assert_eq!(d1, Option::None); + } + + #[test] + fn from_json_string() { + let cfg = create_config(); + let json_string = serde_json::to_string_pretty(&cfg).unwrap(); + + let fault_catalog = FaultCatalogBuilder::new().json_string(json_string.as_str()).unwrap().build(); + let d1 = fault_catalog + .descriptor(&FaultId::Text(to_static_short_string("d1").unwrap())) + .expect("get_descriptor failed"); + let d2 = fault_catalog + .descriptor(&FaultId::Text(to_static_short_string("d2").unwrap())) + .expect("get_descriptor failed"); + + assert_eq!(*d1, cfg.faults[0]); + assert_eq!(*d2, cfg.faults[1]); + } + + #[test] + fn from_json_file() { + // Note: use test_data_path helper for Cargo/Bazel compatibility + let fault_catalog = FaultCatalogBuilder::new() + .json_file(test_data_path("tests/data/ivi_fault_catalog.json")) + .unwrap() + .build(); + let d1 = fault_catalog + .descriptor(&FaultId::Text(to_static_short_string("d1").unwrap())) + .expect("get_descriptor failed"); + let d2 = fault_catalog + .descriptor(&FaultId::Text(to_static_short_string("d2").unwrap())) + .expect("get_descriptor failed"); + // create a reference catalog config - shall be equal to the one in json + let 
cfg = create_config(); + + assert_eq!(*d1, cfg.faults[0]); + assert_eq!(*d2, cfg.faults[1]); + } + + #[test] + #[should_panic] + fn from_not_existing_json_file() { + let _ = FaultCatalogBuilder::new() + .json_file(PathBuf::from("tests/data/xxx.json")) + .unwrap() + .build(); + } + + #[test] + fn hash_sum() { + let catalog_from_file = FaultCatalogBuilder::new() + .json_file(test_data_path("tests/data/ivi_fault_catalog.json")) + .unwrap() + .build(); + let cfg = create_config(); + let catalog_from_cfg = FaultCatalogBuilder::new().cfg_struct(cfg.clone()).unwrap().build(); + let catalog_from_json = FaultCatalogBuilder::new() + .json_string(&serde_json::to_string_pretty(&cfg).unwrap()) + .unwrap() + .build(); + + assert_eq!(catalog_from_cfg.config_hash(), catalog_from_file.config_hash()); + assert_eq!(catalog_from_cfg.config_hash(), catalog_from_json.config_hash()); + } +} diff --git a/src/fault_lib/src/enabling_condition.rs b/src/fault_lib/src/enabling_condition.rs new file mode 100644 index 0000000..60e7f45 --- /dev/null +++ b/src/fault_lib/src/enabling_condition.rs @@ -0,0 +1,635 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Enabling condition provider handles and fault monitors. +//! +//! This module implements the enabling condition flow described in +//! `docs/puml/new_enable_condition.puml` (registration), +//! `docs/puml/enable_condition_ntf.puml` (remote notifications), and +//! `docs/puml/local_enable_condition_ntf.puml` (local notifications). +//! +//! # Architecture +//! +//! - [`EnablingCondition`]: Provider-side handle for reporting condition status. +//! 
- [`FaultMonitor`]: Consumer-side handle for receiving condition change notifications. +//! - `EnablingConditionManager`: Internal singleton that tracks condition state +//! and dispatches notifications to registered monitors. + +use alloc::sync::{Arc, Weak}; +use common::enabling_condition::EnablingConditionStatus; +use common::sink_error::SinkError; +use common::types::{DiagnosticEvent, ShortString}; +use core::panic::AssertUnwindSafe; +use core::sync::atomic::{AtomicU64, Ordering}; +use log::{debug, error, warn}; +use std::collections::HashMap; +use std::panic::catch_unwind; +use std::sync::RwLock; + +// ============================================================================ +// Error types +// ============================================================================ + +/// Errors that can occur during enabling condition operations. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum EnablingConditionError { + #[error("enabling condition '{0}' already registered")] + AlreadyRegistered(String), + #[error("FaultApi not initialized or dropped")] + NotInitialized, + #[error("entity name too long for IPC transport")] + EntityTooLong, + #[error("IPC transport error: {0}")] + Transport(#[from] SinkError), + #[error("internal error: {0}")] + InternalError(String), +} + +// ============================================================================ +// Callback trait +// ============================================================================ + +/// Callback interface for enabling condition change notifications. +/// +/// Implementations must be thread-safe and non-blocking. The callback +/// is invoked from the notification dispatch thread; long-running work +/// should be offloaded to a separate task/thread. +pub trait EnablingConditionCallback: Send + Sync + 'static { + /// Called when an enabling condition status changes. + /// + /// `id` is the SOVD entity name of the condition. 
+ fn on_condition_change(&self, id: &str, status: EnablingConditionStatus); +} + +/// Blanket implementation for closures. +impl EnablingConditionCallback for F +where + F: Fn(&str, EnablingConditionStatus) + Send + Sync + 'static, +{ + fn on_condition_change(&self, id: &str, status: EnablingConditionStatus) { + self(id, status); + } +} + +// ============================================================================ +// EnablingCondition (provider handle) +// ============================================================================ + +/// Handle for an enabling condition provider. +/// +/// Created via [`FaultApi::get_enabling_condition`](crate::api::FaultApi::get_enabling_condition). The provider uses +/// this handle to report status changes (active/inactive), which are +/// forwarded to the DFM via IPC and to local [`FaultMonitor`] subscribers. +/// +/// # Example +/// +/// ```ignore +/// let ec = FaultApi::get_enabling_condition("vehicle.speed.valid")?; +/// ec.report_status(EnablingConditionStatus::Active)?; +/// ``` +pub struct EnablingCondition { + id: ShortString, + manager: Arc, +} + +impl EnablingCondition { + /// Report the current status of this enabling condition. + /// + /// Updates local state, notifies local monitors, and sends the + /// status change to the DFM via IPC. + pub fn report_status(&self, status: EnablingConditionStatus) -> Result<(), SinkError> { + self.manager.report_status(&self.id, status) + } + + /// Get the identifier of this enabling condition. + pub fn id(&self) -> &ShortString { + &self.id + } +} + +// ============================================================================ +// FaultMonitor (consumer handle) +// ============================================================================ + +/// Monitor handle for receiving enabling condition change notifications. +/// +/// Created via [`FaultApi::create_fault_monitor`](crate::api::FaultApi::create_fault_monitor). 
When any of the +/// monitored enabling conditions changes status, the registered callback +/// is invoked. +/// +/// The monitor automatically unregisters from the manager when dropped. +/// +/// # Example +/// +/// ```ignore +/// let monitor = FaultApi::create_fault_monitor( +/// &["vehicle.speed.valid", "engine.running"], +/// |id, status| println!("condition {} changed to {:?}", id, status), +/// )?; +/// // monitor lives as long as the variable; dropped → unregistered +/// ``` +pub struct FaultMonitor { + monitor_id: u64, + manager: Arc, +} + +impl Drop for FaultMonitor { + fn drop(&mut self) { + self.manager.unregister_monitor(self.monitor_id); + } +} + +// ============================================================================ +// Internal: MonitorEntry +// ============================================================================ + +struct MonitorEntry { + id: u64, + condition_ids: Vec, + callback: Arc, +} + +// ============================================================================ +// EnablingConditionManager (internal singleton) +// ============================================================================ + +/// Monotonic counter for unique monitor IDs. +static MONITOR_ID_COUNTER: AtomicU64 = AtomicU64::new(0); + +/// Internal manager that tracks enabling condition state and dispatches +/// notifications to registered fault monitors. +/// +/// Stored as a global singleton via `OnceLock` in [`FaultApi`]. +/// Thread-safe: all mutable state is behind `RwLock`. +pub(crate) struct EnablingConditionManager { + /// Registered enabling conditions: entity → current status. + conditions: RwLock>, + /// Registered monitors with their callbacks. + monitors: RwLock>, + /// Weak reference to the sink for IPC communication. + /// Behind RwLock to allow deferred wiring (sink created after manager + /// due to circular Arc ↔ Arc dependency). 
+ sink: RwLock>>, +} + +impl EnablingConditionManager { + /// Create a new manager without a sink (will be wired later via `set_sink`). + pub(crate) fn new() -> Self { + Self { + conditions: RwLock::new(HashMap::new()), + monitors: RwLock::new(Vec::new()), + sink: RwLock::new(None), + } + } + + /// Create a new manager with a weak reference to the IPC sink. + #[allow(dead_code)] + pub(crate) fn with_sink(sink: Weak) -> Self { + Self { + conditions: RwLock::new(HashMap::new()), + monitors: RwLock::new(Vec::new()), + sink: RwLock::new(Some(sink)), + } + } + + /// Wire the sink after construction (resolves circular dependency). + pub(crate) fn set_sink(&self, sink: Weak) { + if let Ok(mut s) = self.sink.write() { + *s = Some(sink); + } + } + + fn get_sink(&self) -> Option> { + self.sink.read().ok()?.as_ref()?.upgrade() + } + + /// Register a new enabling condition. + /// + /// Returns `Err` if the condition is already registered. + pub(crate) fn register_condition(self: &Arc, entity: &str) -> Result { + let id = ShortString::try_from(entity.as_bytes()).map_err(|_| EnablingConditionError::EntityTooLong)?; + + { + let mut conditions = self.conditions.write().map_err(|e| { + error!("conditions lock poisoned in register_condition: {e}"); + EnablingConditionError::InternalError(format!("conditions lock poisoned: {e}")) + })?; + + if conditions.contains_key(entity) { + return Err(EnablingConditionError::AlreadyRegistered(entity.to_string())); + } + conditions.insert(entity.to_string(), EnablingConditionStatus::Inactive); + } + + // Send registration to DFM via IPC (best-effort) + if let Some(sink) = self.get_sink() { + let event = DiagnosticEvent::EnablingConditionRegister(id); + if let Err(e) = sink.send_event(event) { + warn!("Failed to register enabling condition '{entity}' with DFM: {e:?}"); + } + } + + debug!("Registered enabling condition: {entity}"); + Ok(EnablingCondition { + id, + manager: Arc::clone(self), + }) + } + + /// Report a status change for an enabling 
condition. + /// + /// Updates local state, notifies local monitors, and sends to DFM. + pub(crate) fn report_status(&self, id: &ShortString, status: EnablingConditionStatus) -> Result<(), SinkError> { + let id_str = id.to_string(); + + // Update local state + { + let mut conditions = self.conditions.write().map_err(|e| { + error!("conditions lock poisoned in report_status: {e}"); + SinkError::Other(alloc::borrow::Cow::Owned(format!("conditions lock poisoned: {e}"))) + })?; + + if let Some(current) = conditions.get_mut(&id_str) { + if *current == status { + // No change — skip notification + return Ok(()); + } + *current = status; + } else { + // Condition not registered locally but may be registered remotely + conditions.insert(id_str.clone(), status); + } + } + + // Notify local monitors (lock released before callback invocation) + self.dispatch_to_monitors(&id_str, status); + + // Send to DFM via IPC + if let Some(sink) = self.get_sink() { + let event = DiagnosticEvent::EnablingConditionStatusChange((*id, status)); + sink.send_event(event)?; + } + + debug!("Enabling condition '{id_str}' status: {status:?}"); + Ok(()) + } + + /// Handle an incoming notification from DFM (remote status change). + /// + /// Called by the notification listener when a status change is + /// received from DFM. Updates local state and dispatches to monitors. 
+ pub(crate) fn handle_remote_notification(&self, id: &ShortString, status: EnablingConditionStatus) { + let id_str = id.to_string(); + + // Update local state + match self.conditions.write() { + Ok(mut conditions) => { + if let Some(current) = conditions.get_mut(&id_str) { + if *current == status { + return; // No change + } + *current = status; + } else { + conditions.insert(id_str.clone(), status); + } + } + Err(e) => { + error!("conditions lock poisoned in handle_remote_notification: {e}"); + return; + } + } + + // Dispatch to monitors + self.dispatch_to_monitors(&id_str, status); + debug!("Remote enabling condition '{id_str}' status: {status:?}"); + } + + /// Register a fault monitor for specific enabling conditions. + pub(crate) fn register_monitor( + self: &Arc, + condition_ids: Vec, + callback: Arc, + ) -> Result { + let monitor_id = MONITOR_ID_COUNTER.fetch_add(1, Ordering::Relaxed); + + let entry = MonitorEntry { + id: monitor_id, + condition_ids, + callback, + }; + + self.monitors + .write() + .map_err(|e| { + error!("monitors lock poisoned in register_monitor: {e}"); + EnablingConditionError::InternalError(format!("monitors lock poisoned: {e}")) + })? + .push(entry); + + debug!("Registered fault monitor #{monitor_id}"); + Ok(FaultMonitor { + monitor_id, + manager: Arc::clone(self), + }) + } + + /// Unregister a fault monitor by ID. + fn unregister_monitor(&self, monitor_id: u64) { + match self.monitors.write() { + Ok(mut monitors) => { + monitors.retain(|m| m.id != monitor_id); + debug!("Unregistered fault monitor #{monitor_id}"); + } + Err(e) => { + error!("monitors lock poisoned in unregister_monitor: {e}"); + } + } + } + + /// Get the current status of an enabling condition. + pub(crate) fn get_status(&self, entity: &str) -> Option { + self.conditions.read().ok()?.get(entity).copied() + } + + /// Dispatch a status change to all monitors watching the given condition. 
+ /// + /// Collects callbacks under read lock, then invokes them after releasing + /// the lock to avoid potential deadlocks. + fn dispatch_to_monitors(&self, id: &str, status: EnablingConditionStatus) { + let callbacks: Vec> = { + let monitors = match self.monitors.read() { + Ok(m) => m, + Err(e) => { + error!("monitors lock poisoned in dispatch_to_monitors: {e}"); + return; + } + }; + monitors + .iter() + .filter(|m| m.condition_ids.iter().any(|c| c == id)) + .map(|m| Arc::clone(&m.callback)) + .collect() + }; + + for callback in callbacks { + if let Err(e) = catch_unwind(AssertUnwindSafe(|| { + callback.on_condition_change(id, status); + })) { + error!("Callback panicked for condition '{id}': {e:?}"); + } + } + } +} + +#[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] +mod tests { + use super::*; + use std::sync::atomic::AtomicUsize; + + fn make_manager() -> Arc { + let sink: Arc = Arc::new(crate::test_utils::RecordingSink::new()); + Arc::new(EnablingConditionManager::with_sink(Arc::downgrade(&sink))) + } + + #[test] + fn register_condition_succeeds() { + let manager = make_manager(); + let ec = manager.register_condition("vehicle.speed.valid"); + assert!(ec.is_ok()); + assert_eq!(manager.get_status("vehicle.speed.valid"), Some(EnablingConditionStatus::Inactive)); + } + + #[test] + fn register_duplicate_returns_error() { + let manager = make_manager(); + let _ = manager.register_condition("vehicle.speed.valid").unwrap(); + let result = manager.register_condition("vehicle.speed.valid"); + assert!(result.is_err()); + } + + #[test] + fn report_status_updates_state() { + let manager = make_manager(); + let ec = manager.register_condition("engine.running").unwrap(); + ec.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(manager.get_status("engine.running"), Some(EnablingConditionStatus::Active)); + } + + #[test] + fn 
report_status_skips_duplicate() { + let manager = make_manager(); + let ec = manager.register_condition("engine.running").unwrap(); + ec.report_status(EnablingConditionStatus::Inactive).unwrap(); + // Same status again — should be a no-op + ec.report_status(EnablingConditionStatus::Inactive).unwrap(); + assert_eq!(manager.get_status("engine.running"), Some(EnablingConditionStatus::Inactive)); + } + + #[test] + fn monitor_receives_callback_on_status_change() { + let manager = make_manager(); + let ec = manager.register_condition("vehicle.speed.valid").unwrap(); + + let call_count = Arc::new(AtomicUsize::new(0)); + let count_clone = Arc::clone(&call_count); + + let _monitor = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + count_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + ec.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(call_count.load(Ordering::SeqCst), 1); + } + + #[test] + fn monitor_not_called_for_unrelated_condition() { + let manager = make_manager(); + let _ec1 = manager.register_condition("vehicle.speed.valid").unwrap(); + let ec2 = manager.register_condition("engine.running").unwrap(); + + let call_count = Arc::new(AtomicUsize::new(0)); + let count_clone = Arc::clone(&call_count); + + let _monitor = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + count_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + // Change engine.running — monitor watches vehicle.speed.valid only + ec2.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(call_count.load(Ordering::SeqCst), 0); + } + + #[test] + fn monitor_watches_multiple_conditions() { + let manager = make_manager(); + let ec1 = manager.register_condition("vehicle.speed.valid").unwrap(); + let ec2 = manager.register_condition("engine.running").unwrap(); + + let 
call_count = Arc::new(AtomicUsize::new(0)); + let count_clone = Arc::clone(&call_count); + + let _monitor = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string(), "engine.running".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + count_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + ec1.report_status(EnablingConditionStatus::Active).unwrap(); + ec2.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(call_count.load(Ordering::SeqCst), 2); + } + + #[test] + fn monitor_unregisters_on_drop() { + let manager = make_manager(); + let ec = manager.register_condition("vehicle.speed.valid").unwrap(); + + let call_count = Arc::new(AtomicUsize::new(0)); + let count_clone = Arc::clone(&call_count); + + { + let _monitor = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + count_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + ec.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(call_count.load(Ordering::SeqCst), 1); + } + // Monitor dropped — should not receive further callbacks + ec.report_status(EnablingConditionStatus::Inactive).unwrap(); + assert_eq!(call_count.load(Ordering::SeqCst), 1); + } + + #[test] + fn handle_remote_notification_updates_state_and_dispatches() { + let manager = make_manager(); + + let call_count = Arc::new(AtomicUsize::new(0)); + let count_clone = Arc::clone(&call_count); + + let _monitor = manager + .register_monitor( + vec!["remote.condition".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + count_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + let id = ShortString::try_from("remote.condition".as_bytes()).unwrap(); + manager.handle_remote_notification(&id, EnablingConditionStatus::Active); + + assert_eq!(manager.get_status("remote.condition"), Some(EnablingConditionStatus::Active)); + 
assert_eq!(call_count.load(Ordering::SeqCst), 1); + } + + #[test] + fn entity_too_long_returns_error() { + let manager = make_manager(); + let long_entity = "a".repeat(100); // > 64 bytes + let result = manager.register_condition(&long_entity); + assert!(result.is_err()); + } + + #[test] + fn callback_panic_does_not_break_other_monitors() { + let manager = make_manager(); + let ec = manager.register_condition("vehicle.speed.valid").unwrap(); + + let count_before = Arc::new(AtomicUsize::new(0)); + let count_after = Arc::new(AtomicUsize::new(0)); + let before_clone = Arc::clone(&count_before); + let after_clone = Arc::clone(&count_after); + + // Monitor 1: increments counter + let _m1 = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + before_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + // Monitor 2: panics + let _m2 = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + panic!("deliberate test panic"); + }), + ) + .unwrap(); + + // Monitor 3: increments counter + let _m3 = manager + .register_monitor( + vec!["vehicle.speed.valid".to_string()], + Arc::new(move |_id: &str, _status: EnablingConditionStatus| { + after_clone.fetch_add(1, Ordering::SeqCst); + }), + ) + .unwrap(); + + ec.report_status(EnablingConditionStatus::Active).unwrap(); + + assert_eq!(count_before.load(Ordering::SeqCst), 1, "Monitor before panic should receive callback"); + assert_eq!(count_after.load(Ordering::SeqCst), 1, "Monitor after panic should receive callback"); + + // Subsequent notifications still work + ec.report_status(EnablingConditionStatus::Inactive).unwrap(); + assert_eq!(count_before.load(Ordering::SeqCst), 2); + assert_eq!(count_after.load(Ordering::SeqCst), 2); + } + + #[test] + fn set_sink_wires_after_construction() { + let manager = Arc::new(EnablingConditionManager::new()); + let sink: 
Arc = Arc::new(crate::test_utils::RecordingSink::new()); + + // Before wiring: register works (IPC send is best-effort) + let ec = manager.register_condition("test.condition").unwrap(); + + // Wire sink + manager.set_sink(Arc::downgrade(&sink)); + + // After wiring: report_status sends to sink without panic + ec.report_status(EnablingConditionStatus::Active).unwrap(); + assert_eq!(manager.get_status("test.condition"), Some(EnablingConditionStatus::Active)); + } +} diff --git a/src/fault_lib/src/fault_manager_sink.rs b/src/fault_lib/src/fault_manager_sink.rs new file mode 100644 index 0000000..8b25583 --- /dev/null +++ b/src/fault_lib/src/fault_manager_sink.rs @@ -0,0 +1,633 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::FaultApi; +use crate::enabling_condition::EnablingConditionManager; +use crate::ipc_worker::IpcWorker; +use crate::sink::*; +use crate::utils::to_static_long_string; +use alloc::sync::Weak; +use common::fault::FaultRecord; +use common::ipc_service_name::DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME; +use common::ipc_service_type::ServiceType; +use common::sink_error::SinkError; +use common::types::{DiagnosticEvent, LONG_STRING_CAPACITY, Sha256Vec}; +use core::time::Duration; +use iceoryx2::port::subscriber::Subscriber; +use iceoryx2::prelude::{NodeBuilder, ServiceName}; +use log::*; +use std::sync::mpsc::TrySendError; +use std::time::Instant; +use std::{ + sync::mpsc, + thread::{self, JoinHandle}, +}; + +/// Initialization errors for the IPC sink and worker. +#[derive(Debug, Clone, thiserror::Error)] +#[non_exhaustive] +pub enum SinkInitError { + /// Worker thread could not be spawned. 
+ #[error("thread spawn failed: {0}")] + ThreadSpawn(String), + + /// Worker thread failed during IPC resource setup. + #[error("worker initialization failed: {0}")] + WorkerInit(String), + + /// iceoryx2 node, service, or port creation failed. + #[error("IPC service creation failed: {0}")] + IpcService(String), + + /// Internal channel communication failed. + #[error("channel error: {0}")] + Channel(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum FaultManagerError { + SendError(String), + Timeout, +} + +impl From for SinkError { + fn from(err: FaultManagerError) -> Self { + match err { + FaultManagerError::SendError(msg) => SinkError::Other(alloc::borrow::Cow::Owned(msg)), + FaultManagerError::Timeout => SinkError::Other(alloc::borrow::Cow::Borrowed("timeout")), + } + } +} + +/// Request channel type used by the sink_thread to receive events +pub type WorkerReceiver = mpsc::Receiver; + +#[derive(Debug)] +pub(crate) enum WorkerMsg { + /// transports start message with the parent thread which will be unparked when the sink_thread thread is up and running + Start(std::thread::Thread), + + /// Sent by the FaultManagerSink when the fault monitor reports an event. + Event { event: Box }, + + /// Terminate the FaultManagerSink working thread + Exit, +} + +const TIMEOUT: Duration = Duration::from_millis(500); + +/// Maximum number of pending messages in the channel. +/// Provides backpressure when the IPC worker is slow to process. +const CHANNEL_CAPACITY: usize = 1024; + +pub struct FaultManagerSink { + sink_sender: mpsc::SyncSender, + sink_thread: Option>, + hash_check_response_subscriber: Option>, +} + +impl FaultManagerSink { + /// Create a new FaultManagerSink with IPC worker thread and hash check subscriber. + /// + /// # Errors + /// + /// Returns `SinkInitError` if thread spawn or iceoryx2 IPC service creation fails. 
+ #[allow(dead_code)] + pub(crate) fn new() -> Result { + Self::with_ec_manager(Weak::::new()) + } + + /// Create a new FaultManagerSink with a reference to the enabling condition manager. + /// + /// The EC manager is passed to the IPC worker so it can receive + /// enabling condition notifications from the DFM. + /// + /// # Errors + /// + /// Returns `SinkInitError` if thread spawn or iceoryx2 IPC service creation fails. + pub(crate) fn with_ec_manager(ec_manager: Weak) -> Result { + let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY); + + // Channel for the worker to report its initialization result back. + let (init_tx, init_rx) = mpsc::sync_channel::>(1); + + let handle = thread::Builder::new() + .name("fault_client_worker".into()) + .spawn(move || match IpcWorker::new(rx, ec_manager) { + Ok(mut ipc_worker) => { + let _ = init_tx.send(Ok(())); + ipc_worker.run(); + } + Err(e) => { + let _ = init_tx.send(Err(e)); + } + }) + .map_err(|e| SinkInitError::ThreadSpawn(e.to_string()))?; + + // Wait for worker to finish IPC resource setup. + init_rx + .recv_timeout(Duration::from_secs(5)) + .map_err(|_| SinkInitError::WorkerInit("timeout waiting for worker initialization".into()))? + .map_err(|e| SinkInitError::WorkerInit(e.to_string()))?; + + tx.send(WorkerMsg::Start(thread::current())) + .map_err(|e| SinkInitError::Channel(format!("start message: {e}")))?; + // Use park_timeout to avoid hanging forever if the worker thread + // panics before calling unpark. 5 seconds is generous for thread init. 
+ thread::park_timeout(Duration::from_secs(5)); + + let node = NodeBuilder::new() + .create::() + .map_err(|e| SinkInitError::IpcService(format!("hash check node: {e}")))?; + let hash_check_response_service_name = ServiceName::new(DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME) + .map_err(|e| SinkInitError::IpcService(format!("hash check service name: {e}")))?; + let hash_check_response_service = node + .service_builder(&hash_check_response_service_name) + .publish_subscribe::() + .open_or_create() + .map_err(|e| SinkInitError::IpcService(format!("hash check service: {e}")))?; + let hash_check_response_subscriber = hash_check_response_service + .subscriber_builder() + .create() + .map_err(|e| SinkInitError::IpcService(format!("hash check subscriber: {e}")))?; + + Ok(Self { + sink_sender: tx, + sink_thread: Some(handle), + hash_check_response_subscriber: Some(hash_check_response_subscriber), + }) + } + + /// Validate catalog hash against DFM without reading from global state. + /// + /// Used during initialization before the global OnceLock is committed, + /// so that a hash mismatch doesn't leave the system in a partial state. 
+ pub(crate) fn check_catalog_hash(&self, catalog: &crate::catalog::FaultCatalog) -> Result { + let catalog_id = catalog + .try_id() + .map_err(|e| SinkError::Other(alloc::borrow::Cow::Owned(format!("catalog id error: {e}"))))?; + let hash_vec = common::types::Sha256Vec::try_from(catalog.config_hash()) + .map_err(|_| SinkError::Other(alloc::borrow::Cow::Borrowed("catalog hash too long for IPC")))?; + let event = common::types::DiagnosticEvent::Hash((catalog_id, hash_vec)); + match self.sink_sender.try_send(WorkerMsg::Event { event: Box::new(event) }) { + Ok(()) => {} + Err(TrySendError::Full(_)) => return Err(SinkError::QueueFull), + Err(TrySendError::Disconnected(_)) => { + return Err(FaultManagerError::SendError("Cannot send hash check: channel disconnected".into()).into()); + } + } + self.listen_hash_check_response() + } + + /// Listen for hash check response from DFM. + /// + /// Uses polling with 50ms interval. A future improvement would be + /// to use iceoryx2 WaitSet for true event-driven notification. + fn listen_hash_check_response(&self) -> Result { + let start = Instant::now(); + let poll_interval = Duration::from_millis(50); + + let subscriber = self + .hash_check_response_subscriber + .as_ref() + .ok_or(SinkError::Other(alloc::borrow::Cow::Borrowed("hash check subscriber not initialized")))?; + + while start.elapsed() < TIMEOUT { + if let Some(msg) = subscriber.receive().map_err(|_| SinkError::TransportDown)? { + return Ok(*msg.payload()); + } + std::thread::sleep(poll_interval); + } + + Err(SinkError::Timeout) + } +} + +/// Maximum path length for fault entity paths. +/// Compile-time guarantee: stays in sync with `LongString` capacity. +const MAX_PATH_LENGTH: usize = LONG_STRING_CAPACITY; +const _: () = assert!(MAX_PATH_LENGTH == LONG_STRING_CAPACITY); + +/// Validate entity path format. 
+///
+/// Valid paths must:
+/// - Not be empty
+/// - Start with alphanumeric character
+/// - Not end with '/'
+/// - Not contain path traversal sequences ("..")
+/// - Contain only: alphanumeric, '/', '.', '-', '_'
+///
+/// NOTE(review): `char::is_alphanumeric` is Unicode-aware, so non-ASCII
+/// letters/digits are accepted by both checks below — confirm ASCII-only was
+/// not intended. Also, `contains("..")` rejects any adjacent dots, including
+/// non-traversal names such as "a..b" (only traversal cases are covered by
+/// the tests).
+fn is_valid_path(path: &str) -> bool {
+ if path.is_empty() {
+ return false;
+ }
+ if !path.starts_with(|c: char| c.is_alphanumeric()) {
+ return false;
+ }
+ if path.ends_with('/') {
+ return false;
+ }
+ if path.contains("..") {
+ return false;
+ }
+ // Whitelist check over every character; anything else (spaces, '@', '#', …)
+ // fails validation.
+ path.chars().all(|c| c.is_alphanumeric() || c == '/' || c == '.' || c == '-' || c == '_')
+}
+
+/// API to be used by the modules of the fault-lib which need to communicate with
+/// Diagnostic Fault Manager. This trait shall never become public
+impl FaultSinkApi for FaultManagerSink {
+ /// Enqueue an arbitrary diagnostic event for the IPC worker (non-blocking).
+ fn send_event(&self, event: DiagnosticEvent) -> Result<(), SinkError> {
+ // Box the event so only a pointer travels through the channel.
+ match self.sink_sender.try_send(WorkerMsg::Event { event: Box::new(event) }) {
+ Ok(()) => Ok(()),
+ Err(TrySendError::Full(_)) => {
+ warn!("Event queue full, dropping event");
+ Err(SinkError::QueueFull)
+ }
+ Err(TrySendError::Disconnected(_)) => Err(FaultManagerError::SendError("Cannot send event: channel disconnected".into()).into()),
+ }
+ }
+
+ /// Validate and enqueue a fault record for IPC delivery.
+ ///
+ /// The entity `path` is converted to a `LongString<128>` (`StaticString<128>`).
+ /// Paths exceeding 128 bytes are rejected with [`SinkError::BadDescriptor`]
+ /// rather than silently truncated.
+ fn publish(&self, path: &str, record: FaultRecord) -> Result<(), SinkError> {
+ // Validate path before IPC
+ // Explicit length pre-check gives a precise error message; the
+ // to_static_long_string conversion below enforces the same capacity
+ // again (defense in depth, both map to BadDescriptor).
+ if path.len() > MAX_PATH_LENGTH {
+ return Err(SinkError::BadDescriptor(alloc::borrow::Cow::Owned(format!(
+ "path too long: {} bytes (max {})",
+ path.len(),
+ MAX_PATH_LENGTH
+ ))));
+ }
+ if !is_valid_path(path) {
+ return Err(SinkError::BadDescriptor(alloc::borrow::Cow::Borrowed("invalid path format")));
+ }
+
+ let long_path = to_static_long_string(path).map_err(|_| {
+ SinkError::BadDescriptor(alloc::borrow::Cow::Owned(format!(
+ "path exceeds LongString capacity: {} bytes",
+ path.len()
+ )))
+ })?;
+ let event = DiagnosticEvent::Fault((long_path, record));
+ // Non-blocking enqueue; full queue drops the record and reports QueueFull.
+ match self.sink_sender.try_send(WorkerMsg::Event { event: Box::new(event) }) {
+ Ok(()) => Ok(()),
+ Err(TrySendError::Full(_)) => {
+ warn!("Fault queue full, dropping record");
+ Err(SinkError::QueueFull)
+ }
+ Err(TrySendError::Disconnected(_)) => Err(FaultManagerError::SendError("Cannot send event: channel disconnected".into()).into()),
+ }
+ }
+
+ // NOTE(review): this body duplicates `check_catalog_hash` above except for
+ // fetching the catalog from the FaultApi global — consider delegating to
+ // `self.check_catalog_hash(catalog)` to keep the two paths in sync.
+ fn check_fault_catalog(&self) -> Result {
+ let catalog = FaultApi::get_fault_catalog();
+ let catalog_id = catalog
+ .try_id()
+ .map_err(|e| SinkError::Other(alloc::borrow::Cow::Owned(format!("catalog id error: {e}"))))?;
+ let hash_vec = Sha256Vec::try_from(catalog.config_hash())
+ .map_err(|_| SinkError::Other(alloc::borrow::Cow::Borrowed("catalog hash too long for IPC")))?;
+ let event = DiagnosticEvent::Hash((catalog_id, hash_vec));
+ match self.sink_sender.try_send(WorkerMsg::Event { event: Box::new(event) }) {
+ Ok(()) => {}
+ Err(TrySendError::Full(_)) => return Err(SinkError::QueueFull),
+ Err(TrySendError::Disconnected(_)) => {
+ return Err(FaultManagerError::SendError("Cannot send hash check: channel disconnected".into()).into());
+ }
+ }
+ // this will wait for the response
+ self.listen_hash_check_response()
+ }
+}
+
+/// Timeout for joining the worker thread during drop.
+const DROP_JOIN_TIMEOUT: Duration = Duration::from_secs(2);
+
+impl Drop for FaultManagerSink {
+ fn drop(&mut self) {
+ debug!("Drop FaultManagerSink");
+ if let Some(hndl) = self.sink_thread.take() {
+ // try_send avoids blocking on a full channel during shutdown
+ // NOTE(review): if the queue is full the Exit message is lost; the
+ // worker then only exits once the channel disconnects (all senders
+ // dropped), which the join timeout below covers.
+ if let Err(e) = self.sink_sender.try_send(WorkerMsg::Exit) {
+ debug!("Exit signal send failed (worker may have already exited): {e:?}");
+ }
+
+ let current_id = std::thread::current().id();
+ let worker_id = hndl.thread().id();
+
+ // Joining our own thread would deadlock/panic; bail out instead.
+ if current_id == worker_id {
+ error!("Skipping join: drop called from the sink_thread thread");
+ return;
+ }
+
+ debug!("Joining sink_thread thread");
+ // Join indirectly via a watchdog thread so the join itself can be
+ // bounded by a timeout (std JoinHandle::join has no timeout).
+ let (join_tx, join_rx) = std::sync::mpsc::channel();
+ let watchdog = thread::spawn(move || {
+ let result = hndl.join();
+ let _ = join_tx.send(result);
+ });
+
+ match join_rx.recv_timeout(DROP_JOIN_TIMEOUT) {
+ Ok(Ok(())) => debug!("Worker thread joined successfully"),
+ Ok(Err(err)) => error!("Worker thread panicked: {err:?}"),
+ Err(_) => {
+ error!("Worker thread did not exit within {DROP_JOIN_TIMEOUT:?}, abandoning");
+ // Dropping a JoinHandle detaches the thread: the watchdog
+ // (and the stuck worker) are deliberately leaked here.
+ drop(watchdog);
+ }
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+#[allow(
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::std_instead_of_core,
+ clippy::std_instead_of_alloc,
+ clippy::arithmetic_side_effects
+)]
+mod tests {
+ use super::*;
+ use crate::test_utils::*;
+ use common::FaultId;
+ use common::types::*;
+ use std::sync::Arc;
+ use std::time::Duration;
+
+ // Sink with no worker thread and no subscriber: exercises only the
+ // channel-enqueue side of the API.
+ fn new_for_publish_test() -> (FaultManagerSink, mpsc::Receiver) {
+ let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY);
+ let client = FaultManagerSink {
+ sink_sender: tx,
+ sink_thread: None,
+ hash_check_response_subscriber: None,
+ };
+ (client, rx)
+ }
+
+ // Sink with a real (short-lived) thread so Drop's join path runs.
+ fn new_for_drop_test() -> (FaultManagerSink, mpsc::Receiver) {
+ let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY);
+ let handle = thread::spawn(|| {
+ thread::sleep(Duration::from_millis(1));
+ });
+ let client = FaultManagerSink {
+ sink_sender: tx,
+ sink_thread: Some(handle),
+ hash_check_response_subscriber: None, + }; + (client, rx) + } + + #[test] + fn test_publish_sends_event_message() { + let (client, rx) = new_for_publish_test(); + let fault_id = FaultId::Numeric(42); + let fault_name = ShortString::from_bytes("Test Fault".as_bytes()).unwrap(); + let desc = stub_descriptor(fault_id, fault_name, None, None); + let path = "test/path"; + + let result = ::publish(&client, path, stub_record(desc.clone())); + assert!(result.is_ok()); + + match rx.recv_timeout(Duration::from_millis(50)).unwrap() { + WorkerMsg::Event { event } => match &*event { + DiagnosticEvent::Fault((path, record)) => { + assert_eq!(path.to_string(), "test/path"); + assert_eq!(record.id, FaultId::Numeric(42)); + } + other => { + panic!("Expected Fault event, got {other:?}"); + } + }, + other => panic!("Received wrong message type: {other:?}"), + } + } + + #[test] + fn test_drop_sends_exit_message() { + let (client, rx) = new_for_drop_test(); + drop(client); + + match rx.recv_timeout(Duration::from_millis(50)).unwrap() { + WorkerMsg::Exit => {} + other => panic!("Received wrong message type, expected Exit: {other:?}"), + } + } + + // ---------- is_valid_path tests ---------- + + #[test] + fn path_traversal_rejected() { + assert!(!is_valid_path("entity/../etc")); + assert!(!is_valid_path("entity/..")); + assert!(!is_valid_path("../entity")); + assert!(!is_valid_path("a/b/../c")); + } + + #[test] + fn single_dots_in_path_allowed() { + assert!(is_valid_path("entity/v1.2.3/sub")); + assert!(is_valid_path("entity.name")); + assert!(is_valid_path("a.b.c")); + } + + #[test] + fn empty_path_rejected() { + assert!(!is_valid_path("")); + } + + #[test] + fn path_starting_with_slash_rejected() { + assert!(!is_valid_path("/entity")); + } + + #[test] + fn path_ending_with_slash_rejected() { + assert!(!is_valid_path("entity/")); + } + + #[test] + fn valid_paths_accepted() { + assert!(is_valid_path("entity")); + assert!(is_valid_path("test/path")); + assert!(is_valid_path("a/b/c")); + 
assert!(is_valid_path("entity-name_v1")); + } + + #[test] + fn path_with_special_chars_rejected() { + assert!(!is_valid_path("entity path")); + assert!(!is_valid_path("entity@name")); + assert!(!is_valid_path("entity#1")); + } + + // ---------- full queue behavior ---------- + + #[test] + fn publish_rejects_when_queue_is_full() { + // Create a very small channel to fill up + let (tx, _rx) = mpsc::sync_channel(1); + let client = FaultManagerSink { + sink_sender: tx, + sink_thread: None, + hash_check_response_subscriber: None, + }; + + let fault_name = ShortString::from_bytes("Test".as_bytes()).unwrap(); + let desc = stub_descriptor(FaultId::Numeric(1), fault_name, None, None); + let path = "test/path"; + + // First publish fills the channel + let result = ::publish(&client, path, stub_record(desc.clone())); + assert!(result.is_ok()); + + // Second publish should fail with QueueFull + let result = ::publish(&client, path, stub_record(desc)); + assert!(matches!(result, Err(SinkError::QueueFull))); + } + + // ---------- invalid path handling ---------- + + #[test] + fn publish_rejects_path_too_long() { + let (client, _rx) = new_for_publish_test(); + let fault_name = ShortString::from_bytes("Test".as_bytes()).unwrap(); + let desc = stub_descriptor(FaultId::Numeric(1), fault_name, None, None); + + // Path longer than MAX_PATH_LENGTH + let long_path = "a".repeat(MAX_PATH_LENGTH + 1); + let result = ::publish(&client, &long_path, stub_record(desc)); + assert!(matches!(result, Err(SinkError::BadDescriptor(_)))); + } + + #[test] + fn publish_rejects_invalid_path_format() { + let (client, _rx) = new_for_publish_test(); + let fault_name = ShortString::from_bytes("Test".as_bytes()).unwrap(); + let desc = stub_descriptor(FaultId::Numeric(1), fault_name, None, None); + + let result = ::publish(&client, "../etc/passwd", stub_record(desc.clone())); + assert!(matches!(result, Err(SinkError::BadDescriptor(_)))); + + let result = ::publish(&client, "", stub_record(desc)); + 
assert!(matches!(result, Err(SinkError::BadDescriptor(_)))); + } + + // ---------- send_event ---------- + + #[test] + fn send_event_succeeds_with_open_channel() { + let (client, _rx) = new_for_publish_test(); + let event = DiagnosticEvent::Hash((crate::utils::to_static_long_string("test").unwrap(), common::types::Sha256Vec::default())); + let result = ::send_event(&client, event); + assert!(result.is_ok()); + } + + #[test] + fn send_event_fails_on_disconnected_channel() { + let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY); + let client = FaultManagerSink { + sink_sender: tx, + sink_thread: None, + hash_check_response_subscriber: None, + }; + // Drop the receiver to disconnect the channel + drop(rx); + + let event = DiagnosticEvent::Hash((crate::utils::to_static_long_string("test").unwrap(), common::types::Sha256Vec::default())); + let result = ::send_event(&client, event); + assert!(result.is_err()); + } + + // ---------- drop behavior ---------- + + #[test] + fn drop_without_thread_is_safe() { + let (tx, _rx) = mpsc::sync_channel(CHANNEL_CAPACITY); + let client = FaultManagerSink { + sink_sender: tx, + sink_thread: None, + hash_check_response_subscriber: None, + }; + // Should not panic + drop(client); + } + + #[test] + fn drop_with_disconnected_channel_is_safe() { + let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY); + let handle = thread::spawn(|| { + thread::sleep(Duration::from_millis(1)); + }); + drop(rx); // Disconnect receiver + let client = FaultManagerSink { + sink_sender: tx, + sink_thread: Some(handle), + hash_check_response_subscriber: None, + }; + // Should not panic even though channel is disconnected + drop(client); + } + + // ---------- concurrent publish ---------- + + #[test] + fn concurrent_publish_does_not_panic() { + let (tx, rx) = mpsc::sync_channel(CHANNEL_CAPACITY); + let client = Arc::new(FaultManagerSink { + sink_sender: tx, + sink_thread: None, + hash_check_response_subscriber: None, + }); + + // Spawn receiver that drains events + 
let drain = thread::spawn(move || while rx.recv_timeout(Duration::from_millis(500)).is_ok() {}); + + // Spawn multiple threads publishing concurrently + let mut handles = vec![]; + for i in 0..4 { + let client_clone = Arc::clone(&client); + handles.push(thread::spawn(move || { + for j in 0..10 { + let fault_name = ShortString::from_bytes("Test".as_bytes()).unwrap(); + let desc = stub_descriptor(FaultId::Numeric(i * 100 + j), fault_name, None, None); + let _ = ::publish(&*client_clone, "test/path", stub_record(desc)); + } + })); + } + + for h in handles { + h.join().unwrap(); + } + drop(client); // Drop client to disconnect channel + drain.join().unwrap(); + } + + // ---------- SinkInitError ---------- + + #[test] + fn sink_init_error_display() { + let err = SinkInitError::ThreadSpawn("os error".into()); + assert!(err.to_string().contains("thread spawn failed")); + + let err = SinkInitError::WorkerInit("timeout".into()); + assert!(err.to_string().contains("worker initialization failed")); + + let err = SinkInitError::IpcService("no shm".into()); + assert!(err.to_string().contains("IPC service creation failed")); + + let err = SinkInitError::Channel("disconnected".into()); + assert!(err.to_string().contains("channel error")); + } +} diff --git a/src/fault_lib/src/ipc_worker.rs b/src/fault_lib/src/ipc_worker.rs new file mode 100644 index 0000000..2b80164 --- /dev/null +++ b/src/fault_lib/src/ipc_worker.rs @@ -0,0 +1,774 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use crate::enabling_condition::EnablingConditionManager; +use crate::fault_manager_sink::{SinkInitError, WorkerMsg, WorkerReceiver}; +use alloc::collections::VecDeque; +use alloc::sync::Weak; +use common::enabling_condition::EnablingConditionNotification; +use common::ipc_service_name::{DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME, ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME}; +use common::ipc_service_type::ServiceType; +use common::sink_error::SinkError; +use common::types::DiagnosticEvent; +use core::time::Duration; +use iceoryx2::port::publisher::Publisher; +use iceoryx2::port::subscriber::Subscriber; +use iceoryx2::prelude::{NodeBuilder, ServiceName}; +use log::*; +use std::sync::mpsc; +use std::time::Instant; + +// ============================================================================ +// Retry Configuration (fault_lib-internal, NOT transferred via IPC) +// ============================================================================ + +/// Configuration for IPC retry behavior within the worker thread. +/// +/// This struct is internal to fault_lib and controls how the IPC worker +/// handles transient send failures. It is NOT part of the IPC protocol +/// and NOT transferred over iceoryx2. +#[derive(Debug, Clone)] +pub struct RetryConfig { + /// Maximum number of retry attempts per fault before dropping. + pub max_retries: u32, + /// Maximum number of faults to cache for retry. + pub cache_capacity: usize, + /// Base interval between retry attempts (exponential backoff base). + pub retry_interval: Duration, + /// Maximum retry interval (exponential backoff cap). 
+ pub max_retry_interval: Duration,
+}
+
+impl Default for RetryConfig {
+ fn default() -> Self {
+ Self {
+ max_retries: 10,
+ cache_capacity: 512,
+ retry_interval: Duration::from_millis(100),
+ max_retry_interval: Duration::from_secs(5),
+ }
+ }
+}
+
+// ============================================================================
+// Cached Fault Entry
+// ============================================================================
+
+/// A diagnostic event that failed to send and is queued for retry.
+struct CachedFault {
+ event: DiagnosticEvent,
+ // Number of failed delivery attempts so far (0 = never tried).
+ attempts: u32,
+ // Earliest instant at which the next attempt may run.
+ next_retry: Instant,
+}
+
+impl CachedFault {
+ fn new(event: DiagnosticEvent) -> Self {
+ Self {
+ event,
+ attempts: 0,
+ // Eligible for retry immediately on first processing pass.
+ next_retry: Instant::now(),
+ }
+ }
+
+ /// Calculate exponential backoff delay: base × 2^attempts, capped at max.
+ ///
+ /// Progression with default config: 100ms → 200ms → 400ms → 800ms → 1.6s → 3.2s → 5s (cap)
+ fn backoff_delay(&self, config: &RetryConfig) -> Duration {
+ // min(10) bounds the exponent; saturating_pow/saturating_mul make the
+ // arithmetic overflow-proof even for pathological configs.
+ let multiplier = 2u32.saturating_pow(self.attempts.min(10));
+ let delay = config.retry_interval.saturating_mul(multiplier);
+ delay.min(config.max_retry_interval)
+ }
+}
+
+// ============================================================================
+// IPC Worker Retry State
+// ============================================================================
+
+/// Internal retry queue state for the IPC worker.
+///
+/// Extracted as a separate struct for testability — the retry logic
+/// can be tested without iceoryx2 by providing a mock publish function.
+pub(crate) struct IpcWorkerState {
+ // NOTE(review): element type stripped in this patch text — presumably
+ // `VecDeque<CachedFault>`; regenerate the patch to restore generics.
+ retry_queue: VecDeque,
+ config: RetryConfig,
+}
+
+impl IpcWorkerState {
+ fn new(config: RetryConfig) -> Self {
+ Self {
+ retry_queue: VecDeque::new(),
+ config,
+ }
+ }
+
+ /// Handle a failed send by caching for retry (transient) or logging and
+ /// dropping (permanent).
+ fn handle_send_failure(&mut self, event: DiagnosticEvent, error: &SinkError) {
+ if Self::is_transient(error) {
+ self.cache_for_retry(event);
+ } else {
+ error!("Permanent IPC error, dropping event: {error:?}");
+ }
+ }
+
+ /// Add a failed event to the retry queue, evicting oldest if at capacity.
+ // NOTE(review): this uses a let-chain (`if cond && let Some(..)`), which
+ // requires a recent Rust toolchain — confirm rust-toolchain.toml allows it.
+ fn cache_for_retry(&mut self, event: DiagnosticEvent) {
+ if self.retry_queue.len() >= self.config.cache_capacity
+ && let Some(evicted) = self.retry_queue.pop_front()
+ {
+ warn!("Retry cache full, evicting event after {} attempts", evicted.attempts);
+ }
+ self.retry_queue.push_back(CachedFault::new(event));
+ }
+
+ /// Process retry queue — called periodically in worker loop.
+ ///
+ /// The `publish_fn` closure abstracts the actual IPC send, enabling
+ /// unit testing without iceoryx2.
+ // NOTE(review): the generic parameter list was stripped from this patch
+ // text (`fn process_retries` should carry `<F>` to match the where-clause).
+ #[allow(clippy::arithmetic_side_effects)]
+ fn process_retries(&mut self, publish_fn: &F)
+ where
+ F: Fn(&DiagnosticEvent) -> Result<(), SinkError>,
+ {
+ let now = Instant::now();
+ // Rebuild the queue in one pass: entries are popped, and anything not
+ // yet due or failed-but-retryable is appended to still_pending.
+ // FIFO order of surviving entries is preserved.
+ let mut still_pending = VecDeque::new();
+
+ while let Some(mut cached) = self.retry_queue.pop_front() {
+ if cached.next_retry > now {
+ still_pending.push_back(cached);
+ continue;
+ }
+
+ // Count the attempt before publishing so backoff/drop decisions
+ // below see the up-to-date number.
+ cached.attempts += 1;
+
+ match publish_fn(&cached.event) {
+ Ok(()) => {
+ debug!("Retry success after {} attempts", cached.attempts);
+ }
+ Err(ref e) if cached.attempts < self.config.max_retries => {
+ let delay = cached.backoff_delay(&self.config);
+ debug!("Retry {} failed, next retry in {:?}: {:?}", cached.attempts, delay, e);
+ cached.next_retry = now + delay;
+ still_pending.push_back(cached);
+ }
+ Err(e) => {
+ // Retry budget exhausted — drop the event.
+ error!("Dropping event after {} attempts: {:?}", cached.attempts, e);
+ }
+ }
+ }
+
+ self.retry_queue = still_pending;
+ }
+
+ /// Determine if an error is transient (worth retrying) or permanent.
+ fn is_transient(error: &SinkError) -> bool {
+ // Everything else (PermissionDenied, BadDescriptor, …) is permanent.
+ matches!(
+ error,
+ SinkError::TransportDown | SinkError::Timeout | SinkError::QueueFull | SinkError::RateLimited
+ )
+ }
+
+ /// Number of events currently in the retry queue.
+ #[cfg(test)]
+ fn retry_queue_len(&self) -> usize {
+ self.retry_queue.len()
+ }
+}
+
+// ============================================================================
+// IPC Worker
+// ============================================================================
+
+/// Timeout for `recv_timeout` in the worker loop.
+/// Short enough to process retries promptly, long enough to avoid busy-waiting.
+const RECV_TIMEOUT: Duration = Duration::from_millis(50);
+
+// NOTE(review): generic type arguments were stripped from this patch text
+// throughout this struct and impl (`Option>`, `Weak`, `Weak::::new()`,
+// `Result` without parameters, `.create::()`, `.publish_subscribe::()`) —
+// the hunk must be regenerated before it can apply/compile.
+pub struct IpcWorker {
+ sink_receiver: WorkerReceiver,
+ diagnostic_publisher: Option>,
+ ec_notification_subscriber: Option>,
+ // Weak reference: the worker must not keep the EC manager alive.
+ ec_manager: Weak,
+ state: IpcWorkerState,
+}
+
+impl IpcWorker {
+ pub fn new(sink_receiver: WorkerReceiver, ec_manager: Weak) -> Result {
+ Self::with_retry_config(sink_receiver, RetryConfig::default(), ec_manager)
+ }
+
+ /// Create an IPC worker with custom retry config.
+ ///
+ /// Uses the default production service name for the event publisher.
+ pub fn with_retry_config(
+ sink_receiver: WorkerReceiver,
+ retry_config: RetryConfig,
+ ec_manager: Weak,
+ ) -> Result {
+ Self::create(sink_receiver, retry_config, DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME, ec_manager)
+ }
+
+ /// Create an IPC worker with a custom service name (test isolation).
+ ///
+ /// Each test can supply a unique service name to avoid iceoryx2 shared
+ /// memory conflicts when tests run in parallel.
+ #[cfg(test)]
+ pub fn with_test_service(sink_receiver: WorkerReceiver, retry_config: RetryConfig, service_name: &str) -> Result {
+ Self::create(sink_receiver, retry_config, service_name, Weak::::new())
+ }
+
+ /// Internal constructor: creates iceoryx2 node, service, and publisher.
+ ///
+ /// # Errors
+ ///
+ /// Returns `SinkInitError::IpcService` if iceoryx2 node, service, or
+ /// publisher creation fails (e.g. no shared memory, no permissions).
+ fn create(
+ sink_receiver: WorkerReceiver,
+ retry_config: RetryConfig,
+ service_name: &str,
+ ec_manager: Weak,
+ ) -> Result {
+ let node = NodeBuilder::new()
+ .create::()
+ .map_err(|e| SinkInitError::IpcService(format!("node creation: {e}")))?;
+ let event_publisher_service_name =
+ ServiceName::new(service_name).map_err(|e| SinkInitError::IpcService(format!("service name '{service_name}': {e}")))?;
+ let event_publisher_service = node
+ .service_builder(&event_publisher_service_name)
+ .publish_subscribe::()
+ .open_or_create()
+ .map_err(|e| SinkInitError::IpcService(format!("event publisher service: {e}")))?;
+ let publisher = event_publisher_service
+ .publisher_builder()
+ .create()
+ .map_err(|e| SinkInitError::IpcService(format!("event publisher: {e}")))?;
+
+ // EC notification subscriber is best-effort — may fail if DFM not running
+ // (every fallible step is collapsed to Option via .ok()/.and_then()).
+ let ec_notification_subscriber = ServiceName::new(ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME)
+ .ok()
+ .and_then(|svc_name| {
+ node.service_builder(&svc_name)
+ .publish_subscribe::()
+ .open_or_create()
+ .ok()
+ })
+ .and_then(|service| service.subscriber_builder().create().ok());
+
+ if ec_notification_subscriber.is_some() {
+ debug!("EC notification subscriber created");
+ } else {
+ debug!("EC notification subscriber not available (DFM may not be running)");
+ }
+
+ Ok(Self {
+ sink_receiver,
+ diagnostic_publisher: Some(publisher),
+ ec_notification_subscriber,
+ ec_manager,
+ state: IpcWorkerState::new(retry_config),
+ })
+ }
+
+ /// Attempt to publish an event via iceoryx2.
+ ///
+ /// Takes a reference and clones into shared memory — the clone cost is
+ /// negligible for `#[repr(C)]` fixed-size types (effectively a memcpy).
+ fn publish_event(&self, event: &DiagnosticEvent) -> Result<(), SinkError> { + let publisher = self.diagnostic_publisher.as_ref().ok_or(SinkError::TransportDown)?; + let sample = publisher.loan_uninit().map_err(|_| SinkError::TransportDown)?; + let sample = sample.write_payload(event.clone()); + match sample.send().map_err(|_| SinkError::TransportDown) { + Ok(_) => { + debug!("Event successfully sent!"); + Ok(()) + } + Err(e) => Err(e), + } + } + + pub fn run(&mut self) { + // Handle Start message first (blocking, parent thread is waiting to be unparked) + if let Ok(WorkerMsg::Start(parent)) = self.sink_receiver.recv() { + debug!("Diag IPC worker running"); + parent.unpark(); + } + + // Main loop: process channel messages + retry queue + EC notifications + loop { + match self.sink_receiver.recv_timeout(RECV_TIMEOUT) { + Ok(WorkerMsg::Event { event }) => { + if let Err(e) = self.publish_event(&event) { + // Only retry Fault events; Hash/EC events are time-sensitive + if matches!(event.as_ref(), DiagnosticEvent::Fault(_)) { + self.state.handle_send_failure(*event, &e); + } else { + error!("Event send failed (not retrying): {e:?}"); + } + } + } + Ok(WorkerMsg::Start(parent)) => { + debug!("Late start message received"); + parent.unpark(); + } + Ok(WorkerMsg::Exit) => { + info!("FaultMgrClient worker ends"); + break; + } + Err(mpsc::RecvTimeoutError::Timeout) => { + // No new messages — fall through to retry + EC notification processing + } + Err(mpsc::RecvTimeoutError::Disconnected) => { + info!("Channel disconnected, worker exiting"); + break; + } + } + + // Process retry queue (borrows publisher and state as separate fields) + let publisher = &self.diagnostic_publisher; + self.state.process_retries(&|event: &DiagnosticEvent| { + let pub_ref = publisher.as_ref().ok_or(SinkError::TransportDown)?; + let sample = pub_ref.loan_uninit().map_err(|_| SinkError::TransportDown)?; + let sample = sample.write_payload(event.clone()); + sample.send().map_err(|_| 
SinkError::TransportDown).map(|_| ()) + }); + + // Poll for enabling condition notifications from DFM + self.poll_ec_notifications(); + } + + // Final flush: attempt to deliver remaining cached faults before shutdown + self.final_flush(); + } + + /// Poll the enabling condition notification subscriber and dispatch + /// received notifications to the local EnablingConditionManager. + fn poll_ec_notifications(&self) { + let subscriber = match self.ec_notification_subscriber.as_ref() { + Some(s) => s, + None => return, + }; + + let manager = match self.ec_manager.upgrade() { + Some(m) => m, + None => return, + }; + + // Drain all available notifications + loop { + match subscriber.receive() { + Ok(Some(sample)) => { + let notification = sample.payload(); + debug!("Received EC notification: {} -> {:?}", notification.id, notification.status); + manager.handle_remote_notification(¬ification.id, notification.status); + } + Ok(None) => break, + Err(e) => { + error!("EC notification receive error: {e:?}"); + break; + } + } + } + } + + /// Attempt to deliver any remaining cached faults before shutdown. 
+ fn final_flush(&mut self) { + let remaining = self.state.retry_queue.len(); + if remaining > 0 { + info!("Final flush: attempting to deliver {remaining} cached events"); + } + while let Some(cached) = self.state.retry_queue.pop_front() { + if let Err(e) = self.publish_event(&cached.event) { + warn!("Final flush failed for event after {} attempts: {:?}", cached.attempts, e); + } else { + debug!("Final flush: event delivered after {} attempts", cached.attempts); + } + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +#[cfg(not(miri))] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] +mod tests { + use super::*; + use crate::fault_manager_sink::WorkerMsg; + use crate::test_utils::*; + use crate::utils::to_static_long_string; + use common::fault::FaultId; + use serial_test::serial; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::thread; + + // ---------- Helpers ---------- + + /// Create a stub `DiagnosticEvent::Fault` for testing. 
+ fn stub_fault_event() -> DiagnosticEvent { + let path = to_static_long_string("test/retry/path").unwrap(); + let desc = stub_descriptor( + FaultId::Numeric(42), + crate::utils::to_static_short_string("RetryTest").unwrap(), + None, + None, + ); + DiagnosticEvent::Fault((path, stub_record(desc))) + } + + fn make_state(config: RetryConfig) -> IpcWorkerState { + IpcWorkerState::new(config) + } + + // ---------- RetryConfig defaults ---------- + + #[test] + fn retry_config_defaults_are_sensible() { + let config = RetryConfig::default(); + assert_eq!(config.max_retries, 10); + assert_eq!(config.cache_capacity, 512); + assert_eq!(config.retry_interval, Duration::from_millis(100)); + assert_eq!(config.max_retry_interval, Duration::from_secs(5)); + } + + // ---------- CachedFault backoff ---------- + + #[test] + fn backoff_delay_grows_exponentially() { + let config = RetryConfig::default(); + let mut cached = CachedFault::new(stub_fault_event()); + + // attempts=0 → 100ms * 2^0 = 100ms + assert_eq!(cached.backoff_delay(&config), Duration::from_millis(100)); + + cached.attempts = 1; + assert_eq!(cached.backoff_delay(&config), Duration::from_millis(200)); + + cached.attempts = 2; + assert_eq!(cached.backoff_delay(&config), Duration::from_millis(400)); + + cached.attempts = 3; + assert_eq!(cached.backoff_delay(&config), Duration::from_millis(800)); + } + + #[test] + fn backoff_delay_caps_at_max() { + let config = RetryConfig { + max_retry_interval: Duration::from_secs(5), + ..Default::default() + }; + let mut cached = CachedFault::new(stub_fault_event()); + + // 100ms * 2^10 = 102400ms ≫ 5s cap + cached.attempts = 10; + assert_eq!(cached.backoff_delay(&config), Duration::from_secs(5)); + } + + // ---------- is_transient ---------- + + #[test] + fn transient_errors_classified_correctly() { + assert!(IpcWorkerState::is_transient(&SinkError::TransportDown)); + assert!(IpcWorkerState::is_transient(&SinkError::Timeout)); + 
assert!(IpcWorkerState::is_transient(&SinkError::QueueFull)); + assert!(IpcWorkerState::is_transient(&SinkError::RateLimited)); + } + + #[test] + fn permanent_errors_classified_correctly() { + assert!(!IpcWorkerState::is_transient(&SinkError::PermissionDenied)); + assert!(!IpcWorkerState::is_transient(&SinkError::BadDescriptor("test".into()))); + assert!(!IpcWorkerState::is_transient(&SinkError::InvalidServiceName)); + } + + // ---------- cache_for_retry ---------- + + #[test] + fn cache_for_retry_adds_to_queue() { + let mut state = make_state(RetryConfig::default()); + assert_eq!(state.retry_queue_len(), 0); + + state.cache_for_retry(stub_fault_event()); + assert_eq!(state.retry_queue_len(), 1); + + state.cache_for_retry(stub_fault_event()); + assert_eq!(state.retry_queue_len(), 2); + } + + #[test] + fn cache_for_retry_evicts_oldest_when_full() { + let mut state = make_state(RetryConfig { + cache_capacity: 2, + ..Default::default() + }); + + state.cache_for_retry(stub_fault_event()); + state.cache_for_retry(stub_fault_event()); + assert_eq!(state.retry_queue_len(), 2); + + // Third entry should evict the oldest + state.cache_for_retry(stub_fault_event()); + assert_eq!(state.retry_queue_len(), 2); + } + + // ---------- handle_send_failure ---------- + + #[test] + fn handle_send_failure_caches_on_transient_error() { + let mut state = make_state(RetryConfig::default()); + + state.handle_send_failure(stub_fault_event(), &SinkError::TransportDown); + assert_eq!(state.retry_queue_len(), 1); + } + + #[test] + fn handle_send_failure_drops_on_permanent_error() { + let mut state = make_state(RetryConfig::default()); + + state.handle_send_failure(stub_fault_event(), &SinkError::PermissionDenied); + assert_eq!(state.retry_queue_len(), 0); + } + + #[test] + fn handle_send_failure_caches_on_rate_limited() { + let mut state = make_state(RetryConfig::default()); + + state.handle_send_failure(stub_fault_event(), &SinkError::RateLimited); + assert_eq!(state.retry_queue_len(), 1, 
"RateLimited is transient, event should be cached for retry"); + } + + // ---------- process_retries ---------- + + #[test] + fn process_retries_succeeds_on_first_retry() { + let mut state = make_state(RetryConfig::default()); + state.cache_for_retry(stub_fault_event()); + + let call_count = AtomicUsize::new(0); + state.process_retries(&|_event| { + call_count.fetch_add(1, Ordering::SeqCst); + Ok(()) + }); + + assert_eq!(call_count.load(Ordering::SeqCst), 1); + assert_eq!(state.retry_queue_len(), 0, "Queue should be empty after successful retry"); + } + + #[test] + fn process_retries_requeues_on_transient_failure() { + let mut state = make_state(RetryConfig { + max_retries: 5, + ..Default::default() + }); + state.cache_for_retry(stub_fault_event()); + + // First process: fails, requeued + state.process_retries(&|_event| Err(SinkError::TransportDown)); + assert_eq!(state.retry_queue_len(), 1, "Should requeue on transient failure"); + + // The requeued item has attempts=1 and a future next_retry + let cached = &state.retry_queue[0]; + assert_eq!(cached.attempts, 1); + assert!(cached.next_retry > Instant::now()); + } + + #[test] + fn process_retries_drops_after_max_retries() { + let config = RetryConfig { + max_retries: 3, + // Use zero retry interval so all retries are immediately eligible + retry_interval: Duration::ZERO, + ..Default::default() + }; + let mut state = make_state(config); + state.cache_for_retry(stub_fault_event()); + + // Process 3 times — each time fails, last time drops + for _ in 0..3 { + state.process_retries(&|_event| Err(SinkError::TransportDown)); + } + + assert_eq!(state.retry_queue_len(), 0, "Should drop after max_retries"); + } + + #[test] + fn process_retries_respects_backoff_timing() { + let mut state = make_state(RetryConfig { + max_retries: 5, + retry_interval: Duration::from_millis(200), + ..Default::default() + }); + state.cache_for_retry(stub_fault_event()); + + // First process: fail → requeued with next_retry ~200ms in future + 
state.process_retries(&|_event| Err(SinkError::TransportDown)); + assert_eq!(state.retry_queue_len(), 1); + + // Immediate second process: backoff not elapsed, should NOT retry + let call_count = AtomicUsize::new(0); + state.process_retries(&|_event| { + call_count.fetch_add(1, Ordering::SeqCst); + Err(SinkError::TransportDown) + }); + assert_eq!(call_count.load(Ordering::SeqCst), 0, "Should not retry before backoff elapses"); + assert_eq!(state.retry_queue_len(), 1, "Event should remain in queue"); + } + + #[test] + fn process_retries_with_flakey_publisher() { + let fail_count = AtomicUsize::new(0); + let config = RetryConfig { + max_retries: 10, + // Zero interval so retries are immediately eligible + retry_interval: Duration::ZERO, + max_retry_interval: Duration::ZERO, + ..Default::default() + }; + let mut state = make_state(config); + state.cache_for_retry(stub_fault_event()); + + // Fail twice, then succeed + let attempts = AtomicUsize::new(0); + let fail_times = 2; + + // Round 1: fail + state.process_retries(&|_event| { + let n = attempts.fetch_add(1, Ordering::SeqCst); + if n < fail_times { + fail_count.fetch_add(1, Ordering::SeqCst); + Err(SinkError::TransportDown) + } else { + Ok(()) + } + }); + assert_eq!(state.retry_queue_len(), 1); // Still queued (failed) + + // Round 2: fail + state.process_retries(&|_event| { + let n = attempts.fetch_add(1, Ordering::SeqCst); + if n < fail_times { + fail_count.fetch_add(1, Ordering::SeqCst); + Err(SinkError::TransportDown) + } else { + Ok(()) + } + }); + assert_eq!(state.retry_queue_len(), 1); // Still queued (failed again) + + // Round 3: succeed + state.process_retries(&|_event| { + let n = attempts.fetch_add(1, Ordering::SeqCst); + if n < fail_times { + fail_count.fetch_add(1, Ordering::SeqCst); + Err(SinkError::TransportDown) + } else { + Ok(()) + } + }); + assert_eq!(state.retry_queue_len(), 0, "Should be delivered after retries"); + assert_eq!(fail_count.load(Ordering::SeqCst), 2); + } + + #[test] + fn 
process_retries_handles_multiple_cached_events() { + let config = RetryConfig { + max_retries: 5, + retry_interval: Duration::ZERO, + max_retry_interval: Duration::ZERO, + ..Default::default() + }; + let mut state = make_state(config); + + // Cache 3 events + for _ in 0..3 { + state.cache_for_retry(stub_fault_event()); + } + assert_eq!(state.retry_queue_len(), 3); + + // All succeed + let call_count = AtomicUsize::new(0); + state.process_retries(&|_event| { + call_count.fetch_add(1, Ordering::SeqCst); + Ok(()) + }); + + assert_eq!(call_count.load(Ordering::SeqCst), 3); + assert_eq!(state.retry_queue_len(), 0); + } + + // ---------- IpcWorker::run integration (uses real iceoryx2) ---------- + // + // These tests create real iceoryx2 shared-memory resources. + // They MUST run serially to avoid resource conflicts. + // Each test uses a unique service name for defense-in-depth isolation. + + #[test] + #[serial(ipc)] + fn worker_start_and_exit_with_retry_config() { + let svc = unique_ipc_service_name("worker_start_exit"); + let (tx, rx) = mpsc::channel::(); + let mut worker = IpcWorker::with_test_service(rx, RetryConfig::default(), &svc).unwrap(); + let handle = thread::spawn(move || worker.run()); + + tx.send(WorkerMsg::Start(thread::current())).unwrap(); + tx.send(WorkerMsg::Exit).unwrap(); + + let (join_tx, join_rx) = mpsc::channel(); + + thread::spawn(move || { + let join_result = handle.join(); + join_tx.send(join_result).ok(); + }); + + let test_timeout = Duration::from_secs(5); + match join_rx.recv_timeout(test_timeout) { + Ok(Ok(())) => {} + Ok(Err(panic_err)) => { + std::panic::resume_unwind(panic_err); + } + Err(_) => { + panic!("Test failed: Worker thread did not exit within 5 seconds"); + } + } + } + + #[test] + #[serial(ipc)] + fn worker_exits_on_channel_disconnect() { + let svc = unique_ipc_service_name("worker_disconnect"); + let (tx, rx) = mpsc::channel::(); + let mut worker = IpcWorker::with_test_service(rx, RetryConfig::default(), &svc).unwrap(); + 
let handle = thread::spawn(move || worker.run()); + + tx.send(WorkerMsg::Start(thread::current())).unwrap(); + // Drop sender → channel disconnects → worker should exit + drop(tx); + + let (join_tx, join_rx) = mpsc::channel(); + thread::spawn(move || { + let join_result = handle.join(); + join_tx.send(join_result).ok(); + }); + + match join_rx.recv_timeout(Duration::from_secs(5)) { + Ok(Ok(())) => {} + Ok(Err(panic_err)) => std::panic::resume_unwind(panic_err), + Err(_) => panic!("Worker did not exit after channel disconnect"), + } + } +} diff --git a/src/fault_lib/src/lib.rs b/src/fault_lib/src/lib.rs new file mode 100644 index 0000000..937ee51 --- /dev/null +++ b/src/fault_lib/src/lib.rs @@ -0,0 +1,62 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Reporter-side fault library for the OpenSOVD / S-CORE ecosystem. +//! +//! `fault_lib` provides the application-facing API for reporting faults to +//! the Diagnostic Fault Manager (DFM) over IPC. The main workflow is: +//! +//! 1. Build a [`FaultCatalog`](common::catalog::FaultCatalog) from JSON or +//! code. +//! 2. Initialise the global [`FaultApi`] singleton with a transport sink and +//! an optional log hook. +//! 3. Create one [`Reporter`](reporter::Reporter) per fault ID — each +//! reporter binds static descriptor data once. +//! 4. At runtime, call `reporter.create_record()`, mutate the +//! [`FaultRecord`](common::fault::FaultRecord), and `reporter.publish()` +//! to enqueue it (non-blocking). +//! +//! ## Feature highlights +//! +//! - **Enabling conditions** — [`EnablingCondition`] / [`FaultMonitor`] gate +//! fault detection based on DFM-broadcast status changes. +//! 
- **Reporter-side debounce** — descriptor-declared policies filter noisy +//! events before they hit IPC. +//! - **Pluggable transport** — implement [`FaultSinkApi`] (or use the built-in +//! iceoryx2 sink) and [`LogHook`] for observability. +//! +//! See [`enabling_condition`] for the enabling-condition subsystem. + +extern crate alloc; +// The public surface collects the building blocks for reporters, descriptors, +// and sinks so callers can just `use fault_lib::*` and go. +pub mod api; +pub mod catalog; +pub mod enabling_condition; +pub mod reporter; +pub mod sink; + +mod fault_manager_sink; +mod ipc_worker; + +pub use api::FaultApi; +// Re-export the main user-facing pieces, this keeps the crate ergonomic without +// forcing consumers to dig through modules. +// pub use api::{FaultApi, Reporter}; +// pub use catalog::FaultCatalog; +pub use enabling_condition::{EnablingCondition, EnablingConditionCallback, EnablingConditionError, FaultMonitor}; +pub use sink::{FaultSinkApi, LogHook, NoOpLogHook}; + +pub mod utils; + +#[cfg(any(test, feature = "testutils"))] +pub mod test_utils; diff --git a/src/fault_lib/src/reporter.rs b/src/fault_lib/src/reporter.rs new file mode 100644 index 0000000..67e4312 --- /dev/null +++ b/src/fault_lib/src/reporter.rs @@ -0,0 +1,1715 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +use crate::{FaultApi, sink::*}; +use alloc::sync::Arc; +use common::debounce::Debounce; +use common::{fault::*, sink_error::*, types::*}; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + +// Per-component defaults that get baked into a Reporter instance. 
+#[derive(Debug, Clone)] +pub struct ReporterConfig { + pub source: common::ids::SourceId, + pub lifecycle_phase: LifecyclePhase, + /// Optional per-reporter defaults (e.g., common metadata). + pub default_env_data: MetadataVec, +} + +/// Errors that can occur when constructing a [`Reporter`]. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum ReporterError { + /// The fault catalog has not been initialized via [`FaultApi::try_new`]. + #[error("fault catalog not initialized")] + CatalogNotInitialized, + /// The given fault ID does not exist in the loaded catalog. + #[error("fault ID not found in catalog: {0:?}")] + FaultIdNotFound(FaultId), + /// The IPC sink is not available (dropped or never created). + #[error("sink not available: {0}")] + SinkNotAvailable(#[from] SinkError), +} + +pub trait ReporterApi { + fn new(id: &FaultId, config: ReporterConfig) -> Result + where + Self: Sized; + fn create_record(&self, lifecycle_stage: LifecycleStage) -> FaultRecord; + + /// Publish a fault record to the DFM via the configured sink. + /// + /// `path` is the SOVD entity path (e.g. `"ecu/app_name"`). The path is + /// converted to a 128-byte `LongString` for IPC transport; paths longer + /// than 128 bytes are rejected with [`SinkError::BadDescriptor`]. + fn publish(&mut self, path: &str, record: FaultRecord) -> Result<(), SinkError>; +} + +pub struct Reporter { + pub(crate) sink: Arc, + pub(crate) descriptor: FaultDescriptor, + pub(crate) config: ReporterConfig, + /// Runtime debounce state derived from `descriptor.reporter_side_debounce`. + /// When `Some`, `publish()` filters events through the debouncer before + /// forwarding to the sink, reducing IPC traffic to the DFM. + pub(crate) debouncer: Option>, + /// Tracks the last published lifecycle stage so that stage transitions + /// (e.g. Passed → Failed) can reset the debouncer. + pub(crate) last_stage: LifecycleStage, + /// Optional log hook for fault reporting observability. 
+ /// Called after each publish attempt (success or error). + /// Populated from `FaultApi::try_get_log_hook()` during `Reporter::new()`, + /// or set directly for testing. + pub(crate) log_hook: Option>, +} + +impl ReporterApi for Reporter { + fn new(id: &FaultId, config: ReporterConfig) -> Result { + let sink = FaultApi::try_get_fault_sink()?; + let catalog = FaultApi::try_get_fault_catalog().ok_or(ReporterError::CatalogNotInitialized)?; + let descriptor = catalog.descriptor(id).ok_or_else(|| ReporterError::FaultIdNotFound(id.clone()))?.clone(); + let debouncer = descriptor.reporter_side_debounce.map(|mode| mode.into_debouncer()); + let log_hook = FaultApi::try_get_log_hook(); + Ok(Self { + sink, + descriptor, + config, + debouncer, + last_stage: LifecycleStage::NotTested, + log_hook, + }) + } + + /// Create a new fault record with the current timestamp. + /// + /// The timestamp is captured at record creation time to accurately + /// reflect when the fault condition was detected. + fn create_record(&self, lifecycle_stage: LifecycleStage) -> FaultRecord { + let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default(); + + FaultRecord { + id: self.descriptor.id.clone(), + time: IpcTimestamp { + seconds_since_epoch: now.as_secs(), + nanoseconds: now.subsec_nanos(), + }, + source: self.config.source.clone(), + lifecycle_phase: self.config.lifecycle_phase, + lifecycle_stage, + env_data: self.config.default_env_data.clone(), + } + } + + fn publish(&mut self, path: &str, record: FaultRecord) -> Result<(), SinkError> { + let now = Instant::now(); + + // Reset debouncer on lifecycle stage transitions (e.g. Passed → Failed) + // so a new fault occurrence starts with a clean debounce window. 
+ if Self::should_reset_debouncer(&self.last_stage, &record.lifecycle_stage) + && let Some(ref mut d) = self.debouncer + { + d.reset(now); + } + + // Apply debounce filter — suppressed events return Ok(()) silently + // to reduce IPC traffic to the DFM per design.md REQ-4. + if let Some(ref mut debouncer) = self.debouncer + && !debouncer.on_event(now) + { + self.last_stage = record.lifecycle_stage; + return Ok(()); + } + + self.last_stage = record.lifecycle_stage; + + // Publish to sink, then notify log hook (REQ-10). + // Clone is needed because sink.publish() takes ownership; FaultRecord + // is repr(C) fixed-size so clone is a cheap memcpy. + if let Some(ref logger) = self.log_hook { + let record_copy = record.clone(); + let result = self.sink.publish(path, record); + match &result { + Ok(()) => logger.on_publish(&record_copy), + Err(e) => logger.on_error(&record_copy, e), + } + result + } else { + self.sink.publish(path, record) + } + } +} + +impl Reporter { + /// Returns `true` when the lifecycle stage transition indicates the fault + /// condition has changed direction (healthy → faulty), which should reset + /// the debounce window so that a fresh occurrence is not suppressed. + /// + /// Only triggers on Passed → Failed/PreFailed transitions. + /// NotTested → Failed is the initial detection, not a reset scenario. 
+ fn should_reset_debouncer(last_stage: &LifecycleStage, new_stage: &LifecycleStage) -> bool { + use LifecycleStage::*; + matches!((last_stage, new_stage), (Passed, Failed) | (Passed, PreFailed)) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + use crate::sink::MockFaultSinkApi; + use crate::test_utils::*; + use crate::utils::to_static_short_string; + + #[test] + fn create_record() { + let reporter = Reporter { + sink: Arc::new(MockFaultSinkApi::new()), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test fault").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Passed); + + assert_eq!(record.id, FaultId::Numeric(42)); + assert_eq!(record.source, stub_source()); + assert_eq!(record.lifecycle_phase, LifecyclePhase::Running); + assert_eq!(record.lifecycle_stage, LifecycleStage::Passed); + } + + #[test] + fn publish_success() { + let mut mock_sink = MockFaultSinkApi::new(); + mock_sink.expect_publish().once().returning(|path, record| { + assert_eq!(path, "test/path"); + assert_eq!(record.id, FaultId::Numeric(42)); + assert_eq!(record.source, stub_source()); + assert_eq!(record.lifecycle_phase, LifecyclePhase::Running); + assert_eq!(record.lifecycle_stage, LifecycleStage::Passed); + + Ok(()) + }); + + let mut reporter = Reporter { + sink: Arc::new(mock_sink), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test fault").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Passed); + + assert!(reporter.publish("test/path", record).is_ok()); + } + + #[test] + fn publish_fail() { + let mut mock_sink = MockFaultSinkApi::new(); + mock_sink.expect_publish().once().returning(|_, _| 
Err(SinkError::TransportDown)); + + let mut reporter = Reporter { + sink: Arc::new(mock_sink), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test fault").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Passed); + assert_eq!(reporter.publish("test/path", record), Err(SinkError::TransportDown)); + } +} + +#[cfg(test)] +mod design_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; + use crate::reporter::{Reporter, ReporterApi}; + use crate::sink::{FaultSinkApi, LogHook}; + use crate::test_utils::*; + use crate::utils::to_static_short_string; + use common::config::{ResetPolicy, ResetTrigger}; + use common::debounce::DebounceMode; + use common::fault::*; + use common::sink_error::SinkError; + use std::sync::Arc; + use std::sync::atomic::{AtomicU32, Ordering}; + use std::time::{Duration, Instant}; + + // ============================================================================ + // Helper functions + // ============================================================================ + + fn make_reporter(sink: Arc, fault_id: FaultId) -> Reporter { + Reporter { + sink, + descriptor: stub_descriptor(fault_id, to_static_short_string("Test fault").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + } + } + + fn make_reporter_with_descriptor(sink: Arc, descriptor: FaultDescriptor) -> Reporter { + let debouncer = descriptor.reporter_side_debounce.map(|mode| mode.into_debouncer()); + Reporter { + sink, + descriptor, + config: stub_config(), + debouncer, + last_stage: LifecycleStage::NotTested, + log_hook: None, + } + } + + // 
============================================================================ + // REQ-1: Framework Agnostic API + // ============================================================================ + + /// REQ-1: The API must not require any specific async runtime. + /// Reporter is constructed synchronously, no tokio/async-std needed. + #[test] + fn req1_reporter_requires_no_async_runtime() { + let sink = Arc::new(RecordingSink::new()); + let _reporter = make_reporter(sink, FaultId::Numeric(1)); + // If this compiles and runs without an async runtime, REQ-1 is satisfied. + } + + /// REQ-1: Reporter must be Send + Sync for use across threads. + #[test] + fn req1_reporter_is_send_sync() { + fn assert_send() {} + fn assert_sync() {} + assert_send::(); + assert_sync::(); + } + + /// REQ-1: FaultRecord must be ZeroCopySend for IPC transport. + #[test] + fn req1_fault_record_is_zero_copy_send() { + fn assert_zero_copy() {} + assert_zero_copy::(); + } + + // ============================================================================ + // REQ-2: Relays faults via IPC to DFM + // ============================================================================ + + /// REQ-2: Published records must arrive at the sink. + #[test] + fn req2_sink_receives_published_records() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(42)); + + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + + assert_eq!(sink.count(), 1); + let received = sink.received_records(); + assert_eq!(received[0].id, FaultId::Numeric(42)); + assert_eq!(received[0].lifecycle_stage, LifecycleStage::Failed); + } + + /// REQ-2: Sink errors must propagate back to the caller. 
+ #[test] + fn req2_sink_error_propagates_to_caller() { + let sink = Arc::new(FailingSink::transport_down()); + let mut reporter = make_reporter(sink, FaultId::Numeric(42)); + + let record = reporter.create_record(LifecycleStage::Failed); + let result = reporter.publish("test/path", record); + + assert_eq!(result, Err(SinkError::TransportDown)); + } + + /// REQ-2: Multiple records can be published sequentially. + #[test] + fn req2_multiple_records_delivered_in_order() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(100)); + + for stage in [LifecycleStage::NotTested, LifecycleStage::PreFailed, LifecycleStage::Failed] { + let record = reporter.create_record(stage); + reporter.publish("test/path", record).unwrap(); + } + + let received = sink.received_records(); + assert_eq!(received.len(), 3); + assert_eq!(received[0].lifecycle_stage, LifecycleStage::NotTested); + assert_eq!(received[1].lifecycle_stage, LifecycleStage::PreFailed); + assert_eq!(received[2].lifecycle_stage, LifecycleStage::Failed); + } + + // ============================================================================ + // REQ-3: Domain-specific error logic (debouncing) + // ============================================================================ + + /// REQ-3: FaultDescriptor supports reporter-side debounce configuration. + #[test] + fn req3_descriptor_supports_reporter_side_debounce() { + let descriptor = stub_descriptor( + FaultId::Numeric(1), + to_static_short_string("Test").unwrap(), + Some(DebounceMode::HoldTime { + duration: Duration::from_secs(5).into(), + }), + None, + ); + assert!(descriptor.reporter_side_debounce.is_some()); + } + + /// REQ-3: FaultDescriptor supports manager-side debounce configuration. 
+ #[test] + fn req3_descriptor_supports_manager_side_debounce() { + let descriptor = FaultDescriptor { + id: FaultId::Numeric(1), + name: to_static_short_string("Test").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(10).into(), + }), + manager_side_reset: None, + }; + assert!(descriptor.manager_side_debounce.is_some()); + } + + /// REQ-3: FaultDescriptor supports reset policy configuration. + #[test] + fn req3_descriptor_supports_reset_policy() { + let descriptor = stub_descriptor( + FaultId::Numeric(1), + to_static_short_string("Test").unwrap(), + None, + Some(ResetPolicy { + trigger: ResetTrigger::ToolOnly, + min_operating_cycles_before_clear: None, + }), + ); + assert!(descriptor.reporter_side_reset.is_some()); + } + + /// REQ-3: Reporter applies debounce filtering before publishing to sink. + /// EdgeWithCooldown lets the first event through, then suppresses until cooldown expires. 
+ #[test] + fn req3_reporter_applies_debounce_before_publish() { + let debounce = DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_secs(10).into(), + }; + let descriptor = stub_descriptor(FaultId::Numeric(1), to_static_short_string("Debounced").unwrap(), Some(debounce), None); + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter_with_descriptor(sink.clone(), descriptor); + + let record = reporter.create_record(LifecycleStage::Failed); + + // First publish passes through + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 1, "First event should pass debounce"); + + // Rapid second publish is suppressed by cooldown + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 1, "Second event within cooldown should be suppressed"); + + // Third immediate publish also suppressed + reporter.publish("test/path", record).unwrap(); + assert_eq!(sink.count(), 1, "Third event within cooldown should be suppressed"); + } + + // ============================================================================ + // REQ-3: Debounce variant tests + // ============================================================================ + + /// CountWithinWindow suppresses events until min_count is reached within the window. 
+ #[test] + fn req3_debounce_count_within_window_suppresses_early_events() { + let descriptor = stub_descriptor( + FaultId::Numeric(2), + to_static_short_string("CountWindow").unwrap(), + Some(DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(60).into(), + }), + None, + ); + + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter_with_descriptor(sink.clone(), descriptor); + + let record = reporter.create_record(LifecycleStage::Failed); + + // First 2 publishes suppressed (count < min_count) + reporter.publish("test/path", record.clone()).unwrap(); + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 0, "Events before min_count should be suppressed"); + + // Third publish fires (count == min_count) + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 1, "Event at min_count should pass through"); + + // Fourth also passes (count > min_count, still in window) + reporter.publish("test/path", record).unwrap(); + assert_eq!(sink.count(), 2, "Events after min_count should pass through"); + } + + /// HoldTime suppresses until the configured duration has elapsed since first event. 
+ #[test] + fn req3_debounce_hold_time_waits_for_duration() { + let descriptor = stub_descriptor( + FaultId::Numeric(3), + to_static_short_string("HoldTime").unwrap(), + Some(DebounceMode::HoldTime { + duration: Duration::from_millis(50).into(), + }), + None, + ); + + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter_with_descriptor(sink.clone(), descriptor); + + let record = reporter.create_record(LifecycleStage::Failed); + + // Immediate publish should be suppressed (hold time not elapsed) + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 0, "Immediate event should be suppressed"); + + // Wait for hold time to elapse + std::thread::sleep(Duration::from_millis(60)); + + // After delay, should fire + reporter.publish("test/path", record).unwrap(); + assert_eq!(sink.count(), 1, "Event after hold time should pass through"); + } + + /// EdgeWithCooldown passes the first event, then suppresses until cooldown expires. + #[test] + fn req3_debounce_edge_with_cooldown_passes_first_then_suppresses() { + let descriptor = stub_descriptor( + FaultId::Numeric(4), + to_static_short_string("EdgeCooldown").unwrap(), + Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(50).into(), + }), + None, + ); + + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter_with_descriptor(sink.clone(), descriptor); + + let record = reporter.create_record(LifecycleStage::Failed); + + // First event passes through (edge-triggered) + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 1, "First event should pass through"); + + // Immediate second event suppressed (within cooldown) + reporter.publish("test/path", record.clone()).unwrap(); + assert_eq!(sink.count(), 1, "Event within cooldown should be suppressed"); + + // Wait for cooldown to expire + std::thread::sleep(Duration::from_millis(60)); + + // After cooldown, next event passes through + 
reporter.publish("test/path", record).unwrap(); + assert_eq!(sink.count(), 2, "Event after cooldown should pass through"); + } + + /// Debouncer resets when lifecycle stage transitions from Passed to Failed. + #[test] + fn req3_debounce_resets_on_lifecycle_transition() { + let descriptor = stub_descriptor( + FaultId::Numeric(5), + to_static_short_string("ResetTest").unwrap(), + Some(DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(60).into(), + }), + None, + ); + + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter_with_descriptor(sink.clone(), descriptor); + + // Accumulate 3 Failed events → fires at count=3 + let record_fail = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record_fail.clone()).unwrap(); + reporter.publish("test/path", record_fail.clone()).unwrap(); + reporter.publish("test/path", record_fail).unwrap(); + assert_eq!(sink.count(), 1, "3 events reach min_count: published"); + + // Transition to Passed (the Passed event also goes through debouncer, + // but counter is already >= min_count so it passes) + let record_pass = reporter.create_record(LifecycleStage::Passed); + reporter.publish("test/path", record_pass).unwrap(); + let count_after_pass = sink.count(); + + // Transition back to Failed — debouncer resets, counter restarts from 0 + let record_fail2 = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record_fail2.clone()).unwrap(); + reporter.publish("test/path", record_fail2.clone()).unwrap(); + // After reset: only 2 events, min_count=3, should be suppressed + assert_eq!(sink.count(), count_after_pass, "After Passed→Failed reset, 2 events still suppressed"); + + // Third event after reset fires + reporter.publish("test/path", record_fail2).unwrap(); + assert_eq!(sink.count(), count_after_pass + 1, "Third event after reset should pass through"); + } + + /// Reporter without debounce passes all events through to sink. 
+ #[test] + fn req3_no_debounce_passes_all_events() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(10)); + + for _ in 0..10 { + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + } + + assert_eq!(sink.count(), 10, "Without debounce, all events should pass through"); + } + + // ============================================================================ + // REQ-4: Reporting test results (passed/failed lifecycle stages) + // ============================================================================ + + /// REQ-4: All LifecycleStage variants exist and are distinct. + #[test] + fn req4_all_lifecycle_stages_exist() { + let stages = [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ]; + // All 5 stages must be distinct + for i in 0..stages.len() { + for j in (i + 1)..stages.len() { + assert_ne!(stages[i], stages[j]); + } + } + } + + /// REQ-4: Created record contains the specified lifecycle_stage. + #[test] + fn req4_record_contains_lifecycle_stage() { + let sink = Arc::new(RecordingSink::new()); + let reporter = make_reporter(sink, FaultId::Numeric(42)); + + for stage in [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ] { + let record = reporter.create_record(stage); + assert_eq!(record.lifecycle_stage, stage, "Mismatch for stage {stage:?}"); + } + } + + /// REQ-4: All lifecycle stages can be published without error. 
+ #[test] + fn req4_all_stages_publishable() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(42)); + + for stage in [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ] { + let record = reporter.create_record(stage); + let result = reporter.publish("test/path", record); + assert!(result.is_ok(), "Failed to publish stage {stage:?}"); + } + + assert_eq!(sink.count(), 5); + } + + // ============================================================================ + // REQ-5: Per-fault handles (Reporter pattern) + // ============================================================================ + + /// REQ-5: Reporter binds to a single fault ID. + #[test] + fn req5_reporter_binds_to_single_fault_id() { + let sink = Arc::new(RecordingSink::new()); + let reporter = make_reporter(sink, FaultId::Numeric(0x1001)); + + let record = reporter.create_record(LifecycleStage::Failed); + assert_eq!(record.id, FaultId::Numeric(0x1001)); + } + + /// REQ-5: Multiple reporters are independent. + #[test] + fn req5_multiple_reporters_independent() { + let sink = Arc::new(RecordingSink::new()); + let reporter1 = make_reporter(sink.clone(), FaultId::Numeric(0x1001)); + let reporter2 = make_reporter(sink, FaultId::Numeric(0x1002)); + + let record1 = reporter1.create_record(LifecycleStage::Failed); + let record2 = reporter2.create_record(LifecycleStage::Passed); + + assert_ne!(record1.id, record2.id); + assert_ne!(record1.lifecycle_stage, record2.lifecycle_stage); + } + + /// REQ-5: Reporter preserves source identity in records. 
+ #[test] + fn req5_reporter_preserves_source_identity() { + let sink = Arc::new(RecordingSink::new()); + let config = stub_config(); + let expected_source = config.source.clone(); + + let reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(1), to_static_short_string("Test").unwrap(), None, None), + config, + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + assert_eq!(record.source, expected_source); + } + + // ============================================================================ + // REQ-6: Non-blocking publish path + // ============================================================================ + + /// REQ-6: Publish with a slow sink should still return quickly + /// (because the real sink enqueues and returns, not blocking on IPC). + /// Here we test that the API call through Reporter.publish() is direct - + /// it calls sink.publish() synchronously, so if the sink is slow, + /// it will be slow. The non-blocking contract is on the REAL FaultManagerSink + /// which enqueues to a channel. + #[test] + fn req6_publish_is_synchronous_call_to_sink() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(42)); + + let record = reporter.create_record(LifecycleStage::Failed); + let start = Instant::now(); + reporter.publish("test/path", record).unwrap(); + let elapsed = start.elapsed(); + + // RecordingSink is fast - publish should be near-instant + assert!(elapsed < Duration::from_millis(50), "publish took {elapsed:?}, expected <50ms"); + assert_eq!(sink.count(), 1); + } + + /// REQ-6: Multiple rapid publishes should not block each other. + #[test] + #[cfg_attr(miri, ignore)] // Miri is 10-100× slower; timing assertion is meaningless. 
+ fn req6_rapid_publishes_complete_quickly() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = make_reporter(sink.clone(), FaultId::Numeric(42)); + + let start = Instant::now(); + for _ in 0..100 { + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + } + let elapsed = start.elapsed(); + + assert_eq!(sink.count(), 100); + assert!(elapsed < Duration::from_millis(100), "100 publishes took {elapsed:?}, expected <100ms"); + } + + // ============================================================================ + // REQ-7: Decentral catalogue definition + // ============================================================================ + + /// REQ-7: Catalog hash is deterministic for same config. + #[test] + fn req7_catalog_hash_deterministic() { + let config = FaultCatalogConfig { + id: "test_app".into(), + version: 1, + faults: vec![], + }; + + let catalog1 = FaultCatalogBuilder::new().cfg_struct(config.clone()).unwrap().build(); + let catalog2 = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + + assert_eq!(catalog1.config_hash(), catalog2.config_hash()); + } + + /// REQ-7: Different catalogs produce different hashes. + #[test] + fn req7_different_catalogs_different_hash() { + let config1 = FaultCatalogConfig { + id: "app1".into(), + version: 1, + faults: vec![], + }; + let config2 = FaultCatalogConfig { + id: "app2".into(), + version: 1, + faults: vec![], + }; + + let catalog1 = FaultCatalogBuilder::new().cfg_struct(config1).unwrap().build(); + let catalog2 = FaultCatalogBuilder::new().cfg_struct(config2).unwrap().build(); + + assert_ne!(catalog1.config_hash(), catalog2.config_hash()); + } + + /// REQ-7: Catalog can look up descriptors by ID. 
+ #[test] + fn req7_catalog_descriptor_lookup() { + let desc = stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test fault").unwrap(), None, None); + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![desc.clone()], + }; + + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + + assert!(catalog.descriptor(&FaultId::Numeric(42)).is_some()); + assert_eq!(catalog.descriptor(&FaultId::Numeric(42)).unwrap().name, desc.name); + assert!(catalog.descriptor(&FaultId::Numeric(999)).is_none()); + } + + // ============================================================================ + // REQ-8: No DFM redeploy on app changes + // ============================================================================ + + /// REQ-8: Catalog is built at compile time, not loaded from DFM at runtime. + /// If this test compiles and runs, the catalog is embedded in the binary. + #[test] + fn req8_catalog_embedded_in_app_binary() { + let config = FaultCatalogConfig { + id: "embedded_app".into(), + version: 1, + faults: vec![], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + assert!(!catalog.config_hash().is_empty()); + // Catalog created without DFM connection = no DFM redeploy needed + } + + // ============================================================================ + // REQ-10: LogHook support + // ============================================================================ + + /// Test helper: LogHook that counts on_publish calls. 
+ struct CountingLogHook { + publish_count: AtomicU32, + error_count: AtomicU32, + } + + impl CountingLogHook { + fn new() -> Self { + Self { + publish_count: AtomicU32::new(0), + error_count: AtomicU32::new(0), + } + } + } + + impl LogHook for CountingLogHook { + fn on_publish(&self, _record: &FaultRecord) { + self.publish_count.fetch_add(1, Ordering::SeqCst); + } + fn on_error(&self, _record: &FaultRecord, _error: &SinkError) { + self.error_count.fetch_add(1, Ordering::SeqCst); + } + } + + /// REQ-10: LogHook on_publish is called after successful sink publish. + #[test] + fn req10_log_hook_called_on_successful_publish() { + let sink = Arc::new(RecordingSink::new()); + let hook = Arc::new(CountingLogHook::new()); + let mut reporter = Reporter { + sink: sink.clone(), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("LogHookTest").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: Some(hook.clone()), + }; + + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + + assert_eq!(hook.publish_count.load(Ordering::SeqCst), 1, "on_publish should be called once"); + assert_eq!(hook.error_count.load(Ordering::SeqCst), 0, "on_error should not be called on success"); + assert_eq!(sink.count(), 1, "Record should reach the sink"); + } + + /// REQ-10: LogHook on_error is called when sink publish fails. 
+ #[test] + fn req10_log_hook_on_error_called_when_sink_fails() { + let sink = Arc::new(FailingSink::transport_down()); + let hook = Arc::new(CountingLogHook::new()); + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("LogHookErrorTest").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: Some(hook.clone()), + }; + + let record = reporter.create_record(LifecycleStage::Failed); + let result = reporter.publish("test/path", record); + + assert!(result.is_err()); + assert_eq!(hook.error_count.load(Ordering::SeqCst), 1, "on_error should be called once"); + assert_eq!(hook.publish_count.load(Ordering::SeqCst), 0, "on_publish should not be called on failure"); + } + + /// REQ-10: Publish works without log hook (backward compatible). + #[test] + fn req10_publish_works_without_log_hook() { + let sink = Arc::new(RecordingSink::new()); + let mut reporter = Reporter { + sink: sink.clone(), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("NoHookTest").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + assert!(reporter.publish("test/path", record).is_ok()); + assert_eq!(sink.count(), 1); + } + + /// REQ-10: LogHook called for every publish, not just the first. 
+ #[test] + fn req10_log_hook_called_on_every_publish() { + let sink = Arc::new(RecordingSink::new()); + let hook = Arc::new(CountingLogHook::new()); + let mut reporter = Reporter { + sink: sink.clone(), + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("MultiPublish").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: Some(hook.clone()), + }; + + for _ in 0..5 { + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + } + + assert_eq!( + hook.publish_count.load(Ordering::SeqCst), + 5, + "on_publish should be called for each publish" + ); + assert_eq!(sink.count(), 5); + } + + /// REQ-10: LogHook is NOT called for debounce-suppressed events. + #[test] + fn req10_log_hook_not_called_for_suppressed_events() { + let sink = Arc::new(RecordingSink::new()); + let hook = Arc::new(CountingLogHook::new()); + let descriptor = stub_descriptor( + FaultId::Numeric(42), + to_static_short_string("DebouncedHook").unwrap(), + Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_secs(10).into(), + }), + None, + ); + let debouncer = descriptor.reporter_side_debounce.map(|mode| mode.into_debouncer()); + let mut reporter = Reporter { + sink: sink.clone(), + descriptor, + config: stub_config(), + debouncer, + last_stage: LifecycleStage::NotTested, + log_hook: Some(hook.clone()), + }; + + // First event passes through debounce and sink + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + assert_eq!(hook.publish_count.load(Ordering::SeqCst), 1, "First event should trigger hook"); + + // Second event is suppressed by debounce — hook should NOT be called + let record = reporter.create_record(LifecycleStage::Failed); + reporter.publish("test/path", record).unwrap(); + assert_eq!(hook.publish_count.load(Ordering::SeqCst), 1, "Suppressed event should NOT trigger hook"); + 
assert_eq!(sink.count(), 1, "Only first event should reach sink"); + } + + /// REQ-10: NoOpLogHook implements LogHook with zero overhead. + #[test] + fn req10_noop_log_hook_compiles_and_runs() { + use crate::sink::NoOpLogHook; + + let hook = NoOpLogHook; + let record = stub_record(stub_descriptor(FaultId::Numeric(1), to_static_short_string("Noop").unwrap(), None, None)); + let error = SinkError::TransportDown; + + // These should compile and do nothing + hook.on_publish(&record); + hook.on_error(&record, &error); + } +} + +#[cfg(test)] +mod error_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; + use crate::reporter::{Reporter, ReporterApi}; + use crate::test_utils::*; + use crate::utils::to_static_short_string; + use common::fault::*; + use common::sink_error::SinkError; + use std::borrow::Cow; + use std::sync::Arc; + + // ============================================================================ + // SinkError variant tests + // ============================================================================ + + /// All SinkError variants implement Debug and Display. + #[test] + fn sinkerror_all_variants_implement_debug_display() { + let errors = vec![ + SinkError::TransportDown, + SinkError::RateLimited, + SinkError::PermissionDenied, + SinkError::BadDescriptor(Cow::Borrowed("test descriptor issue")), + SinkError::Other(Cow::Borrowed("test other error")), + SinkError::InvalidServiceName, + SinkError::Timeout, + ]; + + for err in &errors { + // All variants must implement Debug + Display (via thiserror) + let debug_str = format!("{err:?}"); + let display_str = format!("{err}"); + assert!(!debug_str.is_empty()); + assert!(!display_str.is_empty()); + } + } + + /// SinkError::TransportDown represents a recoverable transport failure. 
+ #[test] + fn sinkerror_transport_down_is_recoverable() { + let err = SinkError::TransportDown; + assert!(matches!(err, SinkError::TransportDown)); + let msg = format!("{err}"); + assert!( + msg.to_lowercase().contains("transport"), + "Display message should mention transport: {msg}" + ); + } + + /// SinkError::Timeout contains meaningful context. + #[test] + fn sinkerror_timeout_contains_context() { + let err = SinkError::Timeout; + let msg = format!("{err}"); + assert!(msg.to_lowercase().contains("timeout"), "Display should contain 'timeout': {msg}"); + } + + /// SinkError::RateLimited indicates backpressure. + #[test] + fn sinkerror_rate_limited_backpressure() { + let err = SinkError::RateLimited; + let msg = format!("{err}"); + assert!( + msg.to_lowercase().contains("rate") || msg.to_lowercase().contains("limit"), + "Display should mention rate limiting: {msg}" + ); + } + + /// SinkError::BadDescriptor carries a contextual message. + #[test] + fn sinkerror_bad_descriptor_message() { + let err = SinkError::BadDescriptor(Cow::Borrowed("invalid fault ID format")); + let msg = format!("{err}"); + assert!( + msg.contains("invalid fault ID format"), + "Display should contain the descriptor message: {msg}" + ); + } + + /// SinkError::Other accepts arbitrary static messages. + #[test] + fn sinkerror_other_accepts_static_msg() { + let err = SinkError::Other(Cow::Borrowed("custom error context")); + let msg = format!("{err}"); + assert!(msg.contains("custom error context"), "Display should contain the message: {msg}"); + } + + /// SinkError implements PartialEq for test assertions. 
+ #[test] + fn sinkerror_equality_comparison() { + assert_eq!(SinkError::TransportDown, SinkError::TransportDown); + assert_eq!(SinkError::Timeout, SinkError::Timeout); + assert_ne!(SinkError::TransportDown, SinkError::Timeout); + assert_eq!( + SinkError::BadDescriptor(Cow::Borrowed("same")), + SinkError::BadDescriptor(Cow::Borrowed("same")) + ); + assert_ne!(SinkError::BadDescriptor(Cow::Borrowed("a")), SinkError::BadDescriptor(Cow::Borrowed("b"))); + } + + /// SinkError is Clone (Cow prevents Copy, but Clone is still available). + #[test] + fn sinkerror_is_clone() { + let err = SinkError::TransportDown; + let cloned = err.clone(); + assert_eq!(err, cloned); + + // Verify Cow::Owned variant also clones correctly + let owned_err = SinkError::Other(Cow::Owned("dynamic error".to_string())); + let cloned_owned = owned_err.clone(); + assert_eq!(owned_err, cloned_owned); + } + + // ============================================================================ + // Publish error propagation tests + // ============================================================================ + + /// FailingSink::transport_down propagates TransportDown through publish. + #[test] + fn publish_propagates_transport_down() { + let sink = Arc::new(FailingSink::transport_down()); + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + let result = reporter.publish("test/path", record); + + assert_eq!(result, Err(SinkError::TransportDown)); + } + + /// FailingSink::timeout propagates Timeout through publish. 
+ #[test] + fn publish_propagates_timeout() { + let sink = Arc::new(FailingSink::timeout()); + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + let result = reporter.publish("test/path", record); + + assert_eq!(result, Err(SinkError::Timeout)); + } + + /// Multiple publishes to a failing sink all return errors. + #[test] + fn publish_consistently_returns_error() { + let sink = Arc::new(FailingSink::transport_down()); + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + for _ in 0..5 { + let record = reporter.create_record(LifecycleStage::Failed); + let result = reporter.publish("test/path", record); + assert!(result.is_err()); + } + } + + // ============================================================================ + // Catalog builder error tests + // ============================================================================ + + /// FaultCatalogBuilder returns error on invalid JSON via try_build(). + #[test] + fn catalog_builder_errors_on_invalid_json() { + use common::catalog::CatalogBuildError; + let result = FaultCatalogBuilder::new().json_string("{ invalid json }").unwrap().try_build(); + assert!( + matches!(result, Err(CatalogBuildError::InvalidJson(_))), + "Should return InvalidJson error on invalid JSON input, got: {result:?}" + ); + } + + /// FaultCatalogBuilder returns error on missing required fields. 
+ #[test] + fn catalog_builder_errors_on_missing_fields() { + let result = FaultCatalogBuilder::new() + .json_string(r#"{"id": "test"}"#) // missing version, faults + .unwrap() + .try_build(); + assert!(result.is_err(), "Should return error on missing required fields"); + } + + /// FaultCatalogBuilder returns error when no input is configured. + #[test] + fn catalog_builder_errors_on_no_input() { + use common::catalog::CatalogBuildError; + let result = FaultCatalogBuilder::new().try_build(); + assert!( + matches!(result, Err(CatalogBuildError::MissingConfig)), + "Should return MissingConfig error when building with no input" + ); + } + + /// FaultCatalogBuilder returns error when configured twice. + #[test] + fn catalog_builder_errors_on_double_configure() { + use common::catalog::CatalogBuildError; + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![], + }; + let result = FaultCatalogBuilder::new().cfg_struct(config.clone()).unwrap().cfg_struct(config); // double configure → Err(AlreadyConfigured) + assert!( + matches!(result, Err(CatalogBuildError::AlreadyConfigured)), + "Should return AlreadyConfigured error when builder is configured twice" + ); + } + + /// Catalog returns None for unknown fault ID. + #[test] + fn catalog_returns_none_for_unknown_id() { + let config = FaultCatalogConfig { + id: "test".into(), + version: 1, + faults: vec![stub_descriptor(FaultId::Numeric(1), to_static_short_string("Known").unwrap(), None, None)], + }; + + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + assert!(catalog.descriptor(&FaultId::Numeric(1)).is_some()); + assert!(catalog.descriptor(&FaultId::Numeric(999)).is_none()); + } + + /// Catalog try_id returns IdTooLong for excessively long catalog ids. 
+ #[test] + fn catalog_try_id_returns_error_for_long_id() { + use common::catalog::CatalogBuildError; + // LongString capacity is 128 bytes — exceed it + let long_id = "a".repeat(200); + let catalog = common::catalog::FaultCatalog::new(long_id.into(), 1, std::collections::HashMap::new(), vec![]); + let result = catalog.try_id(); + assert!( + matches!(result, Err(CatalogBuildError::IdTooLong(_))), + "Should return IdTooLong error for 200-char id" + ); + } + + /// Catalog try_id succeeds for valid-length ids. + #[test] + fn catalog_try_id_succeeds_for_valid_id() { + let catalog = common::catalog::FaultCatalog::new("my_catalog".into(), 1, std::collections::HashMap::new(), vec![]); + assert!(catalog.try_id().is_ok()); + } + + /// try_build returns MissingConfig for unconfigured builder. + #[test] + fn catalog_try_build_returns_missing_config() { + use common::catalog::CatalogBuildError; + let result = FaultCatalogBuilder::new().try_build(); + assert!( + matches!(result, Err(CatalogBuildError::MissingConfig)), + "Should return MissingConfig for unconfigured builder" + ); + } + + /// try_build returns InvalidJson for malformed JSON. + #[test] + fn catalog_try_build_returns_invalid_json() { + use common::catalog::CatalogBuildError; + let result = FaultCatalogBuilder::new().json_string("not valid json").unwrap().try_build(); + assert!( + matches!(result, Err(CatalogBuildError::InvalidJson(_))), + "Should return InvalidJson for malformed JSON" + ); + } + + /// try_build returns Io error for non-existent file. + #[test] + fn catalog_try_build_returns_io_error_for_missing_file() { + use common::catalog::CatalogBuildError; + let result = FaultCatalogBuilder::new() + .json_file(std::path::PathBuf::from("/nonexistent/path/catalog.json")) + .unwrap() + .try_build(); + assert!(matches!(result, Err(CatalogBuildError::Io(_))), "Should return Io error for missing file"); + } + + /// Publish rejects paths that exceed max length. 
+ #[test] + fn publish_rejects_path_too_long() { + let sink = Arc::new(crate::test_utils::RecordingSink::new()); + let mut reporter = crate::reporter::Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: common::fault::LifecycleStage::NotTested, + log_hook: None, + }; + + // The RecordingSink accepts everything, but we can verify path validation + // happens at the FaultManagerSink level. For unit testing, verify the + // Reporter itself doesn't panic on long paths. + let long_path = "a".repeat(300); + let record = reporter.create_record(common::fault::LifecycleStage::Failed); + // This goes through RecordingSink which doesn't validate, but + // the important thing is no panic occurs. + let _result = reporter.publish(&long_path, record); + } + + /// SinkError variants cover all expected error conditions. + #[test] + fn sinkerror_queue_full_variant() { + let err = SinkError::QueueFull; + let msg = format!("{err}"); + assert!( + msg.to_lowercase().contains("queue") || msg.to_lowercase().contains("full"), + "Display should mention queue full: {msg}" + ); + } +} + +#[cfg(test)] +mod concurrent_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::reporter::Reporter; + use crate::reporter::ReporterApi; + use crate::sink::FaultSinkApi; + use crate::test_utils::*; + use crate::utils::to_static_short_string; + use common::fault::*; + use std::sync::Arc; + use std::thread; + use std::time::{Duration, Instant}; + + // ============================================================================ + // Concurrent publish tests + // ============================================================================ + + /// Multiple reporters from different threads publish to the same sink safely. 
+ #[test] + fn concurrent_reporters_publish_safely() { + let sink = Arc::new(AtomicCountingSink::new()); + + let handles: Vec<_> = (0..10) + .map(|i| { + let sink = sink.clone(); + thread::spawn(move || { + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(i), to_static_short_string("Fault").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + for _ in 0..100 { + let record = reporter.create_record(LifecycleStage::Failed); + let _ = reporter.publish("test/path", record); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked"); + } + + assert_eq!(sink.count(), 1000, "All 1000 publishes should be counted"); + } + + /// Stress test: many rapid publishes from multiple threads complete without panic. + #[test] + #[cfg_attr(miri, ignore)] // Miri is 10-100× slower; timing assertion is meaningless. + fn stress_test_high_throughput() { + let sink = Arc::new(AtomicCountingSink::new()); + let target_per_thread = 2500; + let num_threads = 4; + + let start = Instant::now(); + let handles: Vec<_> = (0..num_threads) + .map(|i| { + let sink = sink.clone(); + thread::spawn(move || { + let mut reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(i), to_static_short_string("Stress").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + for _ in 0..target_per_thread { + let record = reporter.create_record(LifecycleStage::Failed); + let _ = reporter.publish("test/path", record); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked during stress test"); + } + + let elapsed = start.elapsed(); + let count = sink.count(); + let expected = (num_threads * target_per_thread) as usize; + + assert_eq!(count, expected, "Expected {expected} records, got {count}"); + // Should complete in reasonable time (under 5 
seconds) + assert!(elapsed < Duration::from_secs(5), "Stress test took {elapsed:?}, expected <5s"); + } + + /// RecordingSink is thread-safe for concurrent writes. + #[test] + fn recording_sink_thread_safe() { + let sink = Arc::new(RecordingSink::new()); + + let handles: Vec<_> = (0..5) + .map(|i| { + let sink = sink.clone(); + thread::spawn(move || { + for _ in 0..20 { + let record = stub_record(stub_descriptor(FaultId::Numeric(i), to_static_short_string("Test").unwrap(), None, None)); + sink.publish("test/path", record).unwrap(); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked"); + } + + assert_eq!(sink.count(), 100, "All 100 records should be stored"); + assert_eq!(sink.received_records().len(), 100); + } + + /// Reporter can be shared across threads via Arc. + #[test] + fn reporter_shared_via_arc() { + let sink = Arc::new(AtomicCountingSink::new()); + let reporter = Arc::new(std::sync::Mutex::new(Reporter { + sink: sink.clone(), + descriptor: stub_descriptor(FaultId::Numeric(1), to_static_short_string("Shared").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + })); + + let handles: Vec<_> = (0..4) + .map(|_| { + let reporter = reporter.clone(); + thread::spawn(move || { + for _ in 0..25 { + let mut r = reporter.lock().unwrap(); + let record = r.create_record(LifecycleStage::Failed); + let _ = r.publish("test/path", record); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked"); + } + + assert_eq!(sink.count(), 100); + } + + /// Drop of FaultManagerSink does not deadlock with concurrent operations. 
+ #[test] + fn drop_sink_no_deadlock() { + let sink = Arc::new(SlowSink::new(Duration::from_millis(1))); + let sink_clone = sink.clone(); + + let handle = thread::spawn(move || { + let mut reporter = Reporter { + sink: sink_clone, + descriptor: stub_descriptor(FaultId::Numeric(1), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + for _ in 0..5 { + let record = reporter.create_record(LifecycleStage::Failed); + let _ = reporter.publish("test/path", record); + } + }); + + // Drop our reference to the sink + drop(sink); + + // Thread should complete without deadlock + let (tx, rx) = std::sync::mpsc::channel(); + thread::spawn(move || { + let _ = handle.join(); + let _ = tx.send(()); + }); + + let result = rx.recv_timeout(Duration::from_secs(5)); + assert!(result.is_ok(), "Deadlock detected - thread didn't complete in 5s"); + } + + /// Creating records from multiple threads is safe (create_record is read-only). 
+ #[test] + fn concurrent_create_record() { + let sink = Arc::new(RecordingSink::new()); + let reporter = Arc::new(Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Concurrent").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }); + + let handles: Vec<_> = (0..10) + .map(|_| { + let reporter = reporter.clone(); + thread::spawn(move || { + for stage in [ + LifecycleStage::NotTested, + LifecycleStage::PreFailed, + LifecycleStage::Failed, + LifecycleStage::PrePassed, + LifecycleStage::Passed, + ] { + let record = reporter.create_record(stage); + assert_eq!(record.id, FaultId::Numeric(42)); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked during concurrent create_record"); + } + } +} + +#[cfg(test)] +mod timestamp_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::reporter::Reporter; + use crate::reporter::ReporterApi; + use crate::test_utils::*; + use crate::utils::to_static_short_string; + use common::fault::*; + use std::sync::Arc; + + // ============================================================================ + // Current behavior (timestamps are zero) + // ============================================================================ + + /// Documents current behavior: IpcTimestamp::default() is zero. + #[test] + fn timestamp_default_is_zero() { + let default_ts = IpcTimestamp::default(); + assert_eq!(default_ts.seconds_since_epoch, 0); + assert_eq!(default_ts.nanoseconds, 0); + } + + /// Verifies that create_record populates timestamps from system time. 
+ #[test] + fn timestamp_is_populated_in_create_record() { + let sink = Arc::new(RecordingSink::new()); + let reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + + // After fix: timestamp should be non-zero + assert!(record.time.seconds_since_epoch > 0, "Timestamp should be populated: {:?}", record.time); + } + + // ============================================================================ + // Timestamp population tests + // ============================================================================ + + /// Timestamp should be non-zero when record is created. + #[test] + fn timestamp_is_populated_after_fix() { + let sink = Arc::new(RecordingSink::new()); + let reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + + assert!( + record.time.seconds_since_epoch > 0 || record.time.nanoseconds > 0, + "Timestamp should not be zero after fix: {:?}", + record.time + ); + } + + /// Timestamp should be recent (within test execution window). 
+ #[test] + fn timestamp_is_recent_after_fix() { + use std::time::{SystemTime, UNIX_EPOCH}; + + let before = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(); + + let sink = Arc::new(RecordingSink::new()); + let reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + + let after = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(); + + assert!( + record.time.seconds_since_epoch >= before, + "Timestamp {} should be >= before {}", + record.time.seconds_since_epoch, + before + ); + assert!( + record.time.seconds_since_epoch <= after, + "Timestamp {} should be <= after {}", + record.time.seconds_since_epoch, + after + ); + } + + /// Nanoseconds should be valid (< 1 billion). + #[test] + fn timestamp_nanoseconds_valid_after_fix() { + let sink = Arc::new(RecordingSink::new()); + let reporter = Reporter { + sink, + descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None), + config: stub_config(), + debouncer: None, + last_stage: LifecycleStage::NotTested, + log_hook: None, + }; + + let record = reporter.create_record(LifecycleStage::Failed); + + assert!( + record.time.nanoseconds < 1_000_000_000, + "Nanoseconds {} should be < 1 billion", + record.time.nanoseconds + ); + } + + /// Sequential records should have non-decreasing timestamps. 
+    #[test]
+    fn timestamp_monotonic_after_fix() {
+        let sink = Arc::new(RecordingSink::new());
+        let reporter = Reporter {
+            sink,
+            descriptor: stub_descriptor(FaultId::Numeric(42), to_static_short_string("Test").unwrap(), None, None),
+            config: stub_config(),
+            debouncer: None,
+            last_stage: LifecycleStage::NotTested,
+            log_hook: None,
+        };
+
+        let mut prev_total: u128 = 0;
+
+        for _ in 0..10 {
+            let record = reporter.create_record(LifecycleStage::Failed);
+
+            let curr_total = record.time.seconds_since_epoch as u128 * 1_000_000_000 + record.time.nanoseconds as u128;
+
+            assert!(curr_total >= prev_total, "Timestamps should be monotonically non-decreasing");
+
+            prev_total = curr_total;
+        }
+    }
+}
diff --git a/src/fault_lib/src/sink.rs b/src/fault_lib/src/sink.rs
new file mode 100644
index 0000000..9b670ce
--- /dev/null
+++ b/src/fault_lib/src/sink.rs
@@ -0,0 +1,69 @@
+// Copyright (c) 2026 Contributors to the Eclipse Foundation
+//
+// See the NOTICE file(s) distributed with this work for additional
+// information regarding copyright ownership.
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use common::fault::FaultRecord;
+use common::sink_error::SinkError;
+use common::types::DiagnosticEvent;
+
+#[cfg(test)]
+use mockall::automock;
+
+// Boundary traits for anything that has side-effects (logging + IPC).
+
+/// Hook for logging/observability of fault reporting.
+///
+/// Called after each publish attempt with success or error context,
+/// enabling applications to mirror fault events into their preferred
+/// logging stack (DLT, syslog, tracing, etc.).
+///
+/// Default implementation: [`NoOpLogHook`] (zero overhead).
+pub trait LogHook: Send + Sync + 'static {
+    /// Called after successful publish to sink.
+    fn on_publish(&self, record: &FaultRecord);
+    /// Called when publish to sink fails.
+    fn on_error(&self, record: &FaultRecord, error: &SinkError);
+}
+
+/// Default LogHook that does nothing. Zero overhead.
+pub struct NoOpLogHook;
+
+impl LogHook for NoOpLogHook {
+    #[inline]
+    fn on_publish(&self, _record: &FaultRecord) {}
+    #[inline]
+    fn on_error(&self, _record: &FaultRecord, _error: &SinkError) {}
+}
+
+/// Sink abstracts the transport to the Diagnostic Fault Manager.
+///
+/// Non-blocking contract:
+/// - MUST return quickly (enqueue only) without waiting on IPC/network/disk.
+/// - SHOULD avoid allocating excessively or performing locking that can contend with hot paths.
+/// - Backpressure and retry are internal; caller only gets enqueue success/failure.
+/// - Lifetime: installed once in `FaultApi::new` and lives for the duration of the process.
+///
+/// Implementations can be S-CORE IPC.
+#[cfg_attr(test, automock)]
+pub trait FaultSinkApi: Send + Sync + 'static {
+    /// Enqueue a record for delivery to the Diagnostic Fault Manager.
+    fn publish(&self, path: &str, record: FaultRecord) -> Result<(), SinkError>;
+    fn check_fault_catalog(&self) -> Result<bool, SinkError>;
+
+    /// Send a raw diagnostic event to the DFM.
+    ///
+    /// Used for non-fault events such as enabling condition registration
+    /// and status changes. Default implementation is a no-op that returns
+    /// `Ok(())`, suitable for test sinks that don't need IPC.
+    fn send_event(&self, _event: DiagnosticEvent) -> Result<(), SinkError> {
+        Ok(())
+    }
+}
diff --git a/src/fault_lib/src/test_utils.rs b/src/fault_lib/src/test_utils.rs
new file mode 100644
index 0000000..289f26f
--- /dev/null
+++ b/src/fault_lib/src/test_utils.rs
@@ -0,0 +1,268 @@
+// Copyright (c) 2026 Contributors to the Eclipse Foundation
+//
+// See the NOTICE file(s) distributed with this work for additional
+// information regarding copyright ownership.
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +// Test utilities are exclusively used by tests — unwrap/expect is acceptable. +#![allow(clippy::unwrap_used, clippy::expect_used)] +use crate::reporter::ReporterConfig; +use crate::sink::FaultSinkApi; +use crate::utils::*; +use alloc::string::String; +use common::config::ResetPolicy; +use common::debounce::DebounceMode; +use common::fault::*; +use common::ids::*; +use common::sink_error::SinkError; +use common::types::*; +use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; +use core::time::Duration; +use std::sync::Mutex; + +// ============================================================================ +// IPC Test Isolation Helpers +// ============================================================================ + +/// Monotonic counter ensuring unique service names across all tests in a process. +static IPC_TEST_COUNTER: AtomicU32 = AtomicU32::new(0); + +/// Generate a unique iceoryx2 service name for test isolation. +/// +/// Each call returns a distinct name incorporating the process ID and an +/// atomic counter, preventing shared-memory conflicts when tests run in +/// parallel or when stale resources linger from a previous run. 
+///
+/// # Example
+///
+/// ```ignore
+/// let svc = unique_ipc_service_name("worker_start");
+/// // → "test/worker_start/12345/0"
+/// ```
+pub fn unique_ipc_service_name(prefix: &str) -> String {
+    let id = IPC_TEST_COUNTER.fetch_add(1, Ordering::Relaxed);
+    let pid = std::process::id();
+    format!("test/{prefix}/{pid}/{id}")
+}
+
+#[allow(dead_code)]
+pub fn stub_source() -> SourceId {
+    SourceId {
+        entity: to_static_short_string("source").unwrap(),
+        ecu: Some(ShortString::from_bytes("ECU-A".as_bytes()).unwrap()),
+        domain: Some(to_static_short_string("ADAS").unwrap()),
+        sw_component: Some(to_static_short_string("Perception").unwrap()),
+        instance: Some(to_static_short_string("0").unwrap()),
+    }
+}
+
+#[allow(dead_code)]
+pub fn stub_config() -> ReporterConfig {
+    ReporterConfig {
+        source: stub_source(),
+        lifecycle_phase: LifecyclePhase::Running,
+        default_env_data: MetadataVec::new(),
+    }
+}
+
+#[allow(dead_code)]
+pub fn stub_descriptor(id: FaultId, name: ShortString, debounce: Option<DebounceMode>, reset: Option<ResetPolicy>) -> FaultDescriptor {
+    FaultDescriptor {
+        id,
+        name,
+        summary: None,
+        category: FaultType::Software,
+        severity: FaultSeverity::Warn,
+        compliance: ComplianceVec::new(),
+        reporter_side_debounce: debounce,
+        reporter_side_reset: reset,
+        manager_side_debounce: None,
+        manager_side_reset: None,
+    }
+}
+
+#[allow(dead_code)]
+pub fn stub_record(desc: FaultDescriptor) -> FaultRecord {
+    FaultRecord {
+        id: desc.id,
+        time: IpcTimestamp::default(),
+        source: stub_source(),
+        lifecycle_phase: LifecyclePhase::Running,
+        lifecycle_stage: LifecycleStage::NotTested,
+        env_data: MetadataVec::new(),
+    }
+}
+
+pub fn create_dummy_descriptors() -> Vec<FaultDescriptor> {
+    let d1 = FaultDescriptor {
+        id: FaultId::Text(to_static_short_string("d1").unwrap()),
+
+        name: to_static_short_string("Descriptor 1").unwrap(),
+        summary: None,
+
+        category: FaultType::Software,
+        severity: FaultSeverity::Debug,
+        compliance: ComplianceVec::try_from(&[ComplianceTag::EmissionRelevant,
ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }; + + let d2 = FaultDescriptor { + id: FaultId::Text(to_static_short_string("d2").unwrap()), + + name: to_static_short_string("Descriptor 2").unwrap(), + summary: Some(to_static_long_string("Human-readable summary").unwrap()), + + category: FaultType::Configuration, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::try_from(&[ComplianceTag::SecurityRelevant, ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + manager_side_reset: None, + }; + vec![d1, d2] +} + +pub fn load_dummy_config_file() -> String { + serde_json::to_string(&create_dummy_descriptors()).expect("serde_json::to_string failed") +} + +// ============================================================================ +// Mock Sink Implementations (implement FaultSinkApi trait) +// ============================================================================ + +/// Sink that records all published FaultRecords for test assertions. 
+#[allow(dead_code)]
+pub struct RecordingSink {
+    records: Mutex<Vec<FaultRecord>>,
+}
+
+impl Default for RecordingSink {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[allow(dead_code)]
+impl RecordingSink {
+    pub fn new() -> Self {
+        Self { records: Mutex::new(vec![]) }
+    }
+
+    pub fn received_records(&self) -> Vec<FaultRecord> {
+        self.records.lock().expect("RecordingSink poisoned").clone()
+    }
+
+    pub fn count(&self) -> usize {
+        self.records.lock().expect("RecordingSink poisoned").len()
+    }
+}
+
+impl FaultSinkApi for RecordingSink {
+    fn publish(&self, _path: &str, record: FaultRecord) -> Result<(), SinkError> {
+        self.records.lock().expect("RecordingSink poisoned").push(record);
+        Ok(())
+    }
+    fn check_fault_catalog(&self) -> Result<bool, SinkError> {
+        Ok(true)
+    }
+}
+
+/// Sink that simulates slow transport for non-blocking tests (REQ-6).
+#[allow(dead_code)]
+pub struct SlowSink {
+    delay: Duration,
+}
+
+#[allow(dead_code)]
+impl SlowSink {
+    pub fn new(delay: Duration) -> Self {
+        Self { delay }
+    }
+}
+
+impl FaultSinkApi for SlowSink {
+    fn publish(&self, _path: &str, _record: FaultRecord) -> Result<(), SinkError> {
+        std::thread::sleep(self.delay);
+        Ok(())
+    }
+    fn check_fault_catalog(&self) -> Result<bool, SinkError> {
+        Ok(true)
+    }
+}
+
+/// Thread-safe atomic counter sink for concurrent access tests.
+#[allow(dead_code)]
+pub struct AtomicCountingSink {
+    count: AtomicUsize,
+}
+
+impl Default for AtomicCountingSink {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[allow(dead_code)]
+impl AtomicCountingSink {
+    pub fn new() -> Self {
+        Self { count: AtomicUsize::new(0) }
+    }
+    pub fn count(&self) -> usize {
+        self.count.load(Ordering::Acquire)
+    }
+}
+
+impl FaultSinkApi for AtomicCountingSink {
+    fn publish(&self, _path: &str, _record: FaultRecord) -> Result<(), SinkError> {
+        self.count.fetch_add(1, Ordering::Release);
+        Ok(())
+    }
+    fn check_fault_catalog(&self) -> Result<bool, SinkError> {
+        Ok(true)
+    }
+}
+
+/// Sink that always returns a specified error (for error path tests).
+#[allow(dead_code)]
+pub struct FailingSink {
+    error: SinkError,
+}
+
+#[allow(dead_code)]
+impl FailingSink {
+    pub fn new(error: SinkError) -> Self {
+        Self { error }
+    }
+
+    pub fn transport_down() -> Self {
+        Self::new(SinkError::TransportDown)
+    }
+
+    pub fn timeout() -> Self {
+        Self::new(SinkError::Timeout)
+    }
+}
+
+impl FaultSinkApi for FailingSink {
+    fn publish(&self, _path: &str, _record: FaultRecord) -> Result<(), SinkError> {
+        Err(self.error.clone())
+    }
+    fn check_fault_catalog(&self) -> Result<bool, SinkError> {
+        Err(self.error.clone())
+    }
+}
diff --git a/src/fault_lib/src/utils.rs b/src/fault_lib/src/utils.rs
new file mode 100644
index 0000000..928336d
--- /dev/null
+++ b/src/fault_lib/src/utils.rs
@@ -0,0 +1,95 @@
+// Copyright (c) 2026 Contributors to the Eclipse Foundation
+//
+// See the NOTICE file(s) distributed with this work for additional
+// information regarding copyright ownership.
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Re-export utility functions from common for backward compatibility.
+pub use common::types::{to_static_long_string, to_static_short_string};
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __fault_descriptor_option {
+    () => {
+        None
+    };
+    ($value:expr) => {
+        Some($value)
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __fault_descriptor_optional_summary {
+    () => {
+        None
+    };
+    ($value:literal) => {{
+        #[allow(clippy::expect_used)]
+        {
+            Some($crate::utils::to_static_long_string($value).expect(concat!(
+                "fault_descriptor!: summary '",
+                $value,
+                "' exceeds LongString capacity"
+            )))
+        }
+    }};
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules!
__fault_descriptor_compliance_vec { + // No compliance tags => empty ComplianceVec + () => {{ + let v: common::fault::ComplianceVec = common::fault::ComplianceVec::new(); + v + }}; + // One or more tags => fill the ComplianceVec + ($($ctag:expr),+ $(,)?) => {{ + let mut v: common::fault::ComplianceVec = common::fault::ComplianceVec::new(); + $( + v.push($ctag); + )+ + v + }}; +} + +#[macro_export] +macro_rules! fault_descriptor { + // Minimal form; policies can be added via builder functions if desired. + ( + id = $id:expr, + name = $name:literal, + kind = $kind:expr, + severity = $sev:expr + $(, compliance = [$($ctag:expr),* $(,)?])? + $(, summary = $summary:literal)? + $(, debounce = $debounce:expr)? + $(, reset = $reset:expr)? + ) => {{ + #[allow(clippy::expect_used)] + { + common::fault::FaultDescriptor { + id: $id, + name: $crate::utils::to_static_short_string($name) + .expect(concat!("fault_descriptor!: name '", $name, "' exceeds ShortString capacity")), + category: $kind, + severity: $sev, + compliance: $crate::__fault_descriptor_compliance_vec!($($($ctag),*)?), + reporter_side_debounce: $crate::__fault_descriptor_option!($($debounce)?), + reporter_side_reset: $crate::__fault_descriptor_option!($($reset)?), + manager_side_debounce: None, + manager_side_reset: None, + summary: $crate::__fault_descriptor_optional_summary!($($summary)?), + } + } + }}; +} + +// Note: to_static_short_string and to_static_long_string are now defined +// in common::types and re-exported above for backward compatibility. 
diff --git a/src/fault_lib/tests/data/hvac_fault_catalog.json b/src/fault_lib/tests/data/hvac_fault_catalog.json new file mode 100644 index 0000000..3291a87 --- /dev/null +++ b/src/fault_lib/tests/data/hvac_fault_catalog.json @@ -0,0 +1,53 @@ +{ + "id": "hvac", + "version": 3, + "faults": [ + { + "id": { + "Numeric": 28673 + }, + "name": "CabinTempSensorStuck", + "summary": null, + "category": "Communication", + "severity": "Error", + "compliance": [ + "EmissionRelevant" + ], + "reporter_side_debounce": { + "HoldTime": { + "duration": { + "secs": 60, + "nanos": 0 + } + } + }, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + }, + { + "id": { + "Text": "hvac.blower.speed_sensor_mismatch" + }, + "name": "BlowerSpeedMismatch", + "summary": "Human-readable summary", + "category": "Communication", + "severity": "Error", + "compliance": [ + "SecurityRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "manager_side_reset": null + } + ] +} \ No newline at end of file diff --git a/src/fault_lib/tests/data/ivi_fault_catalog.json b/src/fault_lib/tests/data/ivi_fault_catalog.json new file mode 100644 index 0000000..a756a9c --- /dev/null +++ b/src/fault_lib/tests/data/ivi_fault_catalog.json @@ -0,0 +1,54 @@ +{ + "id": "ivi", + "version": 1, + "faults": [ + { + "id": { + "Text": "d1" + }, + "name": "Descriptor 1", + "summary": null, + "category": "Software", + "severity": "Debug", + "compliance": [ + "EmissionRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + }, + { + "id": { + "Text": "d2" + }, + "name": "Descriptor 2", + "summary": "Human-readable summary", + "category": 
"Configuration", + "severity": "Warn", + "compliance": [ + "SecurityRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "manager_side_reset": null + } + ] +} \ No newline at end of file From 7a3f5118e059d31e4c0e240abcf520c8c684bfc0 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:28:02 +0100 Subject: [PATCH 4/7] feat(dfm_lib): implement Diagnostic Fault Manager SOVD-compliant fault manager with KVS persistent storage, aging manager, operation cycle tracking, fault record processor, and query server with iceoryx2 IPC transport. --- src/dfm_lib/BUILD | 77 ++ src/dfm_lib/Cargo.toml | 24 + src/dfm_lib/examples/dfm.rs | 144 +++ src/dfm_lib/examples/sovd_fault_manager.rs | 126 +++ src/dfm_lib/src/aging_manager.rs | 718 ++++++++++++++ src/dfm_lib/src/dfm_test_utils.rs | 453 +++++++++ src/dfm_lib/src/diagnostic_fault_manager.rs | 344 +++++++ .../src/enabling_condition_registry.rs | 179 ++++ src/dfm_lib/src/fault_catalog_registry.rs | 50 + src/dfm_lib/src/fault_lib_communicator.rs | 666 +++++++++++++ src/dfm_lib/src/fault_record_processor.rs | 904 ++++++++++++++++++ src/dfm_lib/src/lib.rs | 64 ++ src/dfm_lib/src/operation_cycle.rs | 371 +++++++ src/dfm_lib/src/query_api.rs | 175 ++++ src/dfm_lib/src/query_conversion.rs | 415 ++++++++ src/dfm_lib/src/query_ipc.rs | 153 +++ src/dfm_lib/src/query_server.rs | 144 +++ src/dfm_lib/src/sovd_fault_manager.rs | 898 +++++++++++++++++ src/dfm_lib/src/sovd_fault_storage.rs | 585 ++++++++++++ src/dfm_lib/src/transport.rs | 69 ++ 20 files changed, 6559 insertions(+) create mode 100644 src/dfm_lib/BUILD create mode 100644 src/dfm_lib/Cargo.toml create mode 100644 src/dfm_lib/examples/dfm.rs create mode 100644 src/dfm_lib/examples/sovd_fault_manager.rs create mode 100644 src/dfm_lib/src/aging_manager.rs create mode 100644 src/dfm_lib/src/dfm_test_utils.rs 
create mode 100644 src/dfm_lib/src/diagnostic_fault_manager.rs create mode 100644 src/dfm_lib/src/enabling_condition_registry.rs create mode 100644 src/dfm_lib/src/fault_catalog_registry.rs create mode 100644 src/dfm_lib/src/fault_lib_communicator.rs create mode 100644 src/dfm_lib/src/fault_record_processor.rs create mode 100644 src/dfm_lib/src/lib.rs create mode 100644 src/dfm_lib/src/operation_cycle.rs create mode 100644 src/dfm_lib/src/query_api.rs create mode 100644 src/dfm_lib/src/query_conversion.rs create mode 100644 src/dfm_lib/src/query_ipc.rs create mode 100644 src/dfm_lib/src/query_server.rs create mode 100644 src/dfm_lib/src/sovd_fault_manager.rs create mode 100644 src/dfm_lib/src/sovd_fault_storage.rs create mode 100644 src/dfm_lib/src/transport.rs diff --git a/src/dfm_lib/BUILD b/src/dfm_lib/BUILD new file mode 100644 index 0000000..a6be496 --- /dev/null +++ b/src/dfm_lib/BUILD @@ -0,0 +1,77 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test") + +filegroup( + name = "dfm_lib_srcs", + srcs = glob(["src/**/*.rs"]), +) + +rust_library( + name = "dfm_lib", + srcs = [":dfm_lib_srcs"], + crate_name = "dfm_lib", + edition = "2024", + visibility = ["//visibility:public"], + deps = [ + "//src/common", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:rust_kvs", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:thiserror", + ], +) + +rust_test( + name = "tests", + srcs = [":dfm_lib_srcs"], + edition = "2024", + deps = [ + "//src/common", + "@score_fault_lib_crates//:iceoryx2", + "@score_fault_lib_crates//:iceoryx2-bb-container", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:mockall", + "@score_fault_lib_crates//:rust_kvs", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:serial_test", + "@score_fault_lib_crates//:thiserror", + ], +) + +rust_binary( + name = "dfm", + srcs = ["examples/dfm.rs"], + edition = "2024", + deps = [ + ":dfm_lib", + "//src/common", + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:tempfile", + ], +) + +rust_binary( + name = "sovd_fm", + srcs = ["examples/sovd_fault_manager.rs"], + edition = "2024", + deps = [ + ":dfm_lib", + "//src/common", + "@score_fault_lib_crates//:tempfile", + ], +) diff --git a/src/dfm_lib/Cargo.toml b/src/dfm_lib/Cargo.toml new file mode 100644 index 0000000..85fb009 --- /dev/null +++ b/src/dfm_lib/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "dfm_lib" 
+version.workspace = true +edition.workspace = true +license-file.workspace = true +authors.workspace = true + +[lints] +workspace = true + +[dependencies] +env_logger.workspace = true +common = { path = "../common" } +iceoryx2.workspace = true +iceoryx2-bb-container.workspace = true +thiserror.workspace = true +log = { workspace = true, features = ["std"] } +rust_kvs = { git = "https://github.com/eclipse-score/persistency.git", branch = "main" } +serde_json.workspace = true + +[dev-dependencies] +mockall.workspace = true +serial_test.workspace = true +tempfile = "3.20" diff --git a/src/dfm_lib/examples/dfm.rs b/src/dfm_lib/examples/dfm.rs new file mode 100644 index 0000000..0120261 --- /dev/null +++ b/src/dfm_lib/examples/dfm.rs @@ -0,0 +1,144 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::debounce; +use common::fault; +use common::types::to_static_long_string; +use common::types::to_static_short_string; +use core::time::Duration; +use dfm_lib::diagnostic_fault_manager::DiagnosticFaultManager; +use dfm_lib::fault_catalog_registry::*; +use dfm_lib::sovd_fault_manager::*; +use dfm_lib::sovd_fault_storage::*; +use env_logger::Env; +use tempfile::tempdir; + +fn load_hvac_config() -> FaultCatalogConfig { + let f1 = fault::FaultDescriptor { + id: fault::FaultId::Numeric(0x7001), + + name: to_static_short_string("CabinTempSensorStuck").unwrap(), + summary: None, + + category: fault::FaultType::Communication, + severity: fault::FaultSeverity::Error, + compliance: 
fault::ComplianceVec::try_from(&[fault::ComplianceTag::EmissionRelevant][..]).unwrap(), + + reporter_side_debounce: Some(debounce::DebounceMode::HoldTime { + duration: Duration::from_secs(60).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }; + + let f2 = fault::FaultDescriptor { + id: fault::FaultId::Text(to_static_short_string("hvac.blower.speed_sensor_mismatch").unwrap()), + + name: to_static_short_string("BlowerSpeedMismatch").unwrap(), + summary: Some(to_static_long_string("Human-readable summary").unwrap()), + + category: fault::FaultType::Communication, + severity: fault::FaultSeverity::Error, + compliance: fault::ComplianceVec::try_from(&[fault::ComplianceTag::SecurityRelevant, fault::ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce::DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + manager_side_reset: None, + }; + + let faults = vec![f1, f2]; + FaultCatalogConfig { + id: "hvac".into(), + version: 3, + faults, + } +} + +fn load_ivi_config() -> FaultCatalogConfig { + let f1 = fault::FaultDescriptor { + id: fault::FaultId::Text(to_static_short_string("d1").unwrap()), + + name: to_static_short_string("Descriptor 1").unwrap(), + summary: None, + + category: fault::FaultType::Software, + severity: fault::FaultSeverity::Debug, + compliance: fault::ComplianceVec::try_from(&[fault::ComplianceTag::EmissionRelevant, fault::ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: Some(debounce::DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }; + + let f2 = fault::FaultDescriptor { + id: fault::FaultId::Text(to_static_short_string("d2").unwrap()), + + name: to_static_short_string("Descriptor 2").unwrap(), + summary: 
Some(to_static_long_string("Human-readable summary").unwrap()), + + category: fault::FaultType::Configuration, + severity: fault::FaultSeverity::Warn, + compliance: fault::ComplianceVec::try_from(&[fault::ComplianceTag::SecurityRelevant, fault::ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce::DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + manager_side_reset: None, + }; + + let faults = vec![f1, f2]; + FaultCatalogConfig { + id: "ivi".into(), + version: 1, + faults, + } +} +fn main() { + let env = Env::default().filter_or("RUST_LOG", "debug"); + env_logger::init_from_env(env); + + let storage_dir = tempdir().unwrap(); + let storage = KvsSovdFaultStateStorage::new(storage_dir.path(), 0).expect("storage init"); + + let hvac_catalog = FaultCatalogBuilder::new().cfg_struct(load_hvac_config()).expect("builder config").build(); + let ivi_catalog = FaultCatalogBuilder::new().cfg_struct(load_ivi_config()).expect("builder config").build(); + + let registry = FaultCatalogRegistry::new(vec![hvac_catalog, ivi_catalog]); + + let dfm = DiagnosticFaultManager::new(storage, registry); + let manager = dfm.create_sovd_fault_manager(); + + // Try to get faults for a non-existent path. 
+ let faults = manager.get_all_faults("invalid_hvac"); + assert!(faults.is_err()); + assert_eq!(faults.unwrap_err(), Error::BadArgument); + + let faults = manager.get_all_faults("hvac").unwrap(); + println!("{faults:?}"); + + let faults = manager.get_all_faults("hvac").unwrap(); + println!("{faults:?}"); + + let fault = manager.get_fault("hvac", &faults[0].code).unwrap(); + println!("{fault:?}"); +} diff --git a/src/dfm_lib/examples/sovd_fault_manager.rs b/src/dfm_lib/examples/sovd_fault_manager.rs new file mode 100644 index 0000000..9f83932 --- /dev/null +++ b/src/dfm_lib/examples/sovd_fault_manager.rs @@ -0,0 +1,126 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::std_instead_of_alloc)] + +use common::SourceId; +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::debounce; +use common::fault; +use common::fault::IpcTimestamp; +use common::types::MetadataVec; +use common::types::ShortString; +use common::types::to_static_long_string; +use common::types::to_static_short_string; +use core::time::Duration; +use dfm_lib::OperationCycleTracker; +use dfm_lib::fault_catalog_registry::*; +use dfm_lib::fault_record_processor::FaultRecordProcessor; +use dfm_lib::sovd_fault_manager::*; +use dfm_lib::sovd_fault_storage::*; +use std::sync::Arc; +use tempfile::tempdir; + +fn load_config_file() -> FaultCatalogConfig { + let d1 = fault::FaultDescriptor { + id: fault::FaultId::Text(to_static_short_string("d1").unwrap()), + + name: to_static_short_string("Descriptor 1").unwrap(), + summary: None, + + category: fault::FaultType::Software, + severity: 
fault::FaultSeverity::Debug, + compliance: fault::ComplianceVec::try_from(&[fault::ComplianceTag::EmissionRelevant, fault::ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: Some(debounce::DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }; + + let d2 = fault::FaultDescriptor { + id: fault::FaultId::Text(to_static_short_string("d2").unwrap()), + + name: to_static_short_string("Descriptor 2").unwrap(), + summary: Some(to_static_long_string("Human-readable summary").unwrap()), + + category: fault::FaultType::Configuration, + severity: fault::FaultSeverity::Warn, + compliance: fault::ComplianceVec::try_from(&[fault::ComplianceTag::SecurityRelevant, fault::ComplianceTag::SafetyCritical][..]).unwrap(), + + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce::DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100_u64).into(), + }), + manager_side_reset: None, + }; + let faults = vec![d1, d2]; + FaultCatalogConfig { + id: "hvac".into(), + version: 3, + faults, + } +} + +fn main() { + let storage_dir = tempdir().unwrap(); + + let storage = Arc::new(KvsSovdFaultStateStorage::new(storage_dir.path(), 0).expect("storage init")); + + let cfg = load_config_file(); + let registry = Arc::new(FaultCatalogRegistry::new(vec![ + FaultCatalogBuilder::new().cfg_struct(cfg).expect("builder config").build(), + ])); + + let cycle_tracker = Arc::new(std::sync::RwLock::new(OperationCycleTracker::new())); + let mut processor = FaultRecordProcessor::new(Arc::clone(&storage), Arc::clone(®istry), cycle_tracker); + let manager = SovdFaultManager::new(storage, registry); + + // Try to get faults for a non-existent path. 
+ let faults = manager.get_all_faults("invalid_hvac"); + assert!(faults.is_err()); + assert_eq!(faults.unwrap_err(), Error::BadArgument); + + let faults = manager.get_all_faults("hvac").unwrap(); + println!("{faults:?}"); + + let record = fault::FaultRecord { + id: fault::FaultId::Text(to_static_short_string("d1").unwrap()), + time: IpcTimestamp::default(), + source: SourceId { + entity: to_static_short_string("source").unwrap(), + ecu: Some(ShortString::from_bytes("ECU-A".as_bytes()).unwrap()), + domain: Some(to_static_short_string("ADAS").unwrap()), + sw_component: Some(to_static_short_string("Perception").unwrap()), + instance: Some(to_static_short_string("0").unwrap()), + }, + lifecycle_phase: fault::LifecyclePhase::Running, + lifecycle_stage: fault::LifecycleStage::Failed, + env_data: MetadataVec::try_from( + &[ + (to_static_short_string("k1").unwrap(), to_static_short_string("v1").unwrap()), + (to_static_short_string("k2").unwrap(), to_static_short_string("v2").unwrap()), + ][..], + ) + .unwrap(), + }; + + processor.process_record(&to_static_long_string("hvac").unwrap(), &record); + + let faults = manager.get_all_faults("hvac").unwrap(); + println!("{faults:?}"); + + let fault = manager.get_fault("hvac", &faults[0].code).unwrap(); + println!("{fault:?}"); +} diff --git a/src/dfm_lib/src/aging_manager.rs b/src/dfm_lib/src/aging_manager.rs new file mode 100644 index 0000000..9951fe3 --- /dev/null +++ b/src/dfm_lib/src/aging_manager.rs @@ -0,0 +1,718 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Evaluates fault aging policies and determines when faults should be reset. +//! +//! 
Aging logic checks whether a fault has been stable long enough
+//! (either in operation cycles or wall-clock time) to be cleared.
+
+use crate::operation_cycle::OperationCycleTracker;
+use crate::sovd_fault_storage::SovdFaultState;
+use alloc::sync::Arc;
+use common::config::{ResetPolicy, ResetTrigger};
+use std::collections::HashMap;
+use std::sync::RwLock;
+use std::time::Instant;
+
+/// Per-fault aging state (runtime-only, **not persisted** across restarts).
+///
+/// Tracks when a fault was last active and at what cycle counts,
+/// enabling the aging manager to determine if reset conditions are met.
+///
+/// # Persistence caveat
+///
+/// `last_active_cycle` and `is_healed` live only in process memory.
+/// On DFM restart, aging progress is lost: a fault that was 4/5 through
+/// its aging window restarts from 0. The cumulative `aging_counter` and
+/// `healing_counter` ARE persisted via `SovdFaultState` in KVS, so the
+/// total count survives restarts - only the in-progress window is reset.
+#[derive(Debug, Clone)]
+pub struct AgingState {
+    /// Cycle count at which fault last occurred, keyed by cycle_ref.
+    pub last_active_cycle: HashMap<String, u64>,
+    /// Timestamp of last active (Failed/PreFailed) state.
+    pub last_active_time: Instant,
+    /// Number of times aging/reset was applied to this fault.
+    pub aging_counter: u32,
+    /// Whether the fault is currently in healed state.
+    pub is_healed: bool,
+}
+
+impl AgingState {
+    /// Create a new aging state for a fault that just became active.
+    pub fn new() -> Self {
+        Self {
+            last_active_cycle: HashMap::new(),
+            last_active_time: Instant::now(),
+            aging_counter: 0,
+            is_healed: false,
+        }
+    }
+
+    /// Mark fault as active (resets aging progress).
+    /// Call when fault transitions to Failed or PreFailed.
+    pub fn mark_active(&mut self, cycle_tracker: &OperationCycleTracker) {
+        self.last_active_time = Instant::now();
+        self.is_healed = false;
+        // Snapshot current cycles
+        self.last_active_cycle = cycle_tracker.snapshot();
+    }
+
+    /// Mark fault as healed after reset policy triggered.
+    pub fn mark_healed(&mut self) {
+        self.aging_counter = self.aging_counter.saturating_add(1);
+        self.is_healed = true;
+    }
+}
+
+impl Default for AgingState {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Evaluates aging (reset) policies for faults.
+///
+/// The aging manager holds a reference to the operation cycle tracker
+/// and can evaluate whether a fault's reset policy conditions are met.
+pub struct AgingManager {
+    cycle_tracker: Arc<RwLock<OperationCycleTracker>>,
+}
+
+impl AgingManager {
+    /// Create an aging manager with a shared cycle tracker.
+    pub fn new(cycle_tracker: Arc<RwLock<OperationCycleTracker>>) -> Self {
+        Self { cycle_tracker }
+    }
+
+    /// Check if the given reset policy is satisfied for this aging state.
+    /// Returns `true` if the fault should be reset (healed).
+    pub fn should_reset(&self, policy: &ResetPolicy, state: &AgingState) -> bool {
+        // Already healed faults don't need re-evaluation
+        if state.is_healed {
+            return false;
+        }
+
+        // ISO 14229: some regulations require a minimum number of operation
+        // cycles before a fault may be cleared. Gate the trigger evaluation
+        // behind this threshold when configured.
+ if let Some(min_cycles) = policy.min_operating_cycles_before_clear { + let tracker = self.cycle_tracker.read().unwrap_or_else(|e| e.into_inner()); + let current_power = tracker.get("power"); + let fault_power = state.last_active_cycle.get("power").copied().unwrap_or(0); + if current_power.saturating_sub(fault_power) < u64::from(min_cycles) { + return false; + } + } + + self.evaluate_trigger(&policy.trigger, state) + } + + fn evaluate_trigger(&self, trigger: &ResetTrigger, state: &AgingState) -> bool { + match trigger { + ResetTrigger::PowerCycles(min_cycles) => { + // PowerCycles uses the "power" counter for backward compatibility. + // Recover from RwLock poisoning — data integrity is more important + // than propagating a panic from an unrelated thread. + let tracker = self.cycle_tracker.read().unwrap_or_else(|e| e.into_inner()); + let current = tracker.get("power"); + let fault_cycle = state.last_active_cycle.get("power").copied().unwrap_or(0); + current.saturating_sub(fault_cycle) >= u64::from(*min_cycles) + } + + ResetTrigger::OperationCycles { min_cycles, cycle_ref } => { + let tracker = self.cycle_tracker.read().unwrap_or_else(|e| e.into_inner()); + let ref_str = cycle_ref.to_string(); + let current = tracker.get(&ref_str); + let fault_cycle = state.last_active_cycle.get(&ref_str).copied().unwrap_or(0); + current.saturating_sub(fault_cycle) >= u64::from(*min_cycles) + } + + ResetTrigger::StableFor(duration) => Instant::now().duration_since(state.last_active_time) >= (*duration).into(), + + ResetTrigger::ToolOnly => false, // Never auto-reset + } + } + + /// Apply reset to fault state, clearing DTC flags. + /// Call this when `should_reset()` returns `true`. 
+ pub fn apply_reset(&self, aging_state: &mut AgingState, sovd_state: &mut SovdFaultState) { + aging_state.mark_healed(); + + // Clear DTC flags in the SOVD state + sovd_state.confirmed_dtc = false; + sovd_state.pending_dtc = false; + sovd_state.test_failed = false; + sovd_state.warning_indicator_requested = false; + + // Increment persisted counters directly (saturating to prevent overflow). + // Using saturating_add on sovd_state rather than syncing from in-memory + // aging_state ensures correctness across process restarts where + // aging_state starts at 0 but sovd_state is loaded from KVS. + sovd_state.aging_counter = sovd_state.aging_counter.saturating_add(1); + sovd_state.healing_counter = sovd_state.healing_counter.saturating_add(1); + } +} + +#[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] +mod tests { + use super::*; + use common::types::ShortString; + use std::time::Duration; + + fn make_tracker() -> Arc> { + Arc::new(RwLock::new(OperationCycleTracker::new())) + } + + #[test] + fn power_cycles_trigger_not_met() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker.clone()); + + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(3), + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + state.mark_active(&tracker.read().unwrap()); + + // Only 2 power cycles — should not reset + tracker.write().unwrap().increment("power"); + tracker.write().unwrap().increment("power"); + + assert!(!manager.should_reset(&policy, &state)); + } + + #[test] + fn power_cycles_trigger_met() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker.clone()); + + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(3), + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + state.mark_active(&tracker.read().unwrap()); + + // 3 power 
cycles — should reset + for _ in 0..3 { + tracker.write().unwrap().increment("power"); + } + + assert!(manager.should_reset(&policy, &state)); + } + + #[test] + fn operation_cycles_named_counter() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker.clone()); + + let policy = ResetPolicy { + trigger: ResetTrigger::OperationCycles { + min_cycles: 2, + cycle_ref: ShortString::try_from("ignition").unwrap(), + }, + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + state.mark_active(&tracker.read().unwrap()); + + // Increment different counter — should not affect ignition trigger + tracker.write().unwrap().increment("power"); + tracker.write().unwrap().increment("power"); + assert!(!manager.should_reset(&policy, &state)); + + // Increment ignition counter + tracker.write().unwrap().increment("ignition"); + assert!(!manager.should_reset(&policy, &state)); // Only 1 + + tracker.write().unwrap().increment("ignition"); + assert!(manager.should_reset(&policy, &state)); // Now 2 + } + + #[test] + fn stable_for_trigger() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker); + + let policy = ResetPolicy { + trigger: ResetTrigger::StableFor(Duration::from_millis(10).into()), + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + // State was just marked active — not enough time passed + assert!(!manager.should_reset(&policy, &state)); + + // Backdate the last_active_time + state.last_active_time = Instant::now() - Duration::from_millis(50); + assert!(manager.should_reset(&policy, &state)); + } + + #[test] + fn tool_only_never_auto_resets() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker.clone()); + + let policy = ResetPolicy { + trigger: ResetTrigger::ToolOnly, + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + state.mark_active(&tracker.read().unwrap()); + + // Even after 100 cycles, should not 
reset + for _ in 0..100 { + tracker.write().unwrap().increment("power"); + } + + assert!(!manager.should_reset(&policy, &state)); + } + + #[test] + fn apply_reset_clears_flags() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker); + + let mut aging_state = AgingState::new(); + let mut sovd_state = SovdFaultState { + test_failed: true, + confirmed_dtc: true, + pending_dtc: true, + warning_indicator_requested: true, + ..Default::default() + }; + + manager.apply_reset(&mut aging_state, &mut sovd_state); + + assert!(aging_state.is_healed); + assert_eq!(aging_state.aging_counter, 1); + assert!(!sovd_state.test_failed); + assert!(!sovd_state.confirmed_dtc); + assert!(!sovd_state.pending_dtc); + assert!(!sovd_state.warning_indicator_requested); + assert_eq!(sovd_state.aging_counter, 1); + assert_eq!(sovd_state.healing_counter, 1); + } + + #[test] + fn apply_reset_n_cycles_increments_aging_counter() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker); + + let mut aging_state = AgingState::new(); + let mut sovd_state = SovdFaultState { + test_failed: true, + confirmed_dtc: true, + ..Default::default() + }; + + for expected in 1..=5u32 { + aging_state.is_healed = false; + sovd_state.test_failed = true; + sovd_state.confirmed_dtc = true; + manager.apply_reset(&mut aging_state, &mut sovd_state); + assert_eq!(sovd_state.aging_counter, expected, "after {expected} resets"); + assert_eq!(sovd_state.healing_counter, expected, "healing_counter after {expected} resets"); + } + } + + #[test] + fn already_healed_not_rechecked() { + let tracker = make_tracker(); + let manager = AgingManager::new(tracker.clone()); + + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(1), + min_operating_cycles_before_clear: None, + }; + + let mut state = AgingState::new(); + state.mark_active(&tracker.read().unwrap()); + state.mark_healed(); // Already healed + + tracker.write().unwrap().increment("power"); + + // Already healed — should not 
re-trigger + assert!(!manager.should_reset(&policy, &state)); + } +} + +#[cfg(test)] +mod aging_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::dfm_test_utils::*; + use crate::fault_record_processor::FaultRecordProcessor; + use crate::sovd_fault_storage::SovdFaultStateStorage; + use common::config::{ResetPolicy, ResetTrigger}; + use common::fault::*; + use common::types::ShortString; + use std::sync::Arc; + use std::time::Duration; + + // ============================================================================ + // PowerCycles aging + // ============================================================================ + + /// E2E: fault Failed → Passed → power cycles → aging reset clears confirmed_dtc. + #[test] + fn aging_power_cycles_clears_confirmed_dtc() { + let fault_id = FaultId::Numeric(500); + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(3), + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + // Step 1: Fault occurs + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &failed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "Failed should set confirmed_dtc"); + assert!(state.test_failed, "Failed should set test_failed"); + + // Step 2: Fault passes — confirmed_dtc should stay latched (aging policy) + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + 
assert!(state.confirmed_dtc, "confirmed_dtc should stay latched after Passed (aging policy)"); + assert!(!state.test_failed, "test_failed should clear on Passed"); + + // Step 3: Advance power cycles but not enough (only 2 of 3 needed) + tracker.write().unwrap().increment("power"); + tracker.write().unwrap().increment("power"); + + // Send another Passed event to trigger re-evaluation + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "confirmed_dtc should still be latched (only 2/3 power cycles)"); + + // Step 4: Third power cycle → aging conditions met + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "confirmed_dtc should clear after 3 power cycles (aging reset)"); + assert!(!state.test_failed, "test_failed should remain cleared after aging reset"); + assert_eq!(state.healing_counter, 1, "healing_counter should increment on aging reset"); + } + + // ============================================================================ + // OperationCycles aging (named counter) + // ============================================================================ + + /// E2E: fault resets after N named operation cycles (ignition). 
+ #[test] + fn aging_operation_cycles_named_counter_resets() { + let fault_id = FaultId::Numeric(501); + let policy = ResetPolicy { + trigger: ResetTrigger::OperationCycles { + min_cycles: 2, + cycle_ref: ShortString::try_from("ignition").unwrap(), + }, + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + // Fault occurs and then passes + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "confirmed_dtc latched"); + + // Increment a different counter — should not affect ignition trigger + tracker.write().unwrap().increment("power"); + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "power cycles should not trigger ignition-based aging"); + + // Increment ignition counter: 1 of 2 + tracker.write().unwrap().increment("ignition"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "Only 1/2 ignition cycles — not enough"); + + // Increment ignition counter: 2 of 2 — should trigger + tracker.write().unwrap().increment("ignition"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "2/2 ignition cycles should trigger aging reset"); + assert_eq!(state.healing_counter, 1); + } + 
+ // ============================================================================ + // StableFor aging (time-based) + // ============================================================================ + + /// E2E: fault resets after being stable (Passed) for a given duration. + #[test] + #[cfg_attr(miri, ignore)] // timing-dependent: Miri's ~100x slowdown causes wall-clock drift + fn aging_stable_for_duration_resets() { + let fault_id = FaultId::Numeric(502); + let policy = ResetPolicy { + trigger: ResetTrigger::StableFor(Duration::from_millis(10).into()), + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker); + let path = make_path("test_entity"); + + // Fault occurs and passes + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "confirmed_dtc should be latched immediately after Passed"); + + // Wait for the stability duration to elapse + std::thread::sleep(Duration::from_millis(30)); + + // Re-process a Passed event — aging evaluation should now trigger + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "confirmed_dtc should clear after stability period elapsed"); + assert_eq!(state.healing_counter, 1); + } + + // ============================================================================ + // ToolOnly — never auto-resets + // ============================================================================ + + /// E2E: fault with ToolOnly policy never 
auto-resets. + #[test] + fn aging_tool_only_never_auto_resets() { + let fault_id = FaultId::Numeric(503); + let policy = ResetPolicy { + trigger: ResetTrigger::ToolOnly, + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + // Even after many cycles, should not auto-reset + for _ in 0..100 { + tracker.write().unwrap().increment("power"); + } + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "ToolOnly should never auto-reset — confirmed_dtc stays latched"); + assert_eq!(state.healing_counter, 0); + } + + // ============================================================================ + // No aging policy — immediate clear on Passed + // ============================================================================ + + /// Without aging policy, confirmed_dtc clears immediately on Passed. 
+ #[test] + fn no_aging_policy_clears_immediately() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); // No aging configured + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker); + let path = make_path("test_entity"); + + let failed = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + let passed = make_record(FaultId::Numeric(42), LifecycleStage::Passed); + processor.process_record(&path, &failed); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(state.confirmed_dtc); + + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "Without aging, confirmed_dtc should clear immediately on Passed"); + } + + // ============================================================================ + // Flapping: re-failure resets aging progress + // ============================================================================ + + /// Fast flapping: if a fault re-fails before aging completes, aging resets. 
+ #[test] + fn flapping_resets_aging_progress() { + let fault_id = FaultId::Numeric(504); + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(5), + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + + // First occurrence + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + // Advance 3 of 5 power cycles + for _ in 0..3 { + tracker.write().unwrap().increment("power"); + } + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "Only 3/5 cycles — should not have aged out yet"); + + // Fault re-occurs! Aging resets + processor.process_record(&path, &failed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "Re-failure keeps confirmed_dtc latched"); + + // Now pass again — aging counter should restart from THIS failure + processor.process_record(&path, &passed); + + // Previous 3 power cycles should NOT count toward the new aging window. + // Need 5 more power cycles from the new failure point. 
+ for _ in 0..4 { + tracker.write().unwrap().increment("power"); + } + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.confirmed_dtc, "Only 4/5 cycles since re-failure — should not have aged out"); + + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "5/5 cycles since re-failure — aging should complete now"); + assert_eq!(state.healing_counter, 1); + } + + // ============================================================================ + // Regression: existing status bits preserved + // ============================================================================ + + /// Aging reset preserves warning_indicator_requested=false and doesn't + /// corrupt other status bits. + #[test] + fn aging_reset_preserves_status_consistency() { + let fault_id = FaultId::Numeric(505); + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(1), + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + // One power cycle — reset triggers + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.confirmed_dtc, "confirmed_dtc cleared"); + assert!(!state.pending_dtc, "pending_dtc cleared"); + assert!(!state.test_failed, 
"test_failed cleared"); + assert!(!state.warning_indicator_requested, "WIR cleared"); + assert_eq!(state.healing_counter, 1, "healing counter incremented"); + } + + /// Multiple aging resets increment healing_counter each time. + #[test] + fn multiple_aging_resets_increment_healing_counter() { + let fault_id = FaultId::Numeric(506); + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(1), + min_operating_cycles_before_clear: None, + }; + let registry = make_aging_registry("test_entity", fault_id.clone(), policy); + let storage = Arc::new(InMemoryStorage::new()); + let tracker = make_cycle_tracker(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry, tracker.clone()); + let path = make_path("test_entity"); + + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + + // Cycle 1: fail → pass → age + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert_eq!(state.healing_counter, 1); + + // Cycle 2: re-fail → pass → age + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert_eq!(state.healing_counter, 2); + + // Cycle 3: re-fail → pass → age + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert_eq!(state.healing_counter, 3); + } +} diff --git a/src/dfm_lib/src/dfm_test_utils.rs b/src/dfm_lib/src/dfm_test_utils.rs new file mode 100644 index 
0000000..3178669 --- /dev/null +++ b/src/dfm_lib/src/dfm_test_utils.rs @@ -0,0 +1,453 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Shared test utilities for dfm_lib tests. +//! +//! Provides `InMemoryStorage` (a thread-safe mock implementing +//! `SovdFaultStateStorage`) and helper functions for building registries, +//! records, and paths used across all dfm_lib test modules. +#![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects +)] + +use crate::fault_catalog_registry::FaultCatalogRegistry; +use crate::operation_cycle::OperationCycleTracker; +use crate::sovd_fault_storage::{SovdFaultState, SovdFaultStateStorage, StorageError}; +use crate::transport::DfmTransport; +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::config::ResetPolicy; +use common::debounce::DebounceMode; +use common::enabling_condition::EnablingConditionNotification; +use common::fault; +use common::fault::*; +use common::sink_error::SinkError; +use common::types::*; +use std::collections::HashMap; +use std::sync::{Arc, Mutex, RwLock, mpsc}; + +// ============================================================================ +// In-memory mock storage for testing +// ============================================================================ + +/// Thread-safe in-memory storage implementing SovdFaultStateStorage. 
+pub struct InMemoryStorage { + data: Mutex>>, +} + +impl InMemoryStorage { + pub fn new() -> Self { + Self { + data: Mutex::new(HashMap::new()), + } + } +} + +impl SovdFaultStateStorage for InMemoryStorage { + fn put(&self, path: &str, fault_id: &fault::FaultId, state: SovdFaultState) -> Result<(), StorageError> { + let key = fault_id_to_string(fault_id); + let mut data = self.data.lock().unwrap(); + data.entry(path.to_string()).or_default().insert(key, state); + Ok(()) + } + + fn get_all(&self, path: &str) -> Result, StorageError> { + let data = self.data.lock().unwrap(); + match data.get(path) { + Some(faults) => Ok(faults.iter().map(|(key, state)| (fault_id_from_string(key), state.clone())).collect()), + None => Ok(Vec::new()), + } + } + + fn get(&self, path: &str, fault_id: &fault::FaultId) -> Result, StorageError> { + let key = fault_id_to_string(fault_id); + let data = self.data.lock().unwrap(); + Ok(data.get(path).and_then(|faults| faults.get(&key).cloned())) + } + + fn delete_all(&self, path: &str) -> Result<(), StorageError> { + let mut data = self.data.lock().unwrap(); + data.remove(path); + Ok(()) + } + + fn delete(&self, path: &str, fault_id: &fault::FaultId) -> Result<(), StorageError> { + let key = fault_id_to_string(fault_id); + let mut data = self.data.lock().unwrap(); + if let Some(faults) = data.get_mut(path) + && faults.remove(&key).is_some() + { + return Ok(()); + } + Err(StorageError::NotFound) + } +} + +/// Encode FaultId to a typed string key, mirroring the real storage format. +pub fn fault_id_to_string(fault_id: &fault::FaultId) -> String { + match fault_id { + fault::FaultId::Numeric(x) => format!("n:{x}"), + fault::FaultId::Text(t) => format!("t:{t}"), + fault::FaultId::Uuid(u) => { + let hex: String = u.iter().map(|b| format!("{b:02x}")).collect(); + format!("u:{hex}") + } + } +} + +/// Decode a typed string key back to a FaultId. 
+fn fault_id_from_string(key: &str) -> fault::FaultId { + if let Some(num_str) = key.strip_prefix("n:") + && let Ok(n) = num_str.parse::() + { + return fault::FaultId::Numeric(n); + } + if let Some(hex_str) = key.strip_prefix("u:") + && hex_str.len() == 32 + { + let mut bytes = [0u8; 16]; + for (i, byte) in bytes.iter_mut().enumerate() { + if let Ok(b) = u8::from_str_radix(&hex_str[i * 2..i * 2 + 2], 16) { + *byte = b; + } + } + return fault::FaultId::Uuid(bytes); + } + if let Some(text) = key.strip_prefix("t:") { + return fault::FaultId::Text(ShortString::try_from(text).unwrap()); + } + // Backward compat: untyped keys → Text + fault::FaultId::Text(ShortString::try_from(key).unwrap()) +} + +// ============================================================================ +// Test helpers +// ============================================================================ + +/// Registry with FaultId::Text descriptors (SovdFaultManager requires Text IDs). +pub fn make_text_registry() -> Arc { + let config = FaultCatalogConfig { + id: "test_entity".into(), + version: 1, + faults: vec![ + FaultDescriptor { + id: FaultId::Text(to_static_short_string("fault_a").unwrap()), + name: to_static_short_string("Fault A").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Text(to_static_short_string("fault_b").unwrap()), + name: to_static_short_string("Fault B").unwrap(), + summary: None, + category: FaultType::Hardware, + severity: FaultSeverity::Error, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + ], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + 
Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +/// Registry with all three FaultId variants (Text, Numeric, Uuid). +pub fn make_mixed_registry() -> Arc { + let config = FaultCatalogConfig { + id: "mixed_entity".into(), + version: 1, + faults: vec![ + FaultDescriptor { + id: FaultId::Text(to_static_short_string("fault_text").unwrap()), + name: to_static_short_string("Text Fault").unwrap(), + summary: Some(to_static_long_string("A text-identified fault").unwrap()), + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Numeric(0x1001), + name: to_static_short_string("Numeric Fault").unwrap(), + summary: Some(to_static_long_string("A numeric DTC-like fault").unwrap()), + category: FaultType::Hardware, + severity: FaultSeverity::Error, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Uuid([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + ]), + name: to_static_short_string("UUID Fault").unwrap(), + summary: Some(to_static_long_string("A UUID-identified fault").unwrap()), + category: FaultType::Communication, + severity: FaultSeverity::Fatal, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + ], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +/// Registry with FaultId::Numeric descriptors (for processor tests only). 
+pub fn make_registry() -> Arc { + let config = FaultCatalogConfig { + id: "test_entity".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Numeric(42), + name: to_static_short_string("Test fault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +pub fn make_record(fault_id: FaultId, stage: LifecycleStage) -> FaultRecord { + FaultRecord { + id: fault_id, + time: IpcTimestamp::default(), + source: common::ids::SourceId { + entity: to_static_short_string("test").unwrap(), + ecu: None, + domain: None, + sw_component: None, + instance: None, + }, + lifecycle_phase: LifecyclePhase::Running, + lifecycle_stage: stage, + env_data: MetadataVec::new(), + } +} + +pub fn make_path(path: &str) -> LongString { + LongString::from_str_truncated(path).unwrap() +} + +/// Registry with manager-side debounce configured. +pub fn make_debounce_registry(entity_id: &str, fault_id: FaultId, debounce: DebounceMode) -> Arc { + let config = FaultCatalogConfig { + id: entity_id.to_string().into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id, + name: to_static_short_string("Debounced fault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce), + manager_side_reset: None, + }], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +/// Create a shared OperationCycleTracker for testing. 
+pub fn make_cycle_tracker() -> Arc> { + Arc::new(RwLock::new(OperationCycleTracker::new())) +} + +/// Registry with manager-side reset (aging) policy configured. +pub fn make_aging_registry(entity_id: &str, fault_id: FaultId, reset_policy: ResetPolicy) -> Arc { + let config = FaultCatalogConfig { + id: entity_id.to_string().into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id, + name: to_static_short_string("Aging fault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: Some(reset_policy), + }], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +/// Registry with compliance tags set (for warning_indicator_requested tests). +pub fn make_compliance_registry(entity_id: &str, fault_id: FaultId, compliance: ComplianceVec) -> Arc { + let config = FaultCatalogConfig { + id: entity_id.to_string().into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id, + name: to_static_short_string("Compliance fault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance, + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog])) +} + +/// Registry with two sources sharing the same fault ID but independent debounce. 
+pub fn make_two_source_debounce_registry(fault_id: FaultId, debounce: DebounceMode) -> Arc { + let config1 = FaultCatalogConfig { + id: "app1".into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id.clone(), + name: to_static_short_string("Fault from app1").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce), + manager_side_reset: None, + }], + }; + let config2 = FaultCatalogConfig { + id: "app2".into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id, + name: to_static_short_string("Fault from app2").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(debounce), + manager_side_reset: None, + }], + }; + let catalog1 = FaultCatalogBuilder::new().cfg_struct(config1).unwrap().build(); + let catalog2 = FaultCatalogBuilder::new().cfg_struct(config2).unwrap().build(); + Arc::new(FaultCatalogRegistry::new(vec![catalog1, catalog2])) +} + +// ============================================================================ +// In-memory DFM transport for testing (no iceoryx2 dependency) +// ============================================================================ + +/// Channel-based [`DfmTransport`] for unit tests. +/// +/// Uses `mpsc` channels for events and collects published responses +/// and notifications in thread-safe vectors. No iceoryx2 shared memory +/// is required, so tests can run under Miri and in parallel without +/// `#[serial(ipc)]`. +/// +/// # Example +/// +/// ```rust,ignore +/// let (transport, sender) = InMemoryTransport::new(); +/// sender.send(DiagnosticEvent::Fault((path, record))).unwrap(); +/// // pass transport to run_dfm_loop(...) 
+/// ``` +#[allow(dead_code)] +pub struct InMemoryTransport { + receiver: Mutex>, + hash_responses: Arc>>, + ec_notifications: Arc>>, +} + +#[allow(dead_code)] +impl InMemoryTransport { + /// Create a new in-memory transport and its event sender. + /// + /// The returned `mpsc::Sender` is used to inject events into the DFM + /// loop (simulating IPC messages from reporter applications). + pub fn new() -> (Self, mpsc::Sender) { + let (tx, rx) = mpsc::channel(); + let transport = Self { + receiver: Mutex::new(rx), + hash_responses: Arc::new(Mutex::new(Vec::new())), + ec_notifications: Arc::new(Mutex::new(Vec::new())), + }; + (transport, tx) + } + + /// Get all hash responses published so far. + pub fn hash_responses(&self) -> Vec { + self.hash_responses.lock().unwrap().clone() + } + + /// Get all EC notifications published so far. + pub fn ec_notifications(&self) -> Vec { + self.ec_notifications.lock().unwrap().clone() + } +} + +impl DfmTransport for InMemoryTransport { + fn receive_event(&self) -> Result, SinkError> { + let rx = self.receiver.lock().unwrap(); + match rx.try_recv() { + Ok(event) => Ok(Some(event)), + Err(mpsc::TryRecvError::Empty) => Ok(None), + Err(mpsc::TryRecvError::Disconnected) => Ok(None), + } + } + + fn publish_hash_response(&self, response: bool) -> Result<(), SinkError> { + self.hash_responses.lock().unwrap().push(response); + Ok(()) + } + + fn publish_ec_notification(&self, notification: EnablingConditionNotification) -> Result<(), SinkError> { + self.ec_notifications.lock().unwrap().push(notification); + Ok(()) + } + + fn wait(&self, timeout: core::time::Duration) -> Result { + std::thread::sleep(timeout); + Ok(true) + } +} diff --git a/src/dfm_lib/src/diagnostic_fault_manager.rs b/src/dfm_lib/src/diagnostic_fault_manager.rs new file mode 100644 index 0000000..496ba5d --- /dev/null +++ b/src/dfm_lib/src/diagnostic_fault_manager.rs @@ -0,0 +1,344 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) 
distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Top-level Diagnostic Fault Manager (DFM) orchestrator. +//! +//! [`DiagnosticFaultManager`] wires together the IPC transport, +//! fault-record processor, catalog/enabling-condition registries, +//! operation-cycle tracker, and SOVD fault manager into a single +//! run-loop that processes incoming diagnostic events from distributed +//! reporter applications. +//! +//! The DFM is generic over both the storage backend +//! ([`SovdFaultStateStorage`]) and the IPC transport ([`DfmTransport`]). +//! The default transport is [`Iceoryx2Transport`] (iceoryx2 shared memory). + +use crate::{ + enabling_condition_registry::EnablingConditionRegistry, + fault_catalog_registry::FaultCatalogRegistry, + fault_lib_communicator::{DEFAULT_DFM_CYCLE_TIME, DfmLoopExtensions, Iceoryx2Transport, run_dfm_loop}, + fault_record_processor::FaultRecordProcessor, + operation_cycle::{OperationCycleProvider, OperationCycleTracker}, + query_server::DfmQueryServer, + sovd_fault_manager::SovdFaultManager, + sovd_fault_storage::SovdFaultStateStorage, + transport::DfmTransport, +}; +use alloc::sync::Arc; +use core::sync::atomic::{AtomicBool, Ordering}; +use log::{error, info}; +use std::{ + sync::{Mutex, RwLock}, + thread::{self, JoinHandle}, +}; + +/// Central DFM orchestrator, generic over storage `S` and transport `T`. +/// +/// By default (via [`new`](DiagnosticFaultManager::new) and +/// [`with_cycle_provider`](DiagnosticFaultManager::with_cycle_provider)), +/// the transport is [`Iceoryx2Transport`]. Use +/// [`with_transport`](DiagnosticFaultManager::with_transport) to inject +/// a custom [`DfmTransport`] implementation. 
// NOTE(review): all generic parameters and trait bounds in this file were
// lost in the original text; the ones below are reconstructed from usage
// (storage goes into FaultRecordProcessor/SovdFaultManager, transport is
// produced by `transport_factory`) — confirm against the project sources.
pub struct DiagnosticFaultManager<S, T = Iceoryx2Transport> {
    /// Set on drop; observed by the worker loop to request shutdown.
    shutdown: Arc<AtomicBool>,
    /// Worker thread running the DFM event loop (`None` once joined).
    fault_lib_receiver_thread: Option<JoinHandle<()>>,
    storage: Arc<S>,
    registry: Arc<FaultCatalogRegistry>,
    cycle_tracker: Arc<RwLock<OperationCycleTracker>>,
    cycle_provider: Option<Arc<Mutex<Box<dyn OperationCycleProvider + Send>>>>,
    /// Marks the transport type; the transport itself lives on the worker thread.
    _transport: core::marker::PhantomData<T>,
}

impl<S: SovdFaultStateStorage + Send + Sync + 'static> DiagnosticFaultManager<S, Iceoryx2Transport> {
    /// Create a new DiagnosticFaultManager with default iceoryx2 transport.
    ///
    /// # Panics
    ///
    /// Panics if the receiver thread cannot be spawned. This is a system-level
    /// failure that indicates a fundamentally broken environment.
    #[allow(clippy::expect_used)]
    pub fn new(storage: S, registry: FaultCatalogRegistry) -> Self {
        Self::with_cycle_provider(storage, registry, None)
    }

    /// Create a DFM with an explicit [`OperationCycleProvider`] and default
    /// iceoryx2 transport.
    ///
    /// When a provider is set, each DFM iteration polls it for new events
    /// and feeds them into the shared [`OperationCycleTracker`]. This is the
    /// preferred way to integrate external lifecycle signals (ECU, HPC, etc.).
    ///
    /// # Panics
    ///
    /// Panics if the receiver thread cannot be spawned.
    #[allow(clippy::expect_used)]
    pub fn with_cycle_provider(
        storage: S,
        registry: FaultCatalogRegistry,
        provider: Option<Box<dyn OperationCycleProvider + Send>>,
    ) -> Self {
        Self::with_transport(storage, registry, provider, false, Iceoryx2Transport::new)
    }

    /// Create a DFM with query server enabled for external IPC access.
    ///
    /// When enabled, the DFM loop polls for incoming
    /// [`DfmQueryRequest`](common::query_protocol::DfmQueryRequest)s and
    /// responds via iceoryx2 request-response on the `dfm/query` service.
    ///
    /// # Panics
    ///
    /// Panics if the receiver thread cannot be spawned.
    #[allow(clippy::expect_used)]
    pub fn with_query_server(storage: S, registry: FaultCatalogRegistry) -> Self {
        Self::with_transport(storage, registry, None, true, Iceoryx2Transport::new)
    }
}

impl<S: SovdFaultStateStorage + Send + Sync + 'static, T: DfmTransport> DiagnosticFaultManager<S, T> {
    /// Create a DFM with a custom [`DfmTransport`] implementation.
+ /// + /// `transport_factory` is called on the worker thread to create the + /// transport (some transports, e.g. iceoryx2, must be created on the + /// thread that will use them). + /// + /// When `enable_query_server` is `true`, a [`DfmQueryServer`] is created + /// on a separate iceoryx2 node inside the worker thread and polled each + /// event-loop iteration. + /// + /// # Panics + /// + /// Panics if the receiver thread cannot be spawned. + #[allow(clippy::expect_used)] + pub fn with_transport( + storage: S, + registry: FaultCatalogRegistry, + provider: Option>, + enable_query_server: bool, + transport_factory: impl FnOnce() -> T + Send + 'static, + ) -> Self { + let shutdown = Arc::new(AtomicBool::new(false)); + let storage = Arc::new(storage); + let registry = Arc::new(registry); + let cycle_tracker = Arc::new(RwLock::new(OperationCycleTracker::new())); + let processor = FaultRecordProcessor::new(Arc::clone(&storage), Arc::clone(®istry), Arc::clone(&cycle_tracker)); + + let cycle_provider: Option>>> = provider.map(|p| Arc::new(Mutex::new(p))); + let worker_cycle_provider = cycle_provider.clone(); + let worker_cycle_tracker = Arc::clone(&cycle_tracker); + let worker_storage = Arc::clone(&storage); + let worker_registry = Arc::clone(®istry); + + let worker_shutdown = Arc::clone(&shutdown); + let handle: JoinHandle<()> = thread::Builder::new() + .name("fault_lib_receiver_thread".into()) + .spawn(move || { + let mut ec_registry = EnablingConditionRegistry::new(); + let transport = transport_factory(); + + let query_server = if enable_query_server { + match iceoryx2::node::NodeBuilder::new().create::() { + Ok(query_node) => { + let sovd = SovdFaultManager::new(worker_storage, worker_registry); + match DfmQueryServer::new(&query_node, sovd) { + Ok(server) => Some(server), + Err(e) => { + error!("Failed to create DfmQueryServer, query/clear disabled: {e}"); + None + } + } + } + Err(e) => { + error!("Failed to create query server node, query/clear disabled: {e}"); 
+ None + } + } + } else { + None + }; + + let extensions = DfmLoopExtensions { + query_server: query_server.as_ref(), + }; + + run_dfm_loop( + &transport, + &worker_shutdown, + &mut { processor }, + &mut ec_registry, + worker_cycle_provider.as_ref(), + &worker_cycle_tracker, + DEFAULT_DFM_CYCLE_TIME, + extensions, + ); + }) + .expect("Failed to spawn the fault_lib_receiver_thread"); + + Self { + shutdown, + fault_lib_receiver_thread: Some(handle), + storage, + registry, + cycle_tracker, + cycle_provider, + _transport: core::marker::PhantomData, + } + } + + /// Provides shared access to the operation cycle tracker. + /// + /// External lifecycle events (power-on, ignition, etc.) should use this + /// to increment the appropriate cycle counters, which in turn drive + /// fault aging/reset evaluation. + pub fn cycle_tracker(&self) -> &Arc> { + &self.cycle_tracker + } + + /// Provides shared access to the operation cycle provider, if one was set. + pub fn cycle_provider(&self) -> Option<&Arc>>> { + self.cycle_provider.as_ref() + } + + pub fn create_sovd_fault_manager(&self) -> SovdFaultManager { + SovdFaultManager::new(Arc::clone(&self.storage), Arc::clone(&self.registry)) + } + + /// Returns a [`DirectDfmQuery`] - in-process implementation of [`DfmQueryApi`]. + /// + /// This is the preferred way to obtain a query API handle when the SOVD + /// consumer runs in the same process as the DFM. + pub fn query_api(&self) -> crate::query_api::DirectDfmQuery { + crate::query_api::DirectDfmQuery::new(Arc::clone(&self.storage), Arc::clone(&self.registry)) + } +} + +/// Timeout for joining the receiver thread during drop. 
+const DROP_JOIN_TIMEOUT: core::time::Duration = core::time::Duration::from_secs(2); + +impl Drop for DiagnosticFaultManager { + fn drop(&mut self) { + self.shutdown.store(true, Ordering::Release); + + if let Some(handle) = self.fault_lib_receiver_thread.take() { + info!("Joining fault_lib_receiver_thread"); + let (join_tx, join_rx) = std::sync::mpsc::channel(); + thread::spawn(move || { + let result = handle.join(); + let _ = join_tx.send(result); + }); + + match join_rx.recv_timeout(DROP_JOIN_TIMEOUT) { + Ok(Ok(())) => info!("fault_lib_receiver_thread done"), + Ok(Err(err)) => error!("fault_lib_receiver_thread panicked: {err:?}"), + Err(_) => error!("fault_lib_receiver_thread did not exit within {DROP_JOIN_TIMEOUT:?}, abandoning"), + } + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +#[cfg(not(miri))] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + use crate::dfm_test_utils::*; + use crate::operation_cycle::ManualCycleProvider; + use serial_test::serial; + + /// Unwrap Arc from test helpers into owned value. 
+ fn owned_registry() -> FaultCatalogRegistry { + Arc::try_unwrap(make_text_registry()).ok().expect("Arc has exactly one strong ref") + } + + // ---------- Wiring and construction ---------- + + #[test] + #[serial(ipc)] + fn dfm_creates_and_drops_cleanly() { + let dfm = DiagnosticFaultManager::new(InMemoryStorage::new(), owned_registry()); + + // No cycle provider when created with new() + assert!(dfm.cycle_provider().is_none()); + + // Drop should cleanly shut down the receiver thread + drop(dfm); + } + + // ---------- with_cycle_provider ---------- + + #[test] + #[serial(ipc)] + fn dfm_with_cycle_provider_stores_provider() { + let provider = ManualCycleProvider::new(); + let dfm = DiagnosticFaultManager::with_cycle_provider(InMemoryStorage::new(), owned_registry(), Some(Box::new(provider))); + + assert!(dfm.cycle_provider().is_some()); + drop(dfm); + } + + #[test] + #[serial(ipc)] + fn dfm_without_cycle_provider_returns_none() { + let dfm = DiagnosticFaultManager::with_cycle_provider(InMemoryStorage::new(), owned_registry(), None); + + assert!(dfm.cycle_provider().is_none()); + drop(dfm); + } + + // ---------- create_sovd_fault_manager ---------- + + #[test] + #[serial(ipc)] + fn dfm_returns_sovd_fault_manager() { + let dfm = DiagnosticFaultManager::new(InMemoryStorage::new(), owned_registry()); + + let sovd_manager = dfm.create_sovd_fault_manager(); + let faults = sovd_manager.get_all_faults("test_entity").unwrap(); + assert_eq!(faults.len(), 2, "Should return all descriptors from catalog"); + + drop(dfm); + } + + // ---------- cycle_tracker ---------- + + #[test] + #[serial(ipc)] + fn dfm_cycle_tracker_is_accessible() { + let dfm = DiagnosticFaultManager::new(InMemoryStorage::new(), owned_registry()); + + let tracker = dfm.cycle_tracker(); + // Should be able to read from the tracker + let read = tracker.read().unwrap(); + assert_eq!(read.get("power"), 0, "Empty tracker has no cycles"); + drop(read); + + // Should be able to write to the tracker + let mut write 
= tracker.write().unwrap(); + write.increment("power"); + assert_eq!(write.get("power"), 1); + drop(write); + + drop(dfm); + } + + // ---------- Shutdown behavior ---------- + + #[test] + #[serial(ipc)] + fn dfm_shutdown_is_idempotent() { + // Create and immediately drop multiple times — should not panic + let dfm1 = DiagnosticFaultManager::new(InMemoryStorage::new(), owned_registry()); + drop(dfm1); + + let dfm2 = DiagnosticFaultManager::new(InMemoryStorage::new(), owned_registry()); + drop(dfm2); + } +} diff --git a/src/dfm_lib/src/enabling_condition_registry.rs b/src/dfm_lib/src/enabling_condition_registry.rs new file mode 100644 index 0000000..2ce130a --- /dev/null +++ b/src/dfm_lib/src/enabling_condition_registry.rs @@ -0,0 +1,179 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! DFM-side registry for enabling conditions. +//! +//! Tracks registered enabling conditions and their current statuses. +//! When a status changes, the registry notifies the communicator to +//! broadcast the change to all FaultLib subscribers. + +use common::enabling_condition::EnablingConditionStatus; +use log::{debug, info, warn}; +use std::collections::HashMap; + +/// DFM-side registry of enabling conditions. +/// +/// Thread-safe: the DFM communicator calls methods from its receiver thread. +/// No internal locking needed since the communicator is single-threaded. +pub struct EnablingConditionRegistry { + /// Registered conditions: entity → current status. 
+ conditions: HashMap, +} + +impl Default for EnablingConditionRegistry { + fn default() -> Self { + Self::new() + } +} + +impl EnablingConditionRegistry { + pub fn new() -> Self { + Self { conditions: HashMap::new() } + } + + /// Register a new enabling condition. + /// + /// If the condition is already registered, logs a warning and returns + /// the current status. New conditions start as `Inactive`. + pub fn register(&mut self, entity: &str) -> EnablingConditionStatus { + if let Some(status) = self.conditions.get(entity) { + warn!("Enabling condition '{entity}' already registered, current status: {status:?}"); + return *status; + } + let status = EnablingConditionStatus::Inactive; + self.conditions.insert(entity.to_string(), status); + info!("Registered enabling condition: {entity}"); + status + } + + /// Update the status of an enabling condition. + /// + /// Returns `Some(new_status)` if the status actually changed (for + /// notification dispatch), `None` if no change occurred. + pub fn update_status(&mut self, entity: &str, status: EnablingConditionStatus) -> Option { + if let Some(current) = self.conditions.get_mut(entity) { + if *current == status { + debug!("Enabling condition '{entity}' status unchanged: {status:?}"); + return None; + } + *current = status; + info!("Enabling condition '{entity}' status changed to {status:?}"); + Some(status) + } else { + // Auto-register on first status report + self.conditions.insert(entity.to_string(), status); + info!("Auto-registered enabling condition '{entity}' with status {status:?}"); + Some(status) + } + } + + /// Get the current status of an enabling condition. + pub fn get_status(&self, entity: &str) -> Option { + self.conditions.get(entity).copied() + } + + /// Get all registered conditions and their statuses. + pub fn all_conditions(&self) -> &HashMap { + &self.conditions + } + + /// Number of registered enabling conditions. 
+ pub fn len(&self) -> usize { + self.conditions.len() + } + + /// Whether there are no registered enabling conditions. + pub fn is_empty(&self) -> bool { + self.conditions.is_empty() + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + #[test] + fn new_registry_is_empty() { + let reg = EnablingConditionRegistry::new(); + assert!(reg.is_empty()); + assert_eq!(reg.len(), 0); + } + + #[test] + fn register_creates_inactive_condition() { + let mut reg = EnablingConditionRegistry::new(); + let status = reg.register("vehicle.speed.valid"); + assert_eq!(status, EnablingConditionStatus::Inactive); + assert_eq!(reg.len(), 1); + assert_eq!(reg.get_status("vehicle.speed.valid"), Some(EnablingConditionStatus::Inactive)); + } + + #[test] + fn register_duplicate_returns_current_status() { + let mut reg = EnablingConditionRegistry::new(); + reg.register("engine.running"); + reg.update_status("engine.running", EnablingConditionStatus::Active); + let status = reg.register("engine.running"); + assert_eq!(status, EnablingConditionStatus::Active); + assert_eq!(reg.len(), 1); + } + + #[test] + fn update_status_returns_new_on_change() { + let mut reg = EnablingConditionRegistry::new(); + reg.register("engine.running"); + let result = reg.update_status("engine.running", EnablingConditionStatus::Active); + assert_eq!(result, Some(EnablingConditionStatus::Active)); + } + + #[test] + fn update_status_returns_none_on_no_change() { + let mut reg = EnablingConditionRegistry::new(); + reg.register("engine.running"); + let result = reg.update_status("engine.running", EnablingConditionStatus::Inactive); + assert_eq!(result, None); + } + + #[test] + fn update_status_auto_registers_unknown_condition() { + let mut reg = EnablingConditionRegistry::new(); + let result = reg.update_status("new.condition", EnablingConditionStatus::Active); + assert_eq!(result, Some(EnablingConditionStatus::Active)); + assert_eq!(reg.len(), 1); + } + + #[test] + fn 
get_status_returns_none_for_unknown() { + let reg = EnablingConditionRegistry::new(); + assert_eq!(reg.get_status("nonexistent"), None); + } + + #[test] + fn all_conditions_returns_full_map() { + let mut reg = EnablingConditionRegistry::new(); + reg.register("a"); + reg.register("b"); + reg.update_status("a", EnablingConditionStatus::Active); + + let all = reg.all_conditions(); + assert_eq!(all.len(), 2); + assert_eq!(all.get("a"), Some(&EnablingConditionStatus::Active)); + assert_eq!(all.get("b"), Some(&EnablingConditionStatus::Inactive)); + } + + #[test] + fn default_creates_empty_registry() { + let reg = EnablingConditionRegistry::default(); + assert!(reg.is_empty()); + } +} diff --git a/src/dfm_lib/src/fault_catalog_registry.rs b/src/dfm_lib/src/fault_catalog_registry.rs new file mode 100644 index 0000000..ea16e45 --- /dev/null +++ b/src/dfm_lib/src/fault_catalog_registry.rs @@ -0,0 +1,50 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! In-memory registry of fault catalogs received during handshake. +//! +//! Each connected application registers its [`FaultCatalog`] keyed by an +//! entity path. The registry is consulted by +//! [`FaultRecordProcessor`](crate::fault_record_processor::FaultRecordProcessor) +//! for hash verification and debounce lookup, and by +//! [`SovdFaultManager`](crate::sovd_fault_manager::SovdFaultManager) for +//! descriptor resolution. + +use alloc::borrow::Cow; +use common::catalog::FaultCatalog; +use std::collections::HashMap; + +/// In-memory registry of fault catalogs, keyed by their entity path. +/// +/// Each connected application registers its catalog during handshake. 
+/// The registry is used by `SovdFaultManager` to resolve fault descriptors +/// and by `FaultRecordProcessor` for hash verification and debounce lookup. +pub struct FaultCatalogRegistry { + pub(crate) catalogs: HashMap, FaultCatalog>, +} + +impl FaultCatalogRegistry { + pub fn new(entries: Vec) -> Self { + let mut catalogs = HashMap::with_capacity(entries.len()); + for entry in entries { + if catalogs.contains_key(&entry.id) { + log::warn!("Duplicate catalog ID '{}' - overwriting previous entry", entry.id); + } + catalogs.insert(entry.id.clone(), entry); + } + Self { catalogs } + } + + pub fn get(&self, path: &str) -> Option<&FaultCatalog> { + self.catalogs.get(path) + } +} diff --git a/src/dfm_lib/src/fault_lib_communicator.rs b/src/dfm_lib/src/fault_lib_communicator.rs new file mode 100644 index 0000000..d2c734f --- /dev/null +++ b/src/dfm_lib/src/fault_lib_communicator.rs @@ -0,0 +1,666 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! IPC communication layer between reporter applications and the DFM. +//! +//! [`Iceoryx2Transport`] is the default [`DfmTransport`](crate::transport::DfmTransport) +//! implementation that uses iceoryx2 zero-copy shared memory for: +//! - **Diagnostic events** — fault reports, hash checks, enabling-condition +//! registrations/status changes. +//! - **Hash-check responses** — sent back to reporters after catalog +//! verification. +//! - **Enabling-condition notifications** — broadcast to all FaultLib +//! instances when a condition status changes. +//! +//! The generic [`run_dfm_loop`] function drives the DFM event loop using +//! 
//! any `DfmTransport` implementation, enabling alternative transports
//! (in-memory channels, network-based, etc.) without modifying the core
//! DFM logic.

use common::enabling_condition::EnablingConditionNotification;
use common::ipc_service_name::{
    DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME, DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME,
    ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME,
};
use common::ipc_service_type::ServiceType;
use common::sink_error::SinkError;
use common::types::DiagnosticEvent;

use crate::enabling_condition_registry::EnablingConditionRegistry;
use crate::fault_record_processor::FaultRecordProcessor;
use crate::query_server::DfmQueryServer;
use crate::sovd_fault_storage::SovdFaultStateStorage;
use crate::transport::DfmTransport;
use alloc::sync::Arc;
use core::sync::atomic::{AtomicBool, Ordering};
use core::time::Duration;
use iceoryx2::node::NodeBuilder;
use iceoryx2::port::publisher::Publisher;
use iceoryx2::port::subscriber::Subscriber;
use iceoryx2::prelude::{Node, NodeName, ServiceName};
use log::info;

// Name of the iceoryx2 node the DFM listener runs on (see try_new below).
const DIAGNOSTIC_FAULT_MANAGER_LISTENER_NODE_NAME: &str = "fault_listener_node";

/// Default DFM cycle time used by [`Iceoryx2Transport`].
pub const DEFAULT_DFM_CYCLE_TIME: Duration = Duration::from_millis(10);

/// Errors that can occur during [`Iceoryx2Transport`] initialization.
///
/// Both variants carry the Debug-formatted iceoryx2 error as a string,
/// since the underlying error types are heterogeneous.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum TransportInitError {
    /// iceoryx2 node creation failed.
    #[error("node creation failed: {0}")]
    NodeCreation(String),
    /// iceoryx2 service or port creation failed.
    #[error("service creation failed: {0}")]
    ServiceCreation(String),
}

/// iceoryx2-based [`DfmTransport`] implementation (production default).
///
/// Uses iceoryx2 zero-copy shared memory publishers/subscribers for
/// communication between the DFM and reporter applications (FaultLib).
+pub struct Iceoryx2Transport { + catalog_hash_response_publisher: Publisher, + ec_notification_publisher: Publisher, + diagnostic_event_subscriber: Subscriber, + node: Node, +} + +/// Service names bundle for test isolation. +#[cfg(test)] +pub(crate) struct TestServiceNames { + pub event: String, + pub hash_response: String, + pub ec_notification: String, +} + +impl Default for Iceoryx2Transport { + /// Creates a new transport using [`try_new`](Iceoryx2Transport::try_new). + /// + /// # Panics + /// + /// Panics if iceoryx2 IPC service creation fails. + #[allow(clippy::expect_used)] + fn default() -> Self { + Self::try_new().expect("Iceoryx2Transport initialization failed") + } +} + +impl Iceoryx2Transport { + /// Create a new iceoryx2 transport with default service names. + /// + /// # Panics + /// + /// Panics if iceoryx2 IPC service creation fails. These are system-level + /// failures during initialization (no shared memory, no permissions). + #[allow(clippy::expect_used)] + pub fn new() -> Self { + Self::try_new().expect("Iceoryx2Transport initialization failed") + } + + /// Fallible constructor for iceoryx2 transport with default service names. + /// + /// Prefer this over [`new`](Self::new) when the caller can handle + /// initialization failures gracefully. 
+ pub fn try_new() -> Result { + let node_name = NodeName::new(DIAGNOSTIC_FAULT_MANAGER_LISTENER_NODE_NAME).map_err(|e| TransportInitError::NodeCreation(format!("{e:?}")))?; + let node = NodeBuilder::new() + .name(&node_name) + .create::() + .map_err(|e| TransportInitError::NodeCreation(format!("{e:?}")))?; + + let diagnostic_event_subscriber_service_name = + ServiceName::new(DIAGNOSTIC_FAULT_MANAGER_EVENT_SERVICE_NAME).map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let diagnostic_event_subscriber_service = node + .service_builder(&diagnostic_event_subscriber_service_name) + .publish_subscribe::() + .open_or_create() + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let diagnostic_event_subscriber = diagnostic_event_subscriber_service + .subscriber_builder() + .create() + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + + let hash_response_service_name = ServiceName::new(DIAGNOSTIC_FAULT_MANAGER_HASH_CHECK_RESPONSE_SERVICE_NAME) + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let hash_response_service = node + .service_builder(&hash_response_service_name) + .publish_subscribe::() + .open_or_create() + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let catalog_hash_response_publisher = hash_response_service + .publisher_builder() + .create() + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + + // Enabling condition notification publisher (DFM → FaultLib) + let ec_notification_service_name = + ServiceName::new(ENABLING_CONDITION_NOTIFICATION_SERVICE_NAME).map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let ec_notification_service = node + .service_builder(&ec_notification_service_name) + .publish_subscribe::() + .open_or_create() + .map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + let ec_notification_publisher = ec_notification_service + .publisher_builder() + .create() + 
.map_err(|e| TransportInitError::ServiceCreation(format!("{e:?}")))?; + + Ok(Iceoryx2Transport { + diagnostic_event_subscriber, + catalog_hash_response_publisher, + ec_notification_publisher, + node, + }) + } + + /// Create an iceoryx2 transport with custom service names for test isolation. + /// + /// Each test can supply unique service names to avoid iceoryx2 shared + /// memory conflicts when tests run in parallel. + #[cfg(test)] + #[allow(clippy::expect_used, clippy::unwrap_used)] + pub(crate) fn with_test_services(names: &TestServiceNames) -> Self { + let node_name = NodeName::new("test_fault_listener_node").unwrap(); + let node = NodeBuilder::new() + .name(&node_name) + .create::() + .expect("Failed to create test listener node"); + + let event_svc_name = ServiceName::new(&names.event).unwrap(); + let event_service = node + .service_builder(&event_svc_name) + .publish_subscribe::() + .open_or_create() + .expect("Failed to create test event service"); + let diagnostic_event_subscriber = event_service.subscriber_builder().create().expect("Failed to create test subscriber"); + + let hash_svc_name = ServiceName::new(&names.hash_response).unwrap(); + let hash_service = node + .service_builder(&hash_svc_name) + .publish_subscribe::() + .open_or_create() + .expect("Failed to create test hash service"); + let catalog_hash_response_publisher = hash_service.publisher_builder().create().expect("Failed to create test hash publisher"); + + let ec_svc_name = ServiceName::new(&names.ec_notification).unwrap(); + let ec_service = node + .service_builder(&ec_svc_name) + .publish_subscribe::() + .open_or_create() + .expect("Failed to create test EC service"); + let ec_notification_publisher = ec_service.publisher_builder().create().expect("Failed to create test EC publisher"); + + Iceoryx2Transport { + diagnostic_event_subscriber, + catalog_hash_response_publisher, + ec_notification_publisher, + node, + } + } +} + +impl DfmTransport for Iceoryx2Transport { + fn 
receive_event(&self) -> Result, SinkError> { + match self.diagnostic_event_subscriber.receive() { + Ok(Some(sample)) => Ok(Some(sample.payload().clone())), + Ok(None) => Ok(None), + Err(_) => Err(SinkError::TransportDown), + } + } + + fn publish_hash_response(&self, response: bool) -> Result<(), SinkError> { + let sample = self.catalog_hash_response_publisher.loan_uninit().map_err(|_| SinkError::TransportDown)?; + let sample = sample.write_payload(response); + sample.send().map_err(|_| SinkError::TransportDown).map(|_| ()) + } + + fn publish_ec_notification(&self, notification: EnablingConditionNotification) -> Result<(), SinkError> { + let sample = self.ec_notification_publisher.loan_uninit().map_err(|_| SinkError::TransportDown)?; + let sample = sample.write_payload(notification); + sample.send().map_err(|_| SinkError::TransportDown).map(|_| ()) + } + + fn wait(&self, timeout: Duration) -> Result { + match self.node.wait(timeout) { + Ok(_) => Ok(true), + Err(_) => Ok(false), + } + } +} + +/// Optional extensions for the DFM event loop. +/// +/// Groups optional services to avoid growing `run_dfm_loop`'s parameter list +/// each time a new capability is added. +pub struct DfmLoopExtensions<'a, S: SovdFaultStateStorage> { + /// Query server for handling external SOVD query/clear requests. + pub query_server: Option<&'a DfmQueryServer>, +} + +/// Run the DFM event loop using a generic [`DfmTransport`]. +/// +/// This is the core DFM run-loop extracted from the former +/// `FaultLibCommunicator::run_with_provider`. It is transport-agnostic: +/// any implementation of [`DfmTransport`] can be used. +/// +/// The loop polls for events at `cycle_time` intervals (default: +/// [`DEFAULT_DFM_CYCLE_TIME`]) and processes them through the +/// `FaultRecordProcessor` and `EnablingConditionRegistry`. 
#[allow(clippy::too_many_arguments)]
// NOTE(review): the generic parameter list and several container type
// arguments in this signature were stripped by the patch extraction. The
// `<T, S>` bounds and `Mutex<...>` / `RwLock<OperationCycleTracker>`
// reconstructions below follow the lock/poll usage in the body and the
// `FaultRecordProcessor<S>` definition — TODO confirm against the original.
pub fn run_dfm_loop<T, S>(
    transport: &T,
    shutdown: &AtomicBool,
    processor: &mut FaultRecordProcessor<S>,
    ec_registry: &mut EnablingConditionRegistry,
    cycle_provider: Option<&Arc<Mutex<Box<dyn OperationCycleProvider>>>>,
    cycle_tracker: &Arc<RwLock<OperationCycleTracker>>,
    cycle_time: Duration,
    extensions: DfmLoopExtensions<'_, S>,
) where
    T: DfmTransport,
    S: SovdFaultStateStorage,
{
    info!("DFM transport listening...");
    while !shutdown.load(Ordering::Acquire) {
        // Wait for one cycle. Returns false if the transport node died.
        match transport.wait(cycle_time) {
            Ok(true) => {}
            Ok(false) => {
                info!("Transport node died, exiting DFM loop");
                break;
            }
            Err(e) => {
                log::error!("Transport wait error: {e:?}");
                break;
            }
        }

        // Poll operation-cycle provider (if attached) and apply events.
        if let Some(provider_arc) = cycle_provider {
            // Mutex lock scope is intentionally narrow to avoid holding
            // the lock while processing IPC messages.
            let events = {
                let mut provider = provider_arc.lock().unwrap_or_else(|e| e.into_inner());
                provider.poll()
            };
            if !events.is_empty() {
                let mut tracker = cycle_tracker.write().unwrap_or_else(|e| e.into_inner());
                let incremented = tracker.apply_events(&events);
                for name in &incremented {
                    log::trace!("Operation cycle '{name}' incremented via provider");
                }
            }
        }

        // Drain all available messages
        loop {
            let event = match transport.receive_event() {
                Ok(Some(e)) => e,
                Ok(None) => break, // no more messages
                Err(e) => {
                    log::error!("IPC receive error: {e:?}");
                    break;
                }
            };
            match &event {
                // NOTE: Enabling conditions are informational (monitor/callback pattern),
                // not enforcement gates. Faults are processed regardless of condition
                // status. If gating is needed, check ec_registry.is_condition_met()
                // before process_record() and skip with a trace! log.
                DiagnosticEvent::Fault((path, fault)) => {
                    info!("Received new fault ID: {:?}", fault.id);
                    processor.process_record(path, fault);
                }
                DiagnosticEvent::Hash((path, hash_sum)) => {
                    let result = processor.check_hash_sum(path, hash_sum);
                    info!("Received hash: {hash_sum:?}");
                    transport.publish_hash_response(result).unwrap_or_else(|e| {
                        log::error!("Failed to publish hash response: {e:?}");
                    });
                }
                DiagnosticEvent::EnablingConditionRegister(entity) => {
                    let entity_str = entity.to_string();
                    info!("Received enabling condition registration: {entity_str}");
                    ec_registry.register(&entity_str);
                }
                DiagnosticEvent::EnablingConditionStatusChange((id, status)) => {
                    let id_str = id.to_string();
                    info!("Received enabling condition status change: {id_str} -> {status:?}");
                    if let Some(new_status) = ec_registry.update_status(&id_str, *status) {
                        // Broadcast notification to all FaultLib subscribers
                        let notification = EnablingConditionNotification { id: *id, status: new_status };
                        if let Err(e) = transport.publish_ec_notification(notification) {
                            log::error!("Failed to publish EC notification for '{id_str}': {e:?}");
                        }
                    }
                }
            }
        }

        // Process query requests (SOVD query/clear from external apps)
        if let Some(qs) = extensions.query_server
            && let Err(e) = qs.poll()
        {
            log::error!("Query server poll error: {e:?}");
        }
    }
    info!("DFM transport loop shutdown complete");
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
#[cfg(not(miri))]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::std_instead_of_core,
    clippy::std_instead_of_alloc,
    clippy::arithmetic_side_effects
)]
mod tests {
    use super::*;
    use crate::dfm_test_utils::*;
    use crate::enabling_condition_registry::EnablingConditionRegistry;
    use crate::fault_record_processor::FaultRecordProcessor;
    use
crate::sovd_fault_storage::SovdFaultStateStorage;
    use common::fault::{FaultId, LifecycleStage};
    use common::types::{LongString, to_static_short_string};
    use serial_test::serial;
    use std::sync::Arc;
    use std::sync::atomic::AtomicU32;
    use std::thread;
    use std::time::Duration;

    static TEST_COUNTER: AtomicU32 = AtomicU32::new(0);

    /// Build service names unique per process and per invocation so that
    /// parallel test runs never collide on iceoryx2 shared-memory services.
    fn unique_service_names(prefix: &str) -> TestServiceNames {
        let id = TEST_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let pid = std::process::id();
        TestServiceNames {
            event: format!("test/{prefix}/{pid}/{id}/event"),
            hash_response: format!("test/{prefix}/{pid}/{id}/hash"),
            ec_notification: format!("test/{prefix}/{pid}/{id}/ec"),
        }
    }

    /// Send an event to the communicator via a publisher on the same service name.
    // NOTE(review): the generic arguments (`create::<...>()`,
    // `publish_subscribe::<...>()`) were stripped by the patch extraction
    // and are reconstructed here — TODO confirm against the original.
    fn send_event(svc_name: &str, event: DiagnosticEvent) {
        let node = NodeBuilder::new().create::<ipc::Service>().expect("node");
        let svc = ServiceName::new(svc_name).expect("svc name");
        let service = node
            .service_builder(&svc)
            .publish_subscribe::<DiagnosticEvent>()
            .open_or_create()
            .expect("event service");
        let publisher = service.publisher_builder().create().expect("publisher");
        // Allow iceoryx2 discovery to complete before publishing
        thread::sleep(Duration::from_millis(50));
        let sample = publisher.loan_uninit().expect("loan");
        let sample = sample.write_payload(event);
        sample.send().expect("send");
    }

    // ---------- Startup / Shutdown ----------

    #[test]
    #[serial(ipc)]
    fn communicator_starts_and_shuts_down() {
        let names = unique_service_names("start_stop");
        let transport = Iceoryx2Transport::with_test_services(&names);
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_registry();
        let cycle_tracker = make_cycle_tracker();
        let processor = FaultRecordProcessor::new(storage, registry, cycle_tracker.clone());
        let mut ec_registry = EnablingConditionRegistry::new();

        let shutdown = Arc::new(AtomicBool::new(false));
        let shutdown_clone = shutdown.clone();

        let handle = thread::spawn(move || {
            run_dfm_loop(
                &transport,
                &shutdown_clone,
                &mut { processor },
                &mut ec_registry,
                None,
                &cycle_tracker,
                DEFAULT_DFM_CYCLE_TIME,
                DfmLoopExtensions { query_server: None },
            );
        });

        // Let it run briefly
        thread::sleep(Duration::from_millis(50));
        shutdown.store(true, Ordering::Release);

        // Join via a channel so a hung loop fails the test instead of
        // hanging the whole test binary.
        let (join_tx, join_rx) = std::sync::mpsc::channel();
        thread::spawn(move || {
            let _ = join_tx.send(handle.join());
        });

        match join_rx.recv_timeout(Duration::from_secs(5)) {
            Ok(Ok(())) => {}
            Ok(Err(e)) => std::panic::resume_unwind(e),
            Err(_) => panic!("Communicator did not shut down within 5 seconds"),
        }
    }

    // ---------- Fault Event Processing ----------

    #[test]
    #[serial(ipc)]
    fn communicator_processes_fault_event() {
        let names = unique_service_names("fault_event");
        let transport = Iceoryx2Transport::with_test_services(&names);
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_registry();
        let cycle_tracker = make_cycle_tracker();
        let processor = FaultRecordProcessor::new(storage.clone(), registry, cycle_tracker.clone());
        let mut ec_registry = EnablingConditionRegistry::new();

        let shutdown = Arc::new(AtomicBool::new(false));
        let shutdown_clone = shutdown.clone();

        let handle = thread::spawn(move || {
            run_dfm_loop(
                &transport,
                &shutdown_clone,
                &mut { processor },
                &mut ec_registry,
                None,
                &cycle_tracker,
                DEFAULT_DFM_CYCLE_TIME,
                DfmLoopExtensions { query_server: None },
            );
        });

        // Give the communicator time to start listening
        thread::sleep(Duration::from_millis(100));

        // Send a fault event via IPC
        let path = LongString::from_str_truncated("test_entity").unwrap();
        let record = make_record(FaultId::Numeric(42), LifecycleStage::Failed);
        let event = DiagnosticEvent::Fault((path, record));
        send_event(&names.event, event);

        // Wait for processing
        thread::sleep(Duration::from_millis(200));
        shutdown.store(true, Ordering::Release);

        let (join_tx, join_rx) = std::sync::mpsc::channel();
        thread::spawn(move || {
            let _ = join_tx.send(handle.join());
        });
        join_rx.recv_timeout(Duration::from_secs(5)).unwrap().unwrap();

        // Verify the fault was stored
        let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap();
        assert!(state.is_some(), "Communicator should have processed the fault event");
        let state = state.unwrap();
        assert!(state.test_failed);
        assert!(state.confirmed_dtc);
    }

    // ---------- Enabling Condition Registration ----------

    #[test]
    #[serial(ipc)]
    fn communicator_handles_ec_registration() {
        let names = unique_service_names("ec_reg");
        let transport = Iceoryx2Transport::with_test_services(&names);
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_registry();
        let cycle_tracker = make_cycle_tracker();
        let processor = FaultRecordProcessor::new(storage, registry, cycle_tracker.clone());

        let ec_registry = Arc::new(std::sync::Mutex::new(EnablingConditionRegistry::new()));
        let ec_for_thread = ec_registry.clone();

        let shutdown = Arc::new(AtomicBool::new(false));
        let shutdown_clone = shutdown.clone();

        let handle = thread::spawn(move || {
            // Lock held for the loop's entire lifetime: the registry is only
            // shared so the test could inspect it after shutdown.
            let mut ec = ec_for_thread.lock().unwrap();
            run_dfm_loop(
                &transport,
                &shutdown_clone,
                &mut { processor },
                &mut ec,
                None,
                &cycle_tracker,
                DEFAULT_DFM_CYCLE_TIME,
                DfmLoopExtensions { query_server: None },
            );
        });

        thread::sleep(Duration::from_millis(50));

        // Send EC registration event
        let entity = to_static_short_string("engine.running").unwrap();
        let event = DiagnosticEvent::EnablingConditionRegister(entity);
        send_event(&names.event, event);

        thread::sleep(Duration::from_millis(100));
        shutdown.store(true, Ordering::Release);

        let (join_tx, join_rx) = std::sync::mpsc::channel();
        thread::spawn(move || {
            let _ = join_tx.send(handle.join());
        });
        join_rx.recv_timeout(Duration::from_secs(5)).unwrap().unwrap();
    }

    // ---------- Immediate Shutdown ----------

    #[test]
    #[serial(ipc)]
    fn communicator_exits_immediately_on_shutdown() {
        let names = unique_service_names("immediate_stop");
        let transport = Iceoryx2Transport::with_test_services(&names);
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_registry();
        let cycle_tracker = make_cycle_tracker();
        let processor = FaultRecordProcessor::new(storage, registry, cycle_tracker.clone());
        let mut ec_registry = EnablingConditionRegistry::new();

        // Pre-set shutdown before starting
        let shutdown = Arc::new(AtomicBool::new(true));
        let shutdown_clone = shutdown.clone();

        let handle = thread::spawn(move || {
            run_dfm_loop(
                &transport,
                &shutdown_clone,
                &mut { processor },
                &mut ec_registry,
                None,
                &cycle_tracker,
                DEFAULT_DFM_CYCLE_TIME,
                DfmLoopExtensions { query_server: None },
            );
        });

        let (join_tx, join_rx) = std::sync::mpsc::channel();
        thread::spawn(move || {
            let _ = join_tx.send(handle.join());
        });

        match join_rx.recv_timeout(Duration::from_secs(5)) {
            Ok(Ok(())) => {}
            Ok(Err(e)) => std::panic::resume_unwind(e),
            Err(_) => panic!("Communicator should exit immediately when shutdown is pre-set"),
        }
    }

    // ---------- Multiple Messages ----------

    #[test]
    #[serial(ipc)]
    fn communicator_processes_multiple_events() {
        let names = unique_service_names("multi_event");
        let transport = Iceoryx2Transport::with_test_services(&names);
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_text_registry();
        let cycle_tracker = make_cycle_tracker();
        let processor = FaultRecordProcessor::new(storage.clone(), registry, cycle_tracker.clone());
        let mut ec_registry = EnablingConditionRegistry::new();

        let shutdown = Arc::new(AtomicBool::new(false));
        let shutdown_clone = shutdown.clone();

        let handle = thread::spawn(move || {
            run_dfm_loop(
                &transport,
                &shutdown_clone,
                &mut { processor },
                &mut ec_registry,
                None,
                &cycle_tracker,
                DEFAULT_DFM_CYCLE_TIME,
                DfmLoopExtensions { query_server: None },
            );
        });

        thread::sleep(Duration::from_millis(100));

        // Send two fault events
        let path = LongString::from_str_truncated("test_entity").unwrap();
        let record_a = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed);
        let record_b = make_record(FaultId::Text(to_static_short_string("fault_b").unwrap()), LifecycleStage::Passed);
        send_event(&names.event, DiagnosticEvent::Fault((path, record_a)));
        send_event(&names.event, DiagnosticEvent::Fault((path, record_b)));

        thread::sleep(Duration::from_millis(200));
        shutdown.store(true, Ordering::Release);

        let (join_tx, join_rx) = std::sync::mpsc::channel();
        thread::spawn(move || {
            let _ = join_tx.send(handle.join());
        });
        join_rx.recv_timeout(Duration::from_secs(5)).unwrap().unwrap();

        let state_a = storage
            .get("test_entity", &FaultId::Text(to_static_short_string("fault_a").unwrap()))
            .unwrap();
        assert!(state_a.is_some(), "fault_a should be stored");
        assert!(state_a.unwrap().test_failed);

        let state_b = storage
            .get("test_entity", &FaultId::Text(to_static_short_string("fault_b").unwrap()))
            .unwrap();
        assert!(state_b.is_some(), "fault_b should be stored");
        assert!(!state_b.unwrap().test_failed);
    }
}
diff --git a/src/dfm_lib/src/fault_record_processor.rs b/src/dfm_lib/src/fault_record_processor.rs
new file mode 100644
index 0000000..cc13a24
--- /dev/null
+++ b/src/dfm_lib/src/fault_record_processor.rs
@@ -0,0 +1,904 @@
// Copyright (c) 2026 Contributors to the Eclipse Foundation
//
// See the NOTICE file(s) distributed with this work for additional
// information regarding copyright ownership.
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Core processing logic for incoming fault reports. +//! +//! [`FaultRecordProcessor`] receives [`FaultRecord`](common::fault::FaultRecord) +//! messages from the IPC layer and applies: +//! - Catalog hash verification (reporter ↔ DFM agreement). +//! - Per-source debounce filtering. +//! - Lifecycle-stage transitions and aging-state management. +//! - Persistence of confirmed fault state via [`SovdFaultStateStorage`]. + +use crate::aging_manager::{AgingManager, AgingState}; +use crate::fault_catalog_registry::FaultCatalogRegistry; +use crate::operation_cycle::OperationCycleTracker; +use crate::sovd_fault_storage::SovdFaultStateStorage; +use alloc::sync::Arc; +use common::debounce::Debounce; +use common::fault; +use common::fault::{ComplianceTag, FaultId, LifecycleStage}; +use common::types::{LongString, Sha256Vec}; +use log::{error, info, trace, warn}; +use std::collections::HashMap; +use std::sync::RwLock; +use std::time::Instant; + +/// Unique key for per-fault debounce state in DFM. +/// Combines source path (IPC identity) and fault ID so that each +/// reporter app has independent debounce state (Option A — per-source). +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub(crate) struct FaultKey { + pub source: String, + pub fault_id: FaultId, +} + +impl FaultKey { + pub fn new(source: &str, fault_id: &FaultId) -> Self { + Self { + source: source.to_string(), + fault_id: fault_id.clone(), + } + } +} + +pub struct FaultRecordProcessor { + storage: Arc, + catalog_registry: Arc, + /// Per-source, per-fault debounce state. Lazily populated from catalog on first event. + debouncers: HashMap>, + /// Tracks last confirmed (post-debounce) lifecycle stage per fault key. + /// Used to detect transitions that should reset the debouncer. 
+ last_stages: HashMap, + /// Evaluates fault aging/reset policies against operation cycle counters. + aging_manager: AgingManager, + /// Per-fault aging state tracking cycle counts and timestamps. + /// Keyed by `(source, fault_id)` — same granularity as debounce state. + aging_states: HashMap, + /// Shared operation cycle tracker for aging state snapshots. + cycle_tracker: Arc>, +} + +impl FaultRecordProcessor { + pub fn new(storage: Arc, catalog_registry: Arc, cycle_tracker: Arc>) -> Self { + let aging_manager = AgingManager::new(Arc::clone(&cycle_tracker)); + Self { + storage, + catalog_registry, + debouncers: HashMap::new(), + last_stages: HashMap::new(), + aging_manager, + aging_states: HashMap::new(), + cycle_tracker, + } + } + + pub fn process_record(&mut self, path: &LongString, record: &fault::FaultRecord) { + let path_str = path.to_string(); + let key = FaultKey::new(&path_str, &record.id); + + // Validate lifecycle transition (warn-only, does not block for backward compat). + if let Some(last_stage) = self.last_stages.get(&key) + && !last_stage.is_valid_transition(&record.lifecycle_stage) + { + warn!( + "Invalid lifecycle transition {:?} → {:?} for fault {:?} from {:?}", + last_stage, record.lifecycle_stage, record.id, path_str + ); + } + + // Handle lifecycle transition — reset debounce on Passed→Failed etc. + self.handle_lifecycle_transition(&key, &record.lifecycle_stage); + + // Always track incoming lifecycle stage for transition detection. + // Updated BEFORE debounce check so a transition is consumed once + // and does not repeatedly reset the debouncer on suppressed events. + self.last_stages.insert(key.clone(), record.lifecycle_stage); + + // Apply manager-side debounce filter + if !self.check_debounce(&key, &record.id) { + trace!("Fault {:?} from {:?} suppressed by manager-side debounce", record.id, path_str); + return; + } + + // Read existing state to preserve counters and latched flags across events. 
+ let mut state = self.storage.get(&path_str, &record.id).ok().flatten().unwrap_or_default(); + + // Resolve whether this fault has an aging/reset policy configured. + let reset_policy = self.lookup_reset_policy(&key, &record.id); + + match record.lifecycle_stage { + fault::LifecycleStage::Failed => { + state.test_failed = true; + state.confirmed_dtc = true; + state.pending_dtc = false; + state.test_failed_this_operation_cycle = true; + state.test_failed_since_last_clear = true; + + // ISO 14229: WIR driven by descriptor compliance flags + if self.is_warning_indicator_relevant(&key, &record.id) { + state.warning_indicator_requested = true; + } + + let now_secs = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + state.record_occurrence(now_secs); + + self.mark_aging_active(&key); + } + fault::LifecycleStage::Passed => { + state.test_failed = false; + state.pending_dtc = false; + + if reset_policy.is_some() { + // ISO 14229 aging: confirmed_dtc stays latched until + // aging conditions are met. Only test_failed clears. + } else { + state.confirmed_dtc = false; + } + } + fault::LifecycleStage::PreFailed => { + state.test_failed = true; + state.pending_dtc = true; + state.test_failed_this_operation_cycle = true; + state.test_failed_since_last_clear = true; + + if self.is_warning_indicator_relevant(&key, &record.id) { + state.warning_indicator_requested = true; + } + + let now_secs = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + state.record_occurrence(now_secs); + + // Conservative (automotive-safe): any sign of fault activity + // keeps the DTC alive. A fault oscillating between PreFailed + // and PrePassed should not be silently aged out. 
+ self.mark_aging_active(&key); + } + fault::LifecycleStage::PrePassed => { + state.test_failed = false; + state.pending_dtc = false; + } + fault::LifecycleStage::NotTested => { + state.test_not_completed_this_operation_cycle = true; + state.test_not_completed_since_last_clear = true; + } + } + + // Evaluate aging reset: if the fault is no longer active and the + // reset policy trigger is satisfied, clear the latched DTC flags. + if let Some(policy) = &reset_policy + && let Some(aging_state) = self.aging_states.get_mut(&key) + && self.aging_manager.should_reset(policy, aging_state) + { + info!("Aging reset triggered for fault {:?} from {:?}", record.id, path_str); + self.aging_manager.apply_reset(aging_state, &mut state); + } + + // Preserve diagnostic env_data from failure stages — a Passed/PrePassed + // event must not overwrite the env_data captured during the fault. + if matches!(record.lifecycle_stage, fault::LifecycleStage::Failed | fault::LifecycleStage::PreFailed) { + state.env_data = record.env_data.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect(); + } + match self.storage.put(&path_str, &record.id, state) { + Ok(()) => info!("Fault ID {:?} stored", record.id), + Err(e) => error!("Failed to store fault ID {:?}: {}", record.id, e), + } + } + + /// Returns `true` if the event should be processed (passes debounce), + /// `false` if it should be suppressed. + fn check_debounce(&mut self, key: &FaultKey, fault_id: &FaultId) -> bool { + match self.get_or_create_debouncer(key, fault_id) { + Some(debouncer) => debouncer.on_event(Instant::now()), + None => true, // No debounce configured — pass through + } + } + + /// Lazily looks up the debouncer for a fault key, creating it from + /// the catalog's `manager_side_debounce` config on first access. + /// Returns `None` when no manager-side debounce is configured. 
+ fn get_or_create_debouncer(&mut self, key: &FaultKey, fault_id: &FaultId) -> Option<&mut Box> { + if self.debouncers.contains_key(key) { + return self.debouncers.get_mut(key); + } + + // Look up descriptor from catalog + let catalog = self.catalog_registry.get(&key.source)?; + let descriptor = catalog.descriptor(fault_id)?; + + // Check if manager-side debounce is configured + let debounce_mode = descriptor.manager_side_debounce?; + + let debouncer = debounce_mode.into_debouncer(); + self.debouncers.insert(key.clone(), debouncer); + self.debouncers.get_mut(key) + } + + /// Detects lifecycle transitions that should reset the debouncer, + /// e.g. when a fault clears (Passed) and then re-occurs (Failed). + fn handle_lifecycle_transition(&mut self, key: &FaultKey, new_stage: &LifecycleStage) { + if let Some(last_stage) = self.last_stages.get(key) + && Self::should_reset_debounce(last_stage, new_stage) + && let Some(debouncer) = self.debouncers.get_mut(key) + { + trace!( + "Resetting manager-side debounce for {:?} on {:?} → {:?} transition", + key.fault_id, last_stage, new_stage + ); + debouncer.reset(Instant::now()); + } + } + + /// Transitions that warrant a debounce reset: the fault was cleared + /// and is now re-entering a failure state. + fn should_reset_debounce(last: &LifecycleStage, new: &LifecycleStage) -> bool { + use LifecycleStage::*; + matches!((last, new), (Passed, Failed) | (Passed, PreFailed) | (NotTested, Failed)) + } + + /// Mark the aging state for a fault as active (failure re-occurred). + /// Creates the aging state entry if it doesn't exist yet. + fn mark_aging_active(&mut self, key: &FaultKey) { + // Recover from RwLock poisoning — data integrity is more important + // than propagating a panic from an unrelated thread. 
+ let tracker = self.cycle_tracker.read().unwrap_or_else(|e| e.into_inner()); + let aging_state = self.aging_states.entry(key.clone()).or_default(); + aging_state.mark_active(&tracker); + } + + /// Check whether the fault descriptor flags warrant a warning indicator. + /// + /// ISO 14229: `warning_indicator_requested` is set when the descriptor + /// carries `SafetyCritical` or `EmissionRelevant` compliance tags. + fn is_warning_indicator_relevant(&self, key: &FaultKey, fault_id: &FaultId) -> bool { + let Some(catalog) = self.catalog_registry.get(&key.source) else { + return false; + }; + let Some(descriptor) = catalog.descriptor(fault_id) else { + return false; + }; + descriptor + .compliance + .iter() + .any(|tag| matches!(tag, ComplianceTag::SafetyCritical | ComplianceTag::EmissionRelevant)) + } + + /// Handle a "clear DTC" operation for a specific path. + /// + /// ISO 14229: resets `*_since_last_clear` flags for all faults at this path. + /// This is triggered by diagnostic tool commands (UDS $14 ClearDTC). + pub fn clear_dtc(&self, path: &str) { + match self.storage.get_all(path) { + Ok(faults) => { + for (fault_id, mut state) in faults { + state.test_failed_since_last_clear = false; + state.test_not_completed_since_last_clear = false; + if let Err(e) = self.storage.put(path, &fault_id, state) { + error!("Failed to clear DTC flags for {fault_id:?}: {e}"); + } + } + info!("Clear DTC completed for path: {path}"); + } + Err(e) => error!("Failed to read faults for clear DTC on {path}: {e}"), + } + } + + /// Handle a new operation cycle boundary. + /// + /// ISO 14229: resets `*_this_operation_cycle` flags for all faults at this path. + /// Called when the operation cycle tracker detects a cycle transition. 
+ pub fn on_new_operation_cycle(&self, path: &str) { + match self.storage.get_all(path) { + Ok(faults) => { + for (fault_id, mut state) in faults { + state.test_failed_this_operation_cycle = false; + state.test_not_completed_this_operation_cycle = false; + if let Err(e) = self.storage.put(path, &fault_id, state) { + error!("Failed to reset cycle flags for {fault_id:?}: {e}"); + } + } + } + Err(e) => error!("Failed to read faults for cycle boundary on {path}: {e}"), + } + } + + /// Look up `manager_side_reset` policy from the fault catalog for this key. + fn lookup_reset_policy(&self, key: &FaultKey, fault_id: &FaultId) -> Option { + let catalog = self.catalog_registry.get(&key.source)?; + let descriptor = catalog.descriptor(fault_id)?; + descriptor.manager_side_reset.clone() + } + + /// Provide read access to the shared operation cycle tracker, + /// allowing external callers (e.g. DiagnosticFaultManager) to + /// advance operation cycles. + pub fn cycle_tracker(&self) -> &Arc> { + &self.cycle_tracker + } + + pub fn check_hash_sum(&self, path: &LongString, hash_sum: &Sha256Vec) -> bool { + match self.catalog_registry.get(&path.to_string()) { + Some(catalog) => { + let ret = catalog.config_hash() == hash_sum.to_vec(); + if !ret { + error!("Fault catalog hash sum error for {:?}", path.to_string()); + error!("Expected {:?}", catalog.config_hash()); + error!("Received {:?}", hash_sum.to_vec()); + } + ret + } + None => { + error!("Catalog hash sum entity {:?} not found ", path.to_string()); + false + } + } + } +} + +#[cfg(test)] +mod processor_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::dfm_test_utils::*; + use crate::fault_record_processor::FaultRecordProcessor; + use crate::sovd_fault_storage::SovdFaultStateStorage; + use common::fault::*; + use std::sync::Arc; + + fn make_processor( + storage: Arc, + registry: Arc, + ) -> 
FaultRecordProcessor { + FaultRecordProcessor::new(storage, registry, make_cycle_tracker()) + } + + /// Processor correctly handles Failed lifecycle stage. + #[test] + fn processor_handles_failed_stage() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + let path = make_path("test_entity"); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "Failed stage should store state"); + + let state = state.unwrap(); + assert!(state.test_failed, "Failed should set test_failed=true"); + assert!(state.confirmed_dtc, "Failed should set confirmed_dtc=true"); + } + + /// Processor correctly handles Passed lifecycle stage. + #[test] + fn processor_handles_passed_stage() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::Passed); + let path = make_path("test_entity"); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "Passed stage should store state"); + + let state = state.unwrap(); + assert!(!state.test_failed, "Passed should set test_failed=false"); + assert!(!state.confirmed_dtc, "Passed should set confirmed_dtc=false"); + } + + /// Processor handles transition from Failed to Passed. 
+ #[test] + fn processor_handles_failed_to_passed_transition() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + // First: Failed + let record_failed = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + processor.process_record(&path, &record_failed); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(state.test_failed); + assert!(state.confirmed_dtc); + + // Then: Passed + let record_passed = make_record(FaultId::Numeric(42), LifecycleStage::Passed); + processor.process_record(&path, &record_passed); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(!state.test_failed); + assert!(!state.confirmed_dtc); + } + + /// Processor handles PreFailed stage correctly. + #[test] + fn processor_handles_prefailed_stage() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::PreFailed); + let path = make_path("test_entity"); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "PreFailed stage should store state"); + let state = state.unwrap(); + assert!(state.test_failed, "PreFailed should set test_failed=true"); + assert!(state.pending_dtc, "PreFailed should set pending_dtc=true"); + assert!(!state.confirmed_dtc, "PreFailed should NOT set confirmed_dtc"); + } + + /// Processor handles PrePassed stage correctly. 
+ #[test] + fn processor_handles_prepassed_stage() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::PrePassed); + let path = make_path("test_entity"); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "PrePassed stage should store state"); + let state = state.unwrap(); + assert!(!state.test_failed, "PrePassed should set test_failed=false"); + assert!(!state.pending_dtc, "PrePassed should set pending_dtc=false"); + } + + /// Processor handles NotTested stage correctly. + #[test] + fn processor_handles_nottested_stage() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let mut processor = make_processor(storage.clone(), registry); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::NotTested); + let path = make_path("test_entity"); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "NotTested stage should store state"); + let state = state.unwrap(); + assert!( + state.test_not_completed_this_operation_cycle, + "NotTested should set test_not_completed_this_operation_cycle=true" + ); + } +} + +#[cfg(test)] +mod debounce_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::dfm_test_utils::*; + use crate::fault_record_processor::FaultRecordProcessor; + use crate::sovd_fault_storage::SovdFaultStateStorage as _; + use common::debounce::DebounceMode; + use common::fault::*; + use std::sync::Arc; + use std::time::Duration; + + fn make_processor( + storage: Arc, + registry: Arc, + ) -> FaultRecordProcessor { + 
FaultRecordProcessor::new(storage, registry, make_cycle_tracker()) + } + + /// DFM applies manager-side debounce: first (min_count-1) events are + /// suppressed, the min_count-th event fires and updates storage. + #[test] + fn dfm_applies_manager_side_debounce() { + let fault_id = FaultId::Numeric(100); + let debounce = DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(10).into(), + }; + let registry = make_debounce_registry("test_entity", fault_id.clone(), debounce); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + // Events 1 and 2: suppressed (count < min_count) + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &record); + assert!( + storage.get("test_entity", &fault_id).unwrap().is_none(), + "First event should be suppressed by debounce" + ); + + processor.process_record(&path, &record); + assert!( + storage.get("test_entity", &fault_id).unwrap().is_none(), + "Second event should be suppressed by debounce" + ); + + // Event 3: fires (count == min_count) + processor.process_record(&path, &record); + let state = storage.get("test_entity", &fault_id).unwrap(); + assert!(state.is_some(), "Third event should pass debounce and update storage"); + let state = state.unwrap(); + assert!(state.test_failed); + assert!(state.confirmed_dtc); + } + + /// When manager_side_debounce is None, every event passes through. 
+ #[test] + fn dfm_passes_through_without_debounce_config() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); // No debounce configured + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap(); + assert!(state.is_some(), "Event should pass through when no debounce configured"); + assert!(state.unwrap().test_failed); + } + + /// Debounce resets on lifecycle transition (Passed -> Failed). + /// After a Passed event fires, subsequent Failed events restart the counter. + #[test] + fn dfm_debounce_resets_on_lifecycle_transition() { + let fault_id = FaultId::Numeric(200); + let debounce = DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(60).into(), + }; + let registry = make_debounce_registry("test_entity", fault_id.clone(), debounce); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_record(fault_id.clone(), LifecycleStage::Passed); + + // 2 Failed events: suppressed (count 1, 2) + processor.process_record(&path, &failed); + processor.process_record(&path, &failed); + assert!( + storage.get("test_entity", &fault_id).unwrap().is_none(), + "First two Failed events should be suppressed" + ); + + // Passed event: this is the 3rd event, reaches min_count -> fires + processor.process_record(&path, &passed); + let state = storage.get("test_entity", &fault_id).unwrap(); + assert!(state.is_some(), "Passed event (3rd) should fire"); + assert!(!state.unwrap().test_failed, "Passed should clear test_failed"); + + // Now Failed again: lifecycle transition (Passed->Failed) resets debouncer 
+ // After reset, counter restarts: event 1 -> suppressed + processor.process_record(&path, &failed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.test_failed, "First Failed after reset should be suppressed; Passed state remains"); + + // Events 2 and 3 after reset + processor.process_record(&path, &failed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.test_failed, "Second Failed after reset should still be suppressed"); + + processor.process_record(&path, &failed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.test_failed, "Third Failed after reset should fire (counter reached min_count)"); + assert!(state.confirmed_dtc); + } + + /// Debounce state is independent per source (per-app). + /// app1 reaching min_count does not affect app2's counter. + #[test] + fn dfm_debounce_is_per_source() { + let fault_id = FaultId::Numeric(300); + let debounce = DebounceMode::CountWithinWindow { + min_count: 2, + window: Duration::from_secs(10).into(), + }; + let registry = make_two_source_debounce_registry(fault_id.clone(), debounce); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path1 = make_path("app1"); + let path2 = make_path("app2"); + + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + + // app1: event 1 -> suppressed + processor.process_record(&path1, &record); + assert!(storage.get("app1", &fault_id).unwrap().is_none(), "app1 first event should be suppressed"); + + // app2: event 1 -> suppressed + processor.process_record(&path2, &record); + assert!(storage.get("app2", &fault_id).unwrap().is_none(), "app2 first event should be suppressed"); + + // app1: event 2 -> fires (count == min_count for app1) + processor.process_record(&path1, &record); + assert!( + storage.get("app1", &fault_id).unwrap().is_some(), + "app1 second event should fire (reached 
min_count)" + ); + // app2 should still be suppressed + assert!( + storage.get("app2", &fault_id).unwrap().is_none(), + "app2 should still be suppressed (only 1 event)" + ); + + // app2: event 2 -> fires independently + processor.process_record(&path2, &record); + assert!( + storage.get("app2", &fault_id).unwrap().is_some(), + "app2 second event should fire independently" + ); + } + + /// EdgeWithCooldown debounce: first event fires, subsequent within cooldown suppressed. + #[test] + fn dfm_applies_edge_with_cooldown_debounce() { + let fault_id = FaultId::Numeric(400); + let debounce = DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_secs(60).into(), + }; + let registry = make_debounce_registry("test_entity", fault_id.clone(), debounce); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + + // First event: passes (edge trigger) + processor.process_record(&path, &record); + assert!( + storage.get("test_entity", &fault_id).unwrap().is_some(), + "First event should pass through (edge trigger)" + ); + + // Overwrite storage to detect if second event updates it + storage.delete("test_entity", &fault_id).unwrap(); + + // Second event within cooldown: suppressed + processor.process_record(&path, &record); + assert!( + storage.get("test_entity", &fault_id).unwrap().is_none(), + "Second event within cooldown should be suppressed" + ); + } +} + +#[cfg(test)] +mod iso14229_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::dfm_test_utils::*; + use crate::fault_record_processor::FaultRecordProcessor; + use crate::sovd_fault_storage::SovdFaultStateStorage; + use common::fault::*; + use std::sync::Arc; + + fn make_processor( + storage: Arc, + registry: Arc, + ) -> 
FaultRecordProcessor { + FaultRecordProcessor::new(storage, registry, make_cycle_tracker()) + } + + // ======================================================================== + // warning_indicator_requested tests + // ======================================================================== + + /// Failed with SafetyCritical descriptor → warning_indicator_requested = true. + #[test] + fn warning_indicator_set_for_safety_critical() { + let fault_id = FaultId::Numeric(700); + let compliance = ComplianceVec::try_from(&[ComplianceTag::SafetyCritical][..]).unwrap(); + let registry = make_compliance_registry("test_entity", fault_id.clone(), compliance); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.warning_indicator_requested, "SafetyCritical fault should set WIR"); + } + + /// Failed with EmissionRelevant descriptor → warning_indicator_requested = true. + #[test] + fn warning_indicator_set_for_emission_relevant() { + let fault_id = FaultId::Numeric(701); + let compliance = ComplianceVec::try_from(&[ComplianceTag::EmissionRelevant][..]).unwrap(); + let registry = make_compliance_registry("test_entity", fault_id.clone(), compliance); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.warning_indicator_requested, "EmissionRelevant fault should set WIR"); + } + + /// Failed with SecurityRelevant only → warning_indicator_requested = false. 
+ #[test] + fn warning_indicator_not_set_for_security_only() { + let fault_id = FaultId::Numeric(702); + let compliance = ComplianceVec::try_from(&[ComplianceTag::SecurityRelevant][..]).unwrap(); + let registry = make_compliance_registry("test_entity", fault_id.clone(), compliance); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.warning_indicator_requested, "SecurityRelevant-only should not set WIR"); + } + + /// PreFailed also sets warning_indicator_requested. + #[test] + fn warning_indicator_set_on_prefailed() { + let fault_id = FaultId::Numeric(703); + let compliance = ComplianceVec::try_from(&[ComplianceTag::SafetyCritical][..]).unwrap(); + let registry = make_compliance_registry("test_entity", fault_id.clone(), compliance); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + let record = make_record(fault_id.clone(), LifecycleStage::PreFailed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.warning_indicator_requested, "PreFailed should also set WIR for SafetyCritical"); + } + + // ======================================================================== + // clear_dtc tests + // ======================================================================== + + /// clear_dtc resets *_since_last_clear flags. 
+ #[test] + fn clear_dtc_resets_since_last_clear_flags() { + let _fault_id = FaultId::Numeric(710); + let registry = make_registry(); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + // Create some fault state with since_last_clear flags set + let record = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(state.test_failed_since_last_clear); + + // Clear DTC + processor.clear_dtc("test_entity"); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(!state.test_failed_since_last_clear, "clear_dtc should reset test_failed_since_last_clear"); + assert!( + !state.test_not_completed_since_last_clear, + "clear_dtc should reset test_not_completed_since_last_clear" + ); + // Other flags preserved + assert!(state.test_failed, "clear_dtc should not affect test_failed"); + } + + // ======================================================================== + // on_new_operation_cycle tests + // ======================================================================== + + /// on_new_operation_cycle resets *_this_operation_cycle flags. 
+ #[test] + fn new_operation_cycle_resets_this_cycle_flags() { + let registry = make_registry(); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + // Create fault state with this_operation_cycle flags set + let record = make_record(FaultId::Numeric(42), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!(state.test_failed_this_operation_cycle); + + // New operation cycle + processor.on_new_operation_cycle("test_entity"); + + let state = storage.get("test_entity", &FaultId::Numeric(42)).unwrap().unwrap(); + assert!( + !state.test_failed_this_operation_cycle, + "new cycle should reset test_failed_this_operation_cycle" + ); + assert!( + !state.test_not_completed_this_operation_cycle, + "new cycle should reset test_not_completed_this_operation_cycle" + ); + // Other flags preserved + assert!(state.test_failed, "new cycle should not affect test_failed"); + assert!(state.confirmed_dtc, "new cycle should not affect confirmed_dtc"); + } + + /// Full ISO 14229 DTC lifecycle: Failed → clear → new cycle → Failed. 
+ #[test] + fn full_dtc_lifecycle_clear_then_new_cycle() { + let fault_id = FaultId::Numeric(42); + let compliance = ComplianceVec::try_from(&[ComplianceTag::SafetyCritical][..]).unwrap(); + let registry = make_compliance_registry("test_entity", fault_id.clone(), compliance); + let storage = Arc::new(InMemoryStorage::new()); + let mut processor = make_processor(storage.clone(), registry); + let path = make_path("test_entity"); + + // Step 1: Failed + let failed = make_record(fault_id.clone(), LifecycleStage::Failed); + processor.process_record(&path, &failed); + + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.test_failed); + assert!(state.test_failed_this_operation_cycle); + assert!(state.test_failed_since_last_clear); + assert!(state.warning_indicator_requested); + + // Step 2: Clear DTC + processor.clear_dtc("test_entity"); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.test_failed_since_last_clear, "cleared"); + assert!(state.test_failed_this_operation_cycle, "clear doesn't touch this_cycle"); + + // Step 3: New operation cycle + processor.on_new_operation_cycle("test_entity"); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(!state.test_failed_this_operation_cycle, "reset by new cycle"); + assert!(!state.test_failed_since_last_clear, "still cleared"); + + // Step 4: Failed again + processor.process_record(&path, &failed); + let state = storage.get("test_entity", &fault_id).unwrap().unwrap(); + assert!(state.test_failed_this_operation_cycle); + assert!(state.test_failed_since_last_clear); + assert_eq!(state.occurrence_counter, 2); + } +} diff --git a/src/dfm_lib/src/lib.rs b/src/dfm_lib/src/lib.rs new file mode 100644 index 0000000..9a79b5b --- /dev/null +++ b/src/dfm_lib/src/lib.rs @@ -0,0 +1,64 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information 
regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Diagnostic Fault Manager (DFM) library. +//! +//! `dfm_lib` implements the central fault management logic that receives +//! fault reports from distributed `fault_lib` reporters via IPC and +//! applies policies such as debouncing, aging, and lifecycle transitions. +//! +//! ## Key components +//! +//! | Module | Responsibility | +//! |--------|----------------| +//! | [`diagnostic_fault_manager`] | Top-level orchestrator that wires sub-components together | +//! | [`transport`] | [`DfmTransport`](transport::DfmTransport) trait — pluggable IPC abstraction | +//! | [`fault_record_processor`] | Incoming record handling: debounce, hash verification, lifecycle transitions | +//! | [`aging_manager`] | Evaluates aging/reset policies (operation-cycle or time-based) | +//! | [`operation_cycle`] | Tracks named operation cycles (ignition, drive, power) | +//! | [`fault_catalog_registry`] | In-memory registry of catalogs received during handshake | +//! | [`enabling_condition_registry`] | Tracks enabling-condition statuses and broadcasts changes | +//! | [`sovd_fault_manager`] | SOVD-compliant query/clear API for external diagnostic tools | +//! | [`sovd_fault_storage`] | Persistent storage for SOVD fault state (backed by `rust_kvs`) | +//! | [`query_api`] | [`DfmQueryApi`] trait abstracting SOVD query/clear for consumers | +//! | [`query_ipc`] | [`Iceoryx2DfmQuery`] - IPC client implementing `DfmQueryApi` via iceoryx2 | +//! | [`query_server`] | [`DfmQueryServer`](query_server::DfmQueryServer) - DFM-side request handler | +//! 
| [`query_conversion`] | Bidirectional `SovdFault` <-> `IpcSovdFault` conversion | + +extern crate alloc; + +pub mod aging_manager; +pub mod diagnostic_fault_manager; +pub mod enabling_condition_registry; +pub mod fault_catalog_registry; +pub(crate) mod fault_lib_communicator; +pub mod fault_record_processor; +pub mod operation_cycle; +pub mod query_api; +pub(crate) mod query_conversion; +pub mod query_ipc; +pub(crate) mod query_server; +pub mod sovd_fault_manager; +pub mod sovd_fault_storage; +pub mod transport; + +// Re-export key types for convenience +pub use aging_manager::{AgingManager, AgingState}; +pub use enabling_condition_registry::EnablingConditionRegistry; +pub use fault_lib_communicator::{DfmLoopExtensions, Iceoryx2Transport, TransportInitError, run_dfm_loop}; +pub use operation_cycle::{CycleEventType, CycleSource, ManualCycleProvider, OperationCycleEvent, OperationCycleProvider, OperationCycleTracker}; +pub use query_api::{DfmQueryApi, DirectDfmQuery}; +pub use query_ipc::Iceoryx2DfmQuery; +pub use transport::DfmTransport; + +#[cfg(test)] +mod dfm_test_utils; diff --git a/src/dfm_lib/src/operation_cycle.rs b/src/dfm_lib/src/operation_cycle.rs new file mode 100644 index 0000000..c14776c --- /dev/null +++ b/src/dfm_lib/src/operation_cycle.rs @@ -0,0 +1,371 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Tracks named operation cycles for fault aging. +//! +//! Operation cycles are logical time units (e.g., "ignition", "drive", "power") +//! that govern when faults can be automatically reset/aged. +//! +//! # Architecture +//! +//! External lifecycle events (ECU power-on, ignition, etc.) are delivered +//! 
through an [`OperationCycleProvider`] abstraction. Each provider translates +//! platform-specific signals into [`OperationCycleEvent`] values that the +//! [`OperationCycleTracker`] consumes to advance counters. + +use std::collections::HashMap; +use std::time::SystemTime; + +// --------------------------------------------------------------------------- +// Event types +// --------------------------------------------------------------------------- + +/// Identifies the source of an operation-cycle event. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum CycleSource { + /// Event originates from ECU power management. + Ecu, + /// Event originates from HPC lifecycle service. + Hpc, + /// Manually injected (e.g. diagnostic tool or test harness). + Manual, +} + +/// What happened with the named operation cycle. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum CycleEventType { + /// A new cycle has started — counter is incremented. + Start, + /// The current cycle has ended (informational — counter is not changed). + Stop, + /// The cycle was restarted (equivalent to Stop + Start, single increment). + Restart, +} + +/// A typed operation-cycle event. +/// +/// Providers emit these events; the tracker consumes them. +#[derive(Debug, Clone)] +pub struct OperationCycleEvent { + /// Name of the cycle (e.g. "power", "ignition", "drive"). + pub cycle_id: String, + /// What happened. + pub event_type: CycleEventType, + /// When the event occurred. + pub timestamp: SystemTime, + /// Where the event came from. + pub source: CycleSource, +} + +// --------------------------------------------------------------------------- +// Provider trait +// --------------------------------------------------------------------------- + +/// Abstraction over external operation-cycle event sources. +/// +/// Implementors translate platform signals (IPC, CAN, GPIO, etc.) +/// into a stream of [`OperationCycleEvent`] values that the DFM polls. 
pub trait OperationCycleProvider: Send {
    /// Drain every event queued since the previous call.
    ///
    /// Must never block. When nothing new has arrived an empty `Vec` is
    /// returned, so two consecutive calls without fresh external events
    /// yield an empty result the second time (idempotent drain).
    fn poll(&mut self) -> Vec<OperationCycleEvent>;

    /// Running total for the named cycle, when the provider keeps one.
    ///
    /// `None` means the cycle name is unknown or the provider does not
    /// maintain totals — in that case the tracker is the authoritative
    /// source of counts.
    fn current_cycle(&self, _cycle_id: &str) -> Option<u64> {
        None
    }
}

/// Tracks operation cycle counts for fault aging logic.
///
/// Counters are keyed by a string reference (e.g., "power", "ignition").
/// External lifecycle events (power-on, ignition, etc.) should call
/// `increment()` to advance the appropriate counter.
#[derive(Debug, Default)]
pub struct OperationCycleTracker {
    cycles: HashMap<String, u64>,
}

impl OperationCycleTracker {
    /// Create a new empty tracker.
    pub fn new() -> Self {
        Self::default()
    }

    /// Increment the named cycle counter. Returns the new count.
    ///
    /// # Examples
    /// ```
    /// use dfm_lib::OperationCycleTracker;
    /// let mut tracker = OperationCycleTracker::new();
    /// assert_eq!(tracker.increment("power"), 1);
    /// assert_eq!(tracker.increment("power"), 2);
    /// ```
    pub fn increment(&mut self, cycle_ref: &str) -> u64 {
        // Saturating add: the counter never wraps, even in pathological runs.
        let counter = self.cycles.entry(cycle_ref.to_string()).or_default();
        *counter = counter.saturating_add(1);
        *counter
    }

    /// Get current count for a cycle reference. Returns 0 if unknown.
    pub fn get(&self, cycle_ref: &str) -> u64 {
        self.cycles.get(cycle_ref).copied().unwrap_or(0)
    }

    /// Snapshot all current cycle values.
    /// Returns a HashMap that can be stored with fault aging state.
    pub fn snapshot(&self) -> HashMap<String, u64> {
        self.cycles.clone()
    }

    /// Apply a batch of operation-cycle events.
    ///
    /// `Start` and `Restart` advance the named counter; `Stop` is purely
    /// informational and leaves counters untouched.
    ///
    /// Returns the set of cycle names that were actually incremented.
    pub fn apply_events(&mut self, events: &[OperationCycleEvent]) -> Vec<String> {
        let mut bumped = Vec::new();
        for event in events {
            // Stop carries no counter change — skip it.
            if matches!(event.event_type, CycleEventType::Start | CycleEventType::Restart) {
                self.increment(&event.cycle_id);
                bumped.push(event.cycle_id.clone());
            }
        }
        bumped
    }
}

// ---------------------------------------------------------------------------
// Mock provider (available in tests and as a manual/debug provider)
// ---------------------------------------------------------------------------

/// A provider backed by an in-memory queue of events.
///
/// Useful for tests and for manual / diagnostic-tool injection.
#[derive(Debug, Default)]
pub struct ManualCycleProvider {
    pending: Vec<OperationCycleEvent>,
}

impl ManualCycleProvider {
    /// Create a provider with an empty queue.
    pub fn new() -> Self {
        Self::default()
    }

    /// Enqueue a single event that will be returned on the next `poll()`.
    pub fn push(&mut self, event: OperationCycleEvent) {
        self.pending.push(event);
    }

    /// Convenience: enqueue a `Start` event for the given cycle name.
    pub fn start_cycle(&mut self, cycle_id: &str) {
        self.pending.push(OperationCycleEvent {
            cycle_id: cycle_id.to_string(),
            event_type: CycleEventType::Start,
            timestamp: SystemTime::now(),
            source: CycleSource::Manual,
        });
    }

    /// Convenience: enqueue a `Stop` event for the given cycle name.
+ pub fn stop_cycle(&mut self, cycle_id: &str) { + self.pending.push(OperationCycleEvent { + cycle_id: cycle_id.to_string(), + event_type: CycleEventType::Stop, + timestamp: SystemTime::now(), + source: CycleSource::Manual, + }); + } +} + +impl OperationCycleProvider for ManualCycleProvider { + fn poll(&mut self) -> Vec { + core::mem::take(&mut self.pending) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + #[test] + fn new_tracker_returns_zero() { + let tracker = OperationCycleTracker::new(); + assert_eq!(tracker.get("power"), 0); + assert_eq!(tracker.get("ignition"), 0); + } + + #[test] + fn increment_increases_count() { + let mut tracker = OperationCycleTracker::new(); + assert_eq!(tracker.increment("power"), 1); + assert_eq!(tracker.increment("power"), 2); + assert_eq!(tracker.increment("power"), 3); + assert_eq!(tracker.get("power"), 3); + } + + #[test] + fn independent_cycle_refs() { + let mut tracker = OperationCycleTracker::new(); + tracker.increment("power"); + tracker.increment("power"); + tracker.increment("ignition"); + + assert_eq!(tracker.get("power"), 2); + assert_eq!(tracker.get("ignition"), 1); + assert_eq!(tracker.get("drive"), 0); + } + + #[test] + fn snapshot_captures_all_cycles() { + let mut tracker = OperationCycleTracker::new(); + tracker.increment("power"); + tracker.increment("drive"); + tracker.increment("drive"); + + let snap = tracker.snapshot(); + assert_eq!(snap.get("power"), Some(&1)); + assert_eq!(snap.get("drive"), Some(&2)); + } + + // ----------------------------------------------------------------------- + // OperationCycleEvent / apply_events tests + // ----------------------------------------------------------------------- + + fn make_event(cycle_id: &str, event_type: CycleEventType) -> OperationCycleEvent { + OperationCycleEvent { + cycle_id: cycle_id.to_string(), + event_type, + timestamp: SystemTime::now(), + source: CycleSource::Ecu, + } + } + + #[test] + fn 
apply_events_start_increments_counter() { + let mut tracker = OperationCycleTracker::new(); + let events = vec![make_event("power", CycleEventType::Start), make_event("power", CycleEventType::Start)]; + let incremented = tracker.apply_events(&events); + assert_eq!(tracker.get("power"), 2); + assert_eq!(incremented.len(), 2); + } + + #[test] + fn apply_events_stop_does_not_increment() { + let mut tracker = OperationCycleTracker::new(); + tracker.increment("power"); // count = 1 + let events = vec![make_event("power", CycleEventType::Stop)]; + let incremented = tracker.apply_events(&events); + assert_eq!(tracker.get("power"), 1); // unchanged + assert!(incremented.is_empty()); + } + + #[test] + fn apply_events_restart_increments_once() { + let mut tracker = OperationCycleTracker::new(); + let events = vec![make_event("ignition", CycleEventType::Restart)]; + let incremented = tracker.apply_events(&events); + assert_eq!(tracker.get("ignition"), 1); + assert_eq!(incremented, vec!["ignition"]); + } + + #[test] + fn apply_events_mixed_sequence() { + let mut tracker = OperationCycleTracker::new(); + let events = vec![ + make_event("power", CycleEventType::Start), + make_event("power", CycleEventType::Stop), + make_event("power", CycleEventType::Start), + make_event("ignition", CycleEventType::Start), + ]; + tracker.apply_events(&events); + assert_eq!(tracker.get("power"), 2); + assert_eq!(tracker.get("ignition"), 1); + } + + // ----------------------------------------------------------------------- + // ManualCycleProvider tests + // ----------------------------------------------------------------------- + + #[test] + fn manual_provider_poll_drains_queue() { + let mut provider = ManualCycleProvider::new(); + provider.start_cycle("power"); + provider.start_cycle("ignition"); + + let events = provider.poll(); + assert_eq!(events.len(), 2); + assert_eq!(events[0].cycle_id, "power"); + assert_eq!(events[1].cycle_id, "ignition"); + + // Second poll is empty (idempotent) + let 
events2 = provider.poll(); + assert!(events2.is_empty()); + } + + #[test] + fn manual_provider_start_and_stop() { + let mut provider = ManualCycleProvider::new(); + provider.start_cycle("drive"); + provider.stop_cycle("drive"); + + let events = provider.poll(); + assert_eq!(events.len(), 2); + assert_eq!(events[0].event_type, CycleEventType::Start); + assert_eq!(events[1].event_type, CycleEventType::Stop); + } + + #[test] + fn manual_provider_current_cycle_returns_none() { + let provider = ManualCycleProvider::new(); + assert_eq!(provider.current_cycle("power"), None); + } + + // ----------------------------------------------------------------------- + // Integration: provider → tracker + // ----------------------------------------------------------------------- + + #[test] + fn provider_events_drive_tracker() { + let mut provider = ManualCycleProvider::new(); + provider.start_cycle("power"); + provider.start_cycle("power"); + provider.stop_cycle("power"); + provider.start_cycle("ignition"); + + let mut tracker = OperationCycleTracker::new(); + let events = provider.poll(); + tracker.apply_events(&events); + + assert_eq!(tracker.get("power"), 2); + assert_eq!(tracker.get("ignition"), 1); + } +} diff --git a/src/dfm_lib/src/query_api.rs b/src/dfm_lib/src/query_api.rs new file mode 100644 index 0000000..6f4c2ed --- /dev/null +++ b/src/dfm_lib/src/query_api.rs @@ -0,0 +1,175 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Abstraction over DFM fault query/clear operations. +//! +//! [`DfmQueryApi`] enables both in-process ([`DirectDfmQuery`]) and remote +//! (IPC) access to the Diagnostic Fault Manager's SOVD-compliant fault store. 
+ +use crate::fault_catalog_registry::FaultCatalogRegistry; +use crate::sovd_fault_manager::{Error, SovdEnvData, SovdFault, SovdFaultManager}; +use crate::sovd_fault_storage::SovdFaultStateStorage; +use alloc::sync::Arc; +use alloc::vec::Vec; + +/// Abstraction over DFM fault query/clear operations. +/// +/// Enables both in-process (glue code) and remote (IPC) access +/// to the Diagnostic Fault Manager's SOVD-compliant fault store. +/// +/// Object-safe: can be used as `Box` or `&dyn DfmQueryApi`. +pub trait DfmQueryApi: Send + Sync { + /// List all known faults for a given entity path. + fn get_all_faults(&self, path: &str) -> Result, Error>; + + /// Get a single fault with its environment data snapshot. + fn get_fault(&self, path: &str, fault_code: &str) -> Result<(SovdFault, SovdEnvData), Error>; + + /// Clear all stored faults for a given entity path. + fn delete_all_faults(&self, path: &str) -> Result<(), Error>; + + /// Clear a single stored fault by its code. + fn delete_fault(&self, path: &str, fault_code: &str) -> Result<(), Error>; +} + +/// In-process [`DfmQueryApi`] implementation - zero-cost delegation to +/// [`SovdFaultManager`]. +/// +/// Use this when the SOVD consumer runs in the same process as the DFM +/// (glue-code / embedded scenario). +pub struct DirectDfmQuery { + inner: SovdFaultManager, +} + +impl DirectDfmQuery { + /// Wrap an existing [`SovdFaultManager`] as a [`DfmQueryApi`] implementor. 
+ pub fn new(storage: Arc, registry: Arc) -> Self { + Self { + inner: SovdFaultManager::new(storage, registry), + } + } +} + +impl DfmQueryApi for DirectDfmQuery { + fn get_all_faults(&self, path: &str) -> Result, Error> { + self.inner.get_all_faults(path) + } + + fn get_fault(&self, path: &str, fault_code: &str) -> Result<(SovdFault, SovdEnvData), Error> { + self.inner.get_fault(path, fault_code) + } + + fn delete_all_faults(&self, path: &str) -> Result<(), Error> { + self.inner.delete_all_faults(path) + } + + fn delete_fault(&self, path: &str, fault_code: &str) -> Result<(), Error> { + self.inner.delete_fault(path, fault_code) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used, clippy::std_instead_of_core, clippy::std_instead_of_alloc)] +mod tests { + use super::*; + use crate::dfm_test_utils::*; + use crate::fault_record_processor::FaultRecordProcessor; + use common::fault::{FaultId, LifecycleStage}; + use common::types::to_static_short_string; + use std::sync::Arc; + + fn make_direct_query(storage: Arc, registry: Arc) -> DirectDfmQuery { + DirectDfmQuery::new(storage, registry) + } + + /// DirectDfmQuery delegates get_all_faults correctly. + #[test] + fn direct_query_get_all_faults() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry.clone(), make_cycle_tracker()); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let query: &dyn DfmQueryApi = &make_direct_query(storage, registry); + let faults = query.get_all_faults("test_entity").unwrap(); + assert_eq!(faults.len(), 2); // 2 descriptors in catalog + } + + /// DirectDfmQuery delegates get_fault correctly. 
    #[test]
    fn direct_query_get_fault() {
        // Storage and registry are shared (via Arc clones) between the
        // processor that injects the fault and the query facade that reads
        // it back, so the processed record is visible to the query.
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_text_registry();
        let mut processor = FaultRecordProcessor::new(storage.clone(), registry.clone(), make_cycle_tracker());
        let path = make_path("test_entity");

        let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed);
        processor.process_record(&path, &record);

        let query = make_direct_query(storage, registry);
        let (fault, _env) = query.get_fault("test_entity", "fault_a").unwrap();
        assert_eq!(fault.code, "fault_a");
        // The `Failed` lifecycle stage must be reflected as testFailed == true.
        assert!(fault.typed_status.as_ref().unwrap().test_failed.unwrap());
    }

    /// DirectDfmQuery delegates delete_fault correctly.
    #[test]
    fn direct_query_delete_fault() {
        // Same setup as above: seed one fault through the real processor.
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_text_registry();
        let mut processor = FaultRecordProcessor::new(storage.clone(), registry.clone(), make_cycle_tracker());
        let path = make_path("test_entity");

        let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed);
        processor.process_record(&path, &record);

        let query = make_direct_query(storage, registry);
        // Only checks that delegation succeeds; the underlying storage
        // behavior is covered by the SovdFaultManager tests.
        assert!(query.delete_fault("test_entity", "fault_a").is_ok());
    }

    /// DirectDfmQuery delegates delete_all_faults correctly.
    #[test]
    fn direct_query_delete_all_faults() {
        let storage = Arc::new(InMemoryStorage::new());
        let registry = make_text_registry();
        let mut processor = FaultRecordProcessor::new(storage.clone(), registry.clone(), make_cycle_tracker());
        let path = make_path("test_entity");

        let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed);
        processor.process_record(&path, &record);

        let query = make_direct_query(storage, registry);
        assert!(query.delete_all_faults("test_entity").is_ok());
    }

    /// DirectDfmQuery returns BadArgument for unknown path.
+ #[test] + fn direct_query_bad_path() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let query = make_direct_query(storage, registry); + assert_eq!(query.get_all_faults("nonexistent"), Err(Error::BadArgument)); + } + + /// DfmQueryApi is object-safe - can be used as Box. + #[test] + fn trait_is_object_safe() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let boxed: Box = Box::new(make_direct_query(storage, registry)); + // Just verify it compiles and can be called + let _ = boxed.get_all_faults("test_entity"); + } +} diff --git a/src/dfm_lib/src/query_conversion.rs b/src/dfm_lib/src/query_conversion.rs new file mode 100644 index 0000000..b2d6b9d --- /dev/null +++ b/src/dfm_lib/src/query_conversion.rs @@ -0,0 +1,415 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Conversions between `SovdFault` (heap-allocated) and `IpcSovdFault` +//! (fixed-size, IPC-safe). +//! +//! # Lossy conversion policy +//! +//! `SovdFault` -> `IpcSovdFault`: +//! - `String` fields silently truncated via `from_str_truncated()` +//! - `HashMap status` / `Option typed_status` -> individual bool fields +//! - `Option schema` -> omitted +//! - `Option` -> `bool` (None -> false) +//! - Timestamps: ISO 8601 `String` -> `u64` seconds (parsed back from our own format) +//! +//! `IpcSovdFault` -> `SovdFault`: +//! - Fixed-size strings -> heap-allocated `String` +//! - Bool fields -> reconstructed `SovdFaultStatus` + `HashMap` +//! 
- `u64` timestamps -> ISO 8601 strings (using existing `format_unix_timestamp`) + +use crate::sovd_fault_manager::{SovdEnvData, SovdFault, SovdFaultStatus}; +use common::query_protocol::{IpcEnvData, IpcSovdFault}; +use common::types::{LongString, ShortString}; + +/// Truncating conversion from `&str` to `ShortString`, falling back to empty +/// on encoding errors (should not happen for valid UTF-8). +fn short_str(s: &str) -> ShortString { + let result = ShortString::from_str_truncated(s).unwrap_or_default(); + if s.len() > result.len() { + log::warn!("IPC ShortString truncation: input {} bytes -> {} bytes", s.len(), result.len()); + } + result +} + +/// Truncating conversion from `&str` to `LongString`. +fn long_str(s: &str) -> LongString { + let result = LongString::from_str_truncated(s).unwrap_or_default(); + if s.len() > result.len() { + log::warn!("IPC LongString truncation: input {} bytes -> {} bytes", s.len(), result.len()); + } + result +} + +/// Convert a [`SovdFault`] to its IPC-safe equivalent. +/// +/// This conversion is lossy - see module docs for truncation policy. 
pub fn sovd_fault_to_ipc(fault: &SovdFault) -> IpcSovdFault {
    // Borrow the structured status once; every flag below flattens an
    // absent status (or an absent individual flag) to `false`.
    let status = fault.typed_status.as_ref();

    // ISO 14229 status byte recomputed from the typed flags; 0 when no
    // typed status is present at all.
    let status_mask = status.map_or(0, |s| s.compute_mask());

    IpcSovdFault {
        // Identity / naming fields: silently truncated to fixed capacity.
        code: short_str(&fault.code),
        display_code: short_str(&fault.display_code),
        scope: short_str(&fault.scope),
        fault_name: short_str(&fault.fault_name),
        fault_translation_id: short_str(&fault.fault_translation_id),
        severity: fault.severity,

        // DTC status: Option<bool> flags flattened to plain bools
        // (None -> false), as documented in the module-level lossy policy.
        status_mask,
        test_failed: status.and_then(|s| s.test_failed).unwrap_or(false),
        test_failed_this_operation_cycle: status.and_then(|s| s.test_failed_this_operation_cycle).unwrap_or(false),
        pending_dtc: status.and_then(|s| s.pending_dtc).unwrap_or(false),
        confirmed_dtc: status.and_then(|s| s.confirmed_dtc).unwrap_or(false),
        test_not_completed_since_last_clear: status.and_then(|s| s.test_not_completed_since_last_clear).unwrap_or(false),
        test_failed_since_last_clear: status.and_then(|s| s.test_failed_since_last_clear).unwrap_or(false),
        test_not_completed_this_operation_cycle: status.and_then(|s| s.test_not_completed_this_operation_cycle).unwrap_or(false),
        warning_indicator_requested: status.and_then(|s| s.warning_indicator_requested).unwrap_or(false),

        // Counters: absent counters are encoded as 0 (no has_* flag here,
        // so None and Some(0) collapse on the wire).
        occurrence_counter: fault.occurrence_counter.unwrap_or(0),
        aging_counter: fault.aging_counter.unwrap_or(0),
        healing_counter: fault.healing_counter.unwrap_or(0),
        // Timestamps: our own ISO 8601 strings parsed back to epoch
        // seconds; 0 encodes "absent or unparseable".
        first_occurrence_secs: parse_iso_timestamp(fault.first_occurrence.as_deref()),
        last_occurrence_secs: parse_iso_timestamp(fault.last_occurrence.as_deref()),

        // Optional strings travel with an explicit has_* flag so the
        // receiver can distinguish "absent" from "empty after truncation".
        symptom: fault.symptom.as_ref().map(|s| long_str(s)).unwrap_or_default(),
        has_symptom: fault.symptom.is_some(),
        symptom_translation_id: fault.symptom_translation_id.as_ref().map(|s| short_str(s)).unwrap_or_default(),
        has_symptom_translation_id: fault.symptom_translation_id.is_some(),
    }
}

/// Convert an [`IpcSovdFault`] back to a heap-allocated [`SovdFault`].
pub fn ipc_fault_to_sovd(ipc: &IpcSovdFault) -> SovdFault {
    // Rebuild the structured status. Every flag becomes `Some(..)` because
    // the IPC form carries plain bools (absence was flattened to `false`
    // during encoding - that distinction is not recoverable here).
    let typed_status = SovdFaultStatus {
        test_failed: Some(ipc.test_failed),
        test_failed_this_operation_cycle: Some(ipc.test_failed_this_operation_cycle),
        pending_dtc: Some(ipc.pending_dtc),
        confirmed_dtc: Some(ipc.confirmed_dtc),
        test_not_completed_since_last_clear: Some(ipc.test_not_completed_since_last_clear),
        test_failed_since_last_clear: Some(ipc.test_failed_since_last_clear),
        test_not_completed_this_operation_cycle: Some(ipc.test_not_completed_this_operation_cycle),
        warning_indicator_requested: Some(ipc.warning_indicator_requested),
        // Re-render the transported mask byte in the same "0xNN" format
        // used elsewhere in this crate.
        mask: Some(alloc::format!("0x{:02X}", ipc.status_mask)),
    };

    SovdFault {
        code: ipc.code.to_string(),
        display_code: ipc.display_code.to_string(),
        scope: ipc.scope.to_string(),
        fault_name: ipc.fault_name.to_string(),
        fault_translation_id: ipc.fault_translation_id.to_string(),
        severity: ipc.severity,
        // Legacy string map derived from the typed flags (computed before
        // `typed_status` is moved into the struct below).
        status: typed_status.to_hash_map(),
        typed_status: Some(typed_status),
        // has_* flags distinguish "absent" from "empty string".
        symptom: if ipc.has_symptom { Some(ipc.symptom.to_string()) } else { None },
        symptom_translation_id: if ipc.has_symptom_translation_id {
            Some(ipc.symptom_translation_id.to_string())
        } else {
            None
        },
        // Schema is never transported over IPC (see module docs).
        schema: None,
        // NOTE(review): counters come back as `Some(n)` even when the
        // original was `None` (encoded as 0) - the None/Some(0) distinction
        // is lost across a roundtrip.
        occurrence_counter: Some(ipc.occurrence_counter),
        aging_counter: Some(ipc.aging_counter),
        healing_counter: Some(ipc.healing_counter),
        // 0 seconds encodes "no timestamp" on the wire, so it maps back to
        // None rather than the epoch string.
        first_occurrence: if ipc.first_occurrence_secs > 0 {
            Some(crate::sovd_fault_manager::format_unix_timestamp(ipc.first_occurrence_secs))
        } else {
            None
        },
        last_occurrence: if ipc.last_occurrence_secs > 0 {
            Some(crate::sovd_fault_manager::format_unix_timestamp(ipc.last_occurrence_secs))
        } else {
            None
        },
    }
}

/// Convert a [`SovdEnvData`] (`HashMap`) to [`IpcEnvData`].
///
/// Truncates keys/values to `ShortString` capacity. Entries beyond capacity
/// (8) are silently dropped.
+pub fn env_data_to_ipc(env: &SovdEnvData) -> IpcEnvData { + use iceoryx2_bb_container::vector::Vector; + let mut ipc = IpcEnvData::new(); + for (k, v) in env { + if ipc.is_full() { + break; + } + let _ = ipc.push((short_str(k), short_str(v))); + } + ipc +} + +/// Convert [`IpcEnvData`] back to [`SovdEnvData`]. +pub fn ipc_env_data_to_sovd(ipc: &IpcEnvData) -> SovdEnvData { + ipc.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() +} + +/// Parse an ISO 8601 timestamp string (our format: "YYYY-MM-DDThh:mm:ssZ") +/// back to Unix seconds. Returns 0 if None or unparseable. +/// +/// This is the inverse of `sovd_fault_manager::format_unix_timestamp`. +/// Only supports the exact format we produce - no timezone offsets. +/// Dates before 1970-01-01 return 0 (pre-epoch). +#[allow(clippy::arithmetic_side_effects, clippy::cast_possible_truncation)] +fn parse_iso_timestamp(s: Option<&str>) -> u64 { + let Some(s) = s else { return 0 }; + // Expected: "2024-01-15T09:50:00Z" (exactly 20 chars) + if s.len() != 20 || !s.ends_with('Z') { + return 0; + } + let b = s.as_bytes(); + let year = parse_u64(&b[0..4]); + let month = parse_u64(&b[5..7]); + let day = parse_u64(&b[8..10]); + let hour = parse_u64(&b[11..13]); + let min = parse_u64(&b[14..16]); + let sec = parse_u64(&b[17..19]); + + if year < 1970 || month == 0 || day == 0 { + return 0; + } + if month > 12 || day > 31 { + return 0; + } + if hour >= 24 || min >= 60 || sec >= 60 { + return 0; + } + + days_from_civil(year as i64, month as u32, day as u32) * 86_400 + hour * 3_600 + min * 60 + sec +} + +/// Parse ASCII decimal digits to u64. Returns 0 on any non-digit. +#[allow(clippy::arithmetic_side_effects)] +fn parse_u64(bytes: &[u8]) -> u64 { + let mut result = 0u64; + for &b in bytes { + if !b.is_ascii_digit() { + return 0; + } + result = result * 10 + u64::from(b - b'0'); + } + result +} + +/// Convert (year, month, day) to days since Unix epoch (1970-01-01). +/// Algorithm: Howard Hinnant's `days_from_civil`. 
///
/// Returns 0 for dates before 1970-01-01 (negative day counts).
#[allow(clippy::arithmetic_side_effects, clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn days_from_civil(y: i64, m: u32, d: u32) -> u64 {
    // Shift to a March-based year so the leap day falls at the end of the
    // "year": January and February count as the previous year's months.
    let y = if m <= 2 { y - 1 } else { y };
    // Era = one 400-year Gregorian cycle (146_097 days); the branch gives
    // floor division for negative years.
    let era = if y >= 0 { y } else { y - 399 } / 400;
    // Year-of-era in [0, 399].
    let yoe = (y - era * 400) as u32;
    // Day-of-year in the March-based year: (153*m' + 2)/5 linearizes the
    // repeating 31/30-day month pattern starting at March.
    let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
    // Day-of-era with the intra-era leap corrections (+1 every 4 years,
    // -1 every 100; the 400-year rule is absorbed by `era`).
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 719_468 = days from 0000-03-01 to 1970-01-01, anchoring the result
    // at the Unix epoch.
    let days = era * 146_097 + doe as i64 - 719_468;
    if days < 0 {
        return 0;
    }
    days as u64
}

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::std_instead_of_core,
    clippy::std_instead_of_alloc,
    clippy::arithmetic_side_effects
)]
mod tests {
    use super::*;
    use crate::dfm_test_utils::*;
    use crate::fault_record_processor::FaultRecordProcessor;
    use crate::sovd_fault_manager::SovdFaultManager;
    use common::fault::{FaultId, LifecycleStage};
    use common::types::to_static_short_string;
    use iceoryx2_bb_container::string::String as IceString;
    use std::sync::Arc;

    /// Helper: create a SovdFault via the real pipeline.
+ fn make_real_fault() -> SovdFault { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = FaultRecordProcessor::new(storage.clone(), registry.clone(), make_cycle_tracker()); + let path = make_path("test_entity"); + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + faults.into_iter().find(|f| f.code == "fault_a").unwrap() + } + + #[test] + fn roundtrip_preserves_core_fields() { + let original = make_real_fault(); + let ipc = sovd_fault_to_ipc(&original); + let restored = ipc_fault_to_sovd(&ipc); + + assert_eq!(restored.code, original.code); + assert_eq!(restored.display_code, original.display_code); + assert_eq!(restored.scope, original.scope); + assert_eq!(restored.fault_name, original.fault_name); + assert_eq!(restored.severity, original.severity); + } + + #[test] + fn roundtrip_preserves_status_flags() { + let original = make_real_fault(); + let ipc = sovd_fault_to_ipc(&original); + let restored = ipc_fault_to_sovd(&ipc); + + let orig_status = original.typed_status.as_ref().unwrap(); + let rest_status = restored.typed_status.as_ref().unwrap(); + + assert_eq!(rest_status.test_failed, orig_status.test_failed); + assert_eq!(rest_status.confirmed_dtc, orig_status.confirmed_dtc); + assert_eq!(rest_status.pending_dtc, orig_status.pending_dtc); + assert_eq!(rest_status.warning_indicator_requested, orig_status.warning_indicator_requested); + } + + #[test] + fn roundtrip_preserves_counters() { + let original = make_real_fault(); + let ipc = sovd_fault_to_ipc(&original); + let restored = ipc_fault_to_sovd(&ipc); + + assert_eq!(restored.occurrence_counter, original.occurrence_counter); + assert_eq!(restored.aging_counter, original.aging_counter); + assert_eq!(restored.healing_counter, 
original.healing_counter); + } + + #[test] + fn roundtrip_preserves_symptom() { + let original = make_real_fault(); + let ipc = sovd_fault_to_ipc(&original); + let restored = ipc_fault_to_sovd(&ipc); + + // fault_a in make_text_registry has no summary -> symptom is None + assert_eq!(restored.symptom, original.symptom); + assert_eq!(restored.symptom_translation_id, original.symptom_translation_id); + } + + #[test] + fn known_lossy_fields_are_expected() { + let original = make_real_fault(); + let ipc = sovd_fault_to_ipc(&original); + let restored = ipc_fault_to_sovd(&ipc); + + // schema is always None after IPC roundtrip + assert!(restored.schema.is_none()); + // status HashMap is reconstructed (may differ in iteration order) + assert!(!restored.status.is_empty()); + } + + #[test] + fn truncation_does_not_error() { + // Create a fault with a very long name + let fault = SovdFault { + code: "x".repeat(200), + fault_name: "y".repeat(200), + symptom: Some("z".repeat(300)), + ..SovdFault::default() + }; + + let ipc = sovd_fault_to_ipc(&fault); + // Should truncate, not panic or error + assert!(ipc.code.as_bytes().len() <= 64); + assert!(ipc.fault_name.as_bytes().len() <= 64); + assert!(ipc.symptom.as_bytes().len() <= 128); + assert!(ipc.has_symptom); + } + + #[test] + fn env_data_roundtrip() { + let mut env = SovdEnvData::new(); + env.insert("temp".into(), "42".into()); + env.insert("pressure".into(), "1013".into()); + + let ipc = env_data_to_ipc(&env); + let restored = ipc_env_data_to_sovd(&ipc); + + assert_eq!(restored.get("temp"), Some(&"42".into())); + assert_eq!(restored.get("pressure"), Some(&"1013".into())); + } + + #[test] + fn env_data_overflow_truncates() { + use iceoryx2_bb_container::vector::Vector; + let mut env = SovdEnvData::new(); + for i in 0..20 { + env.insert(alloc::format!("key_{i}"), alloc::format!("val_{i}")); + } + let ipc = env_data_to_ipc(&env); + assert_eq!(ipc.len(), 8); // capacity limit + } + + #[test] + fn timestamp_roundtrip() { + let ts = 
1705312200u64; // 2024-01-15T09:50:00Z + let iso = crate::sovd_fault_manager::format_unix_timestamp(ts); + let parsed = parse_iso_timestamp(Some(&iso)); + assert_eq!(parsed, ts); + } + + #[test] + fn timestamp_none_returns_zero() { + assert_eq!(parse_iso_timestamp(None), 0); + } + + #[test] + fn timestamp_invalid_returns_zero() { + assert_eq!(parse_iso_timestamp(Some("not-a-date")), 0); + assert_eq!(parse_iso_timestamp(Some("")), 0); + } + + #[test] + fn timestamp_epoch_zero() { + let iso = crate::sovd_fault_manager::format_unix_timestamp(0); + assert_eq!(iso, "1970-01-01T00:00:00Z"); + let parsed = parse_iso_timestamp(Some(&iso)); + assert_eq!(parsed, 0); + } + + #[test] + fn timestamp_pre_epoch_returns_zero() { + // Year 1969 is before Unix epoch + assert_eq!(parse_iso_timestamp(Some("1969-12-31T23:59:59Z")), 0); + } + + #[test] + fn timestamp_invalid_month_day_returns_zero() { + assert_eq!(parse_iso_timestamp(Some("2024-13-01T00:00:00Z")), 0); // month 13 + assert_eq!(parse_iso_timestamp(Some("2024-01-32T00:00:00Z")), 0); // day 32 + } + + #[test] + fn format_timestamp_u64_max_clamps_to_year_9999() { + let result = crate::sovd_fault_manager::format_unix_timestamp(u64::MAX); + assert_eq!(result, "9999-12-31T23:59:59Z"); + } + + #[test] + fn format_timestamp_year_9999_boundary() { + // Exact boundary: 253_402_300_799 = 9999-12-31T23:59:59Z + let result = crate::sovd_fault_manager::format_unix_timestamp(253_402_300_799); + assert_eq!(result, "9999-12-31T23:59:59Z"); + + // One second over the boundary still clamps + let result_over = crate::sovd_fault_manager::format_unix_timestamp(253_402_300_800); + assert_eq!(result_over, "9999-12-31T23:59:59Z"); + } +} diff --git a/src/dfm_lib/src/query_ipc.rs b/src/dfm_lib/src/query_ipc.rs new file mode 100644 index 0000000..6457238 --- /dev/null +++ b/src/dfm_lib/src/query_ipc.rs @@ -0,0 +1,153 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for 
additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! IPC client implementation of [`DfmQueryApi`]. +//! +//! [`Iceoryx2DfmQuery`] sends [`DfmQueryRequest`]s to the DFM process via +//! iceoryx2 native request-response and converts the [`DfmQueryResponse`] +//! back to `SovdFault` / `SovdEnvData`. + +use crate::query_api::DfmQueryApi; +use crate::query_conversion::{ipc_env_data_to_sovd, ipc_fault_to_sovd}; +use crate::sovd_fault_manager::{Error, SovdEnvData, SovdFault}; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; +use common::ipc_service_name::DFM_QUERY_SERVICE_NAME; +use common::ipc_service_type::ServiceType; +use common::query_protocol::{DfmQueryError, DfmQueryRequest, DfmQueryResponse}; +use common::types::{LongString, ShortString}; +use core::time::Duration; +use iceoryx2::port::client::Client; +use iceoryx2::prelude::*; + +/// Default timeout for waiting for a response from the DFM. +const DEFAULT_RESPONSE_TIMEOUT: Duration = Duration::from_secs(1); + +/// IPC-based [`DfmQueryApi`] client. +/// +/// Connects to the DFM's `dfm/query` request-response service and sends +/// queries/clears via iceoryx2 shared memory. The DFM must have a +/// [`DfmQueryServer`](crate::query_server::DfmQueryServer) running. +pub struct Iceoryx2DfmQuery { + client: Client, + node: Node, + timeout: Duration, +} + +impl Iceoryx2DfmQuery { + /// Create a new IPC query client. + /// + /// Connects to the `dfm/query` service. The service must already exist + /// (created by the DFM's `DfmQueryServer`). + /// + /// # Errors + /// + /// Returns `Error::Storage` if the iceoryx2 service or client port + /// cannot be opened. 
+ pub fn new() -> Result { + Self::with_timeout(DEFAULT_RESPONSE_TIMEOUT) + } + + /// Create a new IPC query client with a custom response timeout. + pub fn with_timeout(timeout: Duration) -> Result { + let node = NodeBuilder::new() + .create::() + .map_err(|e| Error::Storage(format!("failed to create node: {e:?}")))?; + + let service_name = ServiceName::new(DFM_QUERY_SERVICE_NAME).map_err(|e| Error::Storage(format!("invalid service name: {e:?}")))?; + + let service = node + .service_builder(&service_name) + .request_response::() + .open_or_create() + .map_err(|e| Error::Storage(format!("failed to open query service: {e:?}")))?; + + let client = service + .client_builder() + .create() + .map_err(|e| Error::Storage(format!("failed to create client: {e:?}")))?; + + Ok(Self { client, node, timeout }) + } + + /// Send a request and wait for the response (blocking with timeout). + #[allow(clippy::arithmetic_side_effects)] + fn request(&self, req: DfmQueryRequest) -> Result { + let pending = self.client.send_copy(req).map_err(|e| Error::Storage(format!("send failed: {e:?}")))?; + + // Poll for response with timeout + let deadline = std::time::Instant::now() + self.timeout; + loop { + if let Some(response) = pending.receive().map_err(|e| Error::Storage(format!("receive failed: {e:?}")))? { + return Ok(response.payload().clone()); + } + if std::time::Instant::now() >= deadline { + return Err(Error::Storage(String::from("query timeout"))); + } + // Brief sleep to avoid busy-waiting; DFM cycle is ~10ms + let _ = self.node.wait(Duration::from_millis(1)); + } + } +} + +impl DfmQueryApi for Iceoryx2DfmQuery { + fn get_all_faults(&self, path: &str) -> Result, Error> { + let req = DfmQueryRequest::GetAllFaults(LongString::from_str_truncated(path).unwrap_or_default()); + match self.request(req)? 
{ + DfmQueryResponse::FaultList(list) => Ok(list.faults.iter().map(ipc_fault_to_sovd).collect()), + DfmQueryResponse::Error(e) => Err(ipc_error_to_sovd(e)), + other => Err(Error::Storage(format!("unexpected response: {other:?}"))), + } + } + + fn get_fault(&self, path: &str, fault_code: &str) -> Result<(SovdFault, SovdEnvData), Error> { + let req = DfmQueryRequest::GetFault( + LongString::from_str_truncated(path).unwrap_or_default(), + ShortString::from_str_truncated(fault_code).unwrap_or_default(), + ); + match self.request(req)? { + DfmQueryResponse::SingleFault(ipc_fault, ipc_env) => Ok((ipc_fault_to_sovd(&ipc_fault), ipc_env_data_to_sovd(&ipc_env))), + DfmQueryResponse::Error(e) => Err(ipc_error_to_sovd(e)), + other => Err(Error::Storage(format!("unexpected response: {other:?}"))), + } + } + + fn delete_all_faults(&self, path: &str) -> Result<(), Error> { + let req = DfmQueryRequest::DeleteAllFaults(LongString::from_str_truncated(path).unwrap_or_default()); + match self.request(req)? { + DfmQueryResponse::Ok => Ok(()), + DfmQueryResponse::Error(e) => Err(ipc_error_to_sovd(e)), + other => Err(Error::Storage(format!("unexpected response: {other:?}"))), + } + } + + fn delete_fault(&self, path: &str, fault_code: &str) -> Result<(), Error> { + let req = DfmQueryRequest::DeleteFault( + LongString::from_str_truncated(path).unwrap_or_default(), + ShortString::from_str_truncated(fault_code).unwrap_or_default(), + ); + match self.request(req)? 
{ + DfmQueryResponse::Ok => Ok(()), + DfmQueryResponse::Error(e) => Err(ipc_error_to_sovd(e)), + other => Err(Error::Storage(format!("unexpected response: {other:?}"))), + } + } +} + +fn ipc_error_to_sovd(e: DfmQueryError) -> Error { + match e { + DfmQueryError::BadArgument => Error::BadArgument, + DfmQueryError::NotFound => Error::NotFound, + DfmQueryError::StorageError(msg) => Error::Storage(msg.to_string()), + } +} diff --git a/src/dfm_lib/src/query_server.rs b/src/dfm_lib/src/query_server.rs new file mode 100644 index 0000000..7d5b379 --- /dev/null +++ b/src/dfm_lib/src/query_server.rs @@ -0,0 +1,144 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! DFM-side query request handler. +//! +//! [`DfmQueryServer`] receives [`DfmQueryRequest`]s from external diagnostic +//! tools via iceoryx2 request-response and dispatches them to the local +//! [`SovdFaultManager`]. Called from the DFM event loop via [`poll`](DfmQueryServer::poll). + +use crate::query_conversion::{env_data_to_ipc, sovd_fault_to_ipc}; +use crate::sovd_fault_manager::{Error as SovdError, SovdFaultManager}; +use crate::sovd_fault_storage::SovdFaultStateStorage; +use common::ipc_service_name::DFM_QUERY_SERVICE_NAME; +use common::ipc_service_type::ServiceType; +use common::query_protocol::{DfmQueryError, DfmQueryRequest, DfmQueryResponse, MAX_FAULTS_PER_RESPONSE}; +use common::sink_error::SinkError; +use common::types::ShortString; +use iceoryx2::port::server::Server; +use iceoryx2::prelude::*; +use iceoryx2_bb_container::vector::Vector; + +/// Server-side handler for DFM query requests. +/// +/// Wraps an iceoryx2 `Server` and a reference to the `SovdFaultManager`. 
+/// Call [`poll`](Self::poll) from the DFM event loop to process pending +/// requests. +pub struct DfmQueryServer { + server: Server, + sovd_manager: SovdFaultManager, +} + +impl DfmQueryServer { + /// Create a new query server on the given iceoryx2 node. + /// + /// Opens (or creates) the `dfm/query` request-response service and + /// creates a server port. + /// + /// # Errors + /// + /// Returns `SinkError::TransportDown` if the iceoryx2 service or server + /// port cannot be created. + pub fn new(node: &Node, sovd_manager: SovdFaultManager) -> Result { + let service_name = ServiceName::new(DFM_QUERY_SERVICE_NAME).map_err(|_| SinkError::TransportDown)?; + let service = node + .service_builder(&service_name) + .request_response::() + .open_or_create() + .map_err(|_| SinkError::TransportDown)?; + let server = service.server_builder().create().map_err(|_| SinkError::TransportDown)?; + + Ok(Self { server, sovd_manager }) + } + + /// Process all pending query requests (non-blocking). + /// + /// Called once per DFM event-loop iteration. Drains all queued requests, + /// dispatches each to [`SovdFaultManager`], and sends the response back + /// to the requesting client. + /// + /// Errors from individual requests are sent as `DfmQueryResponse::Error` + /// to the client - they do not propagate to the caller. Only transport-level + /// failures (send/receive broken) return `Err`. 
+ pub fn poll(&self) -> Result<(), SinkError> { + loop { + let active_request = match self.server.receive() { + Ok(Some(req)) => req, + Ok(None) => break, // no more pending requests + Err(_) => return Err(SinkError::TransportDown), + }; + + let response = self.handle_request(&active_request); + active_request.send_copy(response).map_err(|_| SinkError::TransportDown)?; + } + Ok(()) + } + + fn handle_request(&self, request: &DfmQueryRequest) -> DfmQueryResponse { + match request { + DfmQueryRequest::GetAllFaults(path) => { + let path_str = path.to_string(); + match self.sovd_manager.get_all_faults(&path_str) { + Ok(faults) => { + let total_count = u32::try_from(faults.len()).unwrap_or(u32::MAX); + let mut ipc_faults = common::query_protocol::IpcFaultListResponse { + faults: iceoryx2_bb_container::vector::StaticVec::new(), + total_count, + }; + for fault in faults.iter().take(MAX_FAULTS_PER_RESPONSE) { + let _ = ipc_faults.faults.push(sovd_fault_to_ipc(fault)); + } + DfmQueryResponse::FaultList(ipc_faults) + } + Err(e) => DfmQueryResponse::Error(sovd_error_to_ipc(e)), + } + } + DfmQueryRequest::GetFault(path, fault_code) => { + let path_str = path.to_string(); + let code_str = fault_code.to_string(); + match self.sovd_manager.get_fault(&path_str, &code_str) { + Ok((fault, env)) => DfmQueryResponse::SingleFault(sovd_fault_to_ipc(&fault), env_data_to_ipc(&env)), + Err(e) => DfmQueryResponse::Error(sovd_error_to_ipc(e)), + } + } + DfmQueryRequest::DeleteAllFaults(path) => { + let path_str = path.to_string(); + match self.sovd_manager.delete_all_faults(&path_str) { + Ok(()) => DfmQueryResponse::Ok, + Err(e) => DfmQueryResponse::Error(sovd_error_to_ipc(e)), + } + } + DfmQueryRequest::DeleteFault(path, fault_code) => { + let path_str = path.to_string(); + let code_str = fault_code.to_string(); + match self.sovd_manager.delete_fault(&path_str, &code_str) { + Ok(()) => DfmQueryResponse::Ok, + Err(e) => DfmQueryResponse::Error(sovd_error_to_ipc(e)), + } + } + } + } +} + +fn 
sovd_error_to_ipc(e: SovdError) -> DfmQueryError { + match e { + SovdError::BadArgument => DfmQueryError::BadArgument, + SovdError::NotFound => DfmQueryError::NotFound, + SovdError::Storage(msg) => { + let truncated = ShortString::from_str_truncated(&msg).unwrap_or_default(); + if msg.len() > truncated.len() { + log::warn!("IPC error message truncation: input {} bytes -> {} bytes", msg.len(), truncated.len()); + } + DfmQueryError::StorageError(truncated) + } + } +} diff --git a/src/dfm_lib/src/sovd_fault_manager.rs b/src/dfm_lib/src/sovd_fault_manager.rs new file mode 100644 index 0000000..a0d98e7 --- /dev/null +++ b/src/dfm_lib/src/sovd_fault_manager.rs @@ -0,0 +1,898 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! SOVD-compliant fault query and clear API. +//! +//! Provides the interface consumed by external diagnostic tools (e.g. +//! OpenSOVD diagnostic service) to query DTC statuses, read +//! environment snapshots, and request fault clears. Backed by +//! [`FaultCatalogRegistry`] and [`SovdFaultStateStorage`]. + +use crate::fault_catalog_registry::FaultCatalogRegistry; +use crate::sovd_fault_storage::{StorageError, *}; +use alloc::sync::Arc; +use common::{fault, types::ShortString}; +use std::collections::HashMap; + +#[derive(Debug, thiserror::Error, PartialEq, Eq, Clone)] +#[non_exhaustive] +pub enum Error { + #[error("bad argument")] + BadArgument, + #[error("not found")] + NotFound, + #[error("storage error: {0}")] + Storage(String), +} + +/// SOVD-compliant fault status (DTC status bits). +/// Aligned with CDA cda-sovd-interfaces FaultStatus. +/// Follows ISO 14229 DTC status byte semantics. 
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SovdFaultStatus {
    // NOTE(review): the generic parameters of these fields were lost in the
    // patch text; `Option<bool>` / `Option<String>` restored from how
    // `from_state` and `to_hash_map` use them.
    pub test_failed: Option<bool>,
    pub test_failed_this_operation_cycle: Option<bool>,
    pub pending_dtc: Option<bool>,
    pub confirmed_dtc: Option<bool>,
    pub test_not_completed_since_last_clear: Option<bool>,
    pub test_failed_since_last_clear: Option<bool>,
    pub test_not_completed_this_operation_cycle: Option<bool>,
    pub warning_indicator_requested: Option<bool>,
    /// Hex rendering of the ISO 14229 status byte, e.g. "0x2B".
    pub mask: Option<String>,
}

impl SovdFaultStatus {
    /// Create status from SovdFaultState (internal storage).
    pub fn from_state(state: &SovdFaultState) -> Self {
        let mut status = Self {
            test_failed: Some(state.test_failed),
            test_failed_this_operation_cycle: Some(state.test_failed_this_operation_cycle),
            pending_dtc: Some(state.pending_dtc),
            confirmed_dtc: Some(state.confirmed_dtc),
            test_not_completed_since_last_clear: Some(state.test_not_completed_since_last_clear),
            test_failed_since_last_clear: Some(state.test_failed_since_last_clear),
            test_not_completed_this_operation_cycle: Some(state.test_not_completed_this_operation_cycle),
            warning_indicator_requested: Some(state.warning_indicator_requested),
            mask: None,
        };
        // The mask is derived from the flags just assigned above.
        status.mask = Some(format!("0x{:02X}", status.compute_mask()));
        status
    }

    /// Compute ISO 14229 status mask byte.
    /// Bit positions per UDS standard:
    /// - Bit 0: testFailed
    /// - Bit 1: testFailedThisOperationCycle
    /// - Bit 2: pendingDTC
    /// - Bit 3: confirmedDTC
    /// - Bit 4: testNotCompletedSinceLastClear
    /// - Bit 5: testFailedSinceLastClear
    /// - Bit 6: testNotCompletedThisOperationCycle
    /// - Bit 7: warningIndicatorRequested
    pub fn compute_mask(&self) -> u8 {
        // (flag, bit) table keeps the mapping auditable against ISO 14229;
        // an unset (None) flag contributes 0, same as Some(false).
        [
            (self.test_failed, 0x01u8),
            (self.test_failed_this_operation_cycle, 0x02),
            (self.pending_dtc, 0x04),
            (self.confirmed_dtc, 0x08),
            (self.test_not_completed_since_last_clear, 0x10),
            (self.test_failed_since_last_clear, 0x20),
            (self.test_not_completed_this_operation_cycle, 0x40),
            (self.warning_indicator_requested, 0x80),
        ]
        .into_iter()
        .fold(0u8, |mask, (flag, bit)| if flag.unwrap_or(false) { mask | bit } else { mask })
    }

    /// Convert to HashMap for backward compat / JSON serialization.
+ pub fn to_hash_map(&self) -> HashMap { + let mut map = HashMap::new(); + if let Some(v) = self.test_failed { + map.insert("testFailed".into(), (v as u32).to_string()); + } + if let Some(v) = self.test_failed_this_operation_cycle { + map.insert("testFailedThisOperationCycle".into(), (v as u32).to_string()); + } + if let Some(v) = self.pending_dtc { + map.insert("pendingDTC".into(), (v as u32).to_string()); + } + if let Some(v) = self.confirmed_dtc { + map.insert("confirmedDTC".into(), (v as u32).to_string()); + } + if let Some(v) = self.test_not_completed_since_last_clear { + map.insert("testNotCompletedSinceLastClear".into(), (v as u32).to_string()); + } + if let Some(v) = self.test_failed_since_last_clear { + map.insert("testFailedSinceLastClear".into(), (v as u32).to_string()); + } + if let Some(v) = self.test_not_completed_this_operation_cycle { + map.insert("testNotCompletedThisOperationCycle".into(), (v as u32).to_string()); + } + if let Some(v) = self.warning_indicator_requested { + map.insert("warningIndicatorRequested".into(), (v as u32).to_string()); + } + if let Some(ref m) = self.mask { + map.insert("mask".into(), m.clone()); + } + map + } +} + +/// SOVD fault representation per SOVD specification. +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SovdFault { + /// Unique fault code (e.g., "0x1001" or "fault_a") + pub code: String, + /// Human-readable display code + pub display_code: String, + /// Fault scope (e.g., "ecu", "component", "system") + pub scope: String, + /// Fault name identifier + pub fault_name: String, + /// Translation key for fault name + pub fault_translation_id: String, + /// Fault severity level + pub severity: u32, + /// Dynamic status properties (DTC flags) as key-value pairs. + /// Used for OpenSOVD JSON wire format serialization (backward compat). + /// When `typed_status` is `Some`, it is the authoritative source; + /// this HashMap is the serialization view derived from it. 
+ pub status: HashMap, + /// Human-readable symptom description (from descriptor summary) + pub symptom: Option, + /// Translation key for symptom + pub symptom_translation_id: Option, + /// JSON schema reference for extended fault data + pub schema: Option, + + // --- Extended fields for richer diagnostics --- + /// Typed SOVD status (CDA-aligned alternative to HashMap status) + pub typed_status: Option, + /// Number of times this fault has occurred + pub occurrence_counter: Option, + /// Aging cycles passed since last occurrence + pub aging_counter: Option, + /// Number of times this fault was healed/reset + pub healing_counter: Option, + /// ISO 8601 timestamp of first occurrence + pub first_occurrence: Option, + /// ISO 8601 timestamp of most recent occurrence + pub last_occurrence: Option, +} + +impl SovdFault { + fn new(descriptor: &fault::FaultDescriptor, state: &SovdFaultState) -> Self { + let code = fault_id_to_code(&descriptor.id); + let typed_status = SovdFaultStatus::from_state(state); + + Self { + display_code: code.clone(), + fault_translation_id: format!("fault.{}", &code), + symptom: descriptor.summary.as_ref().map(|s| s.to_string()), + symptom_translation_id: descriptor.summary.as_ref().map(|_| format!("symptom.{}", &code)), + schema: None, + code, + scope: "ecu".into(), + fault_name: descriptor.name.to_string(), + severity: descriptor.severity as u32, + status: typed_status.to_hash_map(), + typed_status: Some(typed_status), + occurrence_counter: Some(state.occurrence_counter), + aging_counter: Some(state.aging_counter), + healing_counter: Some(state.healing_counter), + first_occurrence: if state.first_occurrence_secs > 0 { + Some(format_unix_timestamp(state.first_occurrence_secs)) + } else { + None + }, + last_occurrence: if state.last_occurrence_secs > 0 { + Some(format_unix_timestamp(state.last_occurrence_secs)) + } else { + None + }, + } + } +} + +/// Format Unix timestamp as ISO 8601 UTC string (e.g. "2024-01-15T09:50:00Z"). 
+/// +/// Uses Howard Hinnant's civil_from_days algorithm to convert days since epoch +/// to year/month/day without external dependencies. +/// +/// Inputs beyond year 9999 (253,402,300,799 seconds) are clamped to +/// "9999-12-31T23:59:59Z" to prevent overflow in the `days as i64` cast. +#[allow(clippy::arithmetic_side_effects, clippy::cast_possible_truncation)] +pub(crate) fn format_unix_timestamp(secs: u64) -> String { + // Year 9999-12-31T23:59:59Z in Unix seconds. + const MAX_SECS: u64 = 253_402_300_799; + if secs > MAX_SECS { + return String::from("9999-12-31T23:59:59Z"); + } + + const SECS_PER_DAY: u64 = 86_400; + const SECS_PER_HOUR: u64 = 3_600; + const SECS_PER_MINUTE: u64 = 60; + + let days = secs / SECS_PER_DAY; + let day_secs = secs % SECS_PER_DAY; + let hours = day_secs / SECS_PER_HOUR; + let minutes = (day_secs % SECS_PER_HOUR) / SECS_PER_MINUTE; + let seconds = day_secs % SECS_PER_MINUTE; + + // Safe: MAX_SECS / 86_400 = 2_932_896 which fits in i64. + let (year, month, day) = civil_from_days(days as i64); + + format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}Z") +} + +/// Convert days since 1970-01-01 to (year, month, day). 
+/// Algorithm: Howard Hinnant's `civil_from_days` +/// Reference: +#[allow(clippy::arithmetic_side_effects, clippy::cast_possible_truncation)] +fn civil_from_days(days: i64) -> (i64, u32, u32) { + let z = days + 719_468; + let era = (if z >= 0 { z } else { z - 146_096 }) / 146_097; + let doe = (z - era * 146_097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; + let y = yoe as i64 + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} + +pub type SovdEnvData = HashMap; + +pub struct SovdFaultManager { + storage: Arc, + registry: Arc, +} + +impl SovdFaultManager { + pub fn new(storage: Arc, registry: Arc) -> Self { + Self { storage, registry } + } + + pub fn get_all_faults(&self, path: &str) -> Result, Error> { + let Some(catalog) = self.registry.catalogs.get(path) else { + return Err(Error::BadArgument); + }; + let descriptors = catalog.descriptors(); + let mut faults = Vec::new(); + + for descriptor in descriptors { + // All registered faults are returned regardless of their current state. + // Faults without a stored record use the default (clear) status. + // This matches SOVD/UDS semantics where the diagnostic tool sees all + // known faults and their current DTC status flags. 
+ let state = match self.storage.get(path, &descriptor.id) { + Ok(Some(s)) => s, + Ok(None) => SovdFaultState::default(), + Err(e) => { + log::warn!("Failed to read state for {:?}: {}", descriptor.id, e); + SovdFaultState::default() + } + }; + + faults.push(SovdFault::new(descriptor, &state)); + } + + Ok(faults) + } + + pub fn get_fault(&self, path: &str, fault_code: &str) -> Result<(SovdFault, SovdEnvData), Error> { + let Some(catalog) = self.registry.catalogs.get(path) else { + return Err(Error::BadArgument); + }; + let fault_id = fault_id_from_code(fault_code)?; + let Some(descriptor) = catalog.descriptor(&fault_id) else { + return Err(Error::NotFound); + }; + // All registered faults are returned regardless of their current state. + // Faults without a stored record use the default (clear) status. + let state = match self.storage.get(path, &fault_id) { + Ok(Some(s)) => s, + Ok(None) => SovdFaultState::default(), + Err(e) => { + return Err(Error::Storage(format!("{e:?}"))); + } + }; + + Ok((SovdFault::new(descriptor, &state), state.env_data)) + } + + pub fn delete_all_faults(&self, path: &str) -> Result<(), Error> { + if path.is_empty() { + return Err(Error::BadArgument); + } + self.storage.delete_all(path).map_err(|e| Error::Storage(format!("{e}"))) + } + + pub fn delete_fault(&self, path: &str, fault_code: &str) -> Result<(), Error> { + if path.is_empty() || fault_code.is_empty() { + return Err(Error::BadArgument); + } + let fault_id = fault_id_from_code(fault_code)?; + self.storage.delete(path, &fault_id).map_err(|e| match e { + StorageError::NotFound => Error::NotFound, + other => Error::Storage(format!("{other}")), + }) + } +} + +fn fault_id_to_code(fault_id: &fault::FaultId) -> String { + match fault_id { + fault::FaultId::Numeric(n) => format!("0x{n:X}"), + fault::FaultId::Text(t) => t.to_string(), + fault::FaultId::Uuid(u) => { + format!( + "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", + u[0], 
u[1], u[2], u[3], u[4], u[5], u[6], u[7], u[8], u[9], u[10], u[11], u[12], u[13], u[14], u[15]
            )
        }
    }
}

/// Parse a SOVD fault code back into a FaultId.
///
/// Mirrors `fault_id_to_code`: hex-prefixed strings become Numeric, valid
/// 8-4-4-4-12 UUID strings become Uuid, anything else falls back to Text.
fn fault_id_from_code(fault_code: &str) -> Result<fault::FaultId, Error> {
    // NOTE(review): the stripped return-type generics are restored as
    // Result<fault::FaultId, Error> from the Ok/Err values below.
    // Numeric: "0x..." or "0X..." hex prefix (matches fault_id_to_code output)
    if let Some(hex) = fault_code.strip_prefix("0x").or_else(|| fault_code.strip_prefix("0X")) {
        let n = u32::from_str_radix(hex, 16).map_err(|_| Error::BadArgument)?;
        return Ok(fault::FaultId::Numeric(n));
    }

    // UUID: 8-4-4-4-12 hex pattern (36 chars with dashes)
    if fault_code.len() == 36
        && fault_code.as_bytes().iter().filter(|&&b| b == b'-').count() == 4
        && let Some(bytes) = parse_uuid_string(fault_code)
    {
        return Ok(fault::FaultId::Uuid(bytes));
    }

    // Text: fallback
    let short = ShortString::try_from(fault_code).map_err(|_| Error::BadArgument)?;
    Ok(fault::FaultId::Text(short))
}

/// Parse a UUID string in 8-4-4-4-12 hex format into 16 bytes.
#[allow(clippy::arithmetic_side_effects)]
fn parse_uuid_string(s: &str) -> Option<[u8; 16]> {
    let hex: String = s.chars().filter(|c| *c != '-').collect();
    if hex.len() != 32 {
        return None;
    }
    let mut bytes = [0u8; 16];
    for (i, byte) in bytes.iter_mut().enumerate() {
        *byte = u8::from_str_radix(&hex[i * 2..i * 2 + 2], 16).ok()?;
    }
    Some(bytes)
}

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::std_instead_of_core,
    clippy::std_instead_of_alloc,
    clippy::arithmetic_side_effects
)]
mod tests {
    use super::*;

    #[test]
    fn error_display_impl() {
        assert_eq!(format!("{}", Error::BadArgument), "bad argument");
        assert_eq!(format!("{}", Error::NotFound), "not found");
        assert_eq!(format!("{}", Error::Storage("disk full".into())), "storage error: disk full");
    }

    #[test]
    fn error_is_std_error() {
        // thiserror derives std::error::Error, so boxing as a trait object works.
        let err: Box<dyn std::error::Error> = Box::new(Error::NotFound);
        assert_eq!(format!("{err}"), "not found");
    }

    #[test]
    fn status_mask_all_zeros() {
        let status = SovdFaultStatus::default();
assert_eq!(status.compute_mask(), 0x00); + } + + #[test] + fn status_mask_encodes_correctly() { + let status = SovdFaultStatus { + test_failed: Some(true), // 0x01 + confirmed_dtc: Some(true), // 0x08 + test_failed_since_last_clear: Some(true), // 0x20 + ..Default::default() + }; + assert_eq!(status.compute_mask(), 0x29); + } + + #[test] + fn status_mask_all_bits_set() { + let status = SovdFaultStatus { + test_failed: Some(true), + test_failed_this_operation_cycle: Some(true), + pending_dtc: Some(true), + confirmed_dtc: Some(true), + test_not_completed_since_last_clear: Some(true), + test_failed_since_last_clear: Some(true), + test_not_completed_this_operation_cycle: Some(true), + warning_indicator_requested: Some(true), + mask: None, + }; + assert_eq!(status.compute_mask(), 0xFF); + } + + #[test] + fn from_state_converts_all_flags() { + let state = SovdFaultState { + test_failed: true, + confirmed_dtc: true, + pending_dtc: false, + ..Default::default() + }; + let status = SovdFaultStatus::from_state(&state); + + assert_eq!(status.test_failed, Some(true)); + assert_eq!(status.confirmed_dtc, Some(true)); + assert_eq!(status.pending_dtc, Some(false)); + assert!(status.mask.is_some()); + assert_eq!(status.mask.as_ref().unwrap(), "0x09"); // 0x01 | 0x08 + } + + #[test] + fn to_hash_map_produces_expected_keys() { + let status = SovdFaultStatus { + test_failed: Some(true), + confirmed_dtc: Some(false), + mask: Some("0x01".to_string()), + ..Default::default() + }; + let map = status.to_hash_map(); + + assert_eq!(map.get("testFailed"), Some(&"1".to_string())); + assert_eq!(map.get("confirmedDTC"), Some(&"0".to_string())); + assert_eq!(map.get("mask"), Some(&"0x01".to_string())); + } + + #[test] + fn format_unix_timestamp_iso8601() { + assert_eq!(format_unix_timestamp(0), "1970-01-01T00:00:00Z"); + assert_eq!(format_unix_timestamp(86400), "1970-01-02T00:00:00Z"); + // 2024-01-15 09:50:00 UTC + assert_eq!(format_unix_timestamp(1705312200), "2024-01-15T09:50:00Z"); + // Leap 
year: 2024-02-29 00:00:00 UTC (day 60 of 2024, which is leap) + assert_eq!(format_unix_timestamp(1709164800), "2024-02-29T00:00:00Z"); + // Y2K: 2000-01-01 00:00:00 UTC + assert_eq!(format_unix_timestamp(946684800), "2000-01-01T00:00:00Z"); + } + + // ==================== FaultId conversion roundtrip tests ==================== + + #[test] + fn fault_id_from_code_parses_numeric_hex() { + let id = fault_id_from_code("0x2A").unwrap(); + assert_eq!(id, fault::FaultId::Numeric(0x2A)); + } + + #[test] + fn fault_id_from_code_parses_numeric_hex_uppercase() { + let id = fault_id_from_code("0X1001").unwrap(); + assert_eq!(id, fault::FaultId::Numeric(0x1001)); + } + + #[test] + fn fault_id_from_code_parses_uuid() { + let id = fault_id_from_code("01020304-0506-0708-090a-0b0c0d0e0f10").unwrap(); + assert_eq!(id, fault::FaultId::Uuid([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])); + } + + #[test] + fn fault_id_from_code_parses_text() { + let id = fault_id_from_code("my_fault").unwrap(); + assert!(matches!(id, fault::FaultId::Text(_))); + } + + #[test] + fn fault_id_roundtrip_numeric() { + let original = fault::FaultId::Numeric(0x1001); + let code = fault_id_to_code(&original); + let parsed = fault_id_from_code(&code).unwrap(); + assert_eq!(original, parsed); + } + + #[test] + fn fault_id_roundtrip_uuid() { + let original = fault::FaultId::Uuid([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let code = fault_id_to_code(&original); + let parsed = fault_id_from_code(&code).unwrap(); + assert_eq!(original, parsed); + } + + #[test] + fn fault_id_roundtrip_text() { + let original = fault::FaultId::Text(ShortString::try_from("my_fault").unwrap()); + let code = fault_id_to_code(&original); + let parsed = fault_id_from_code(&code).unwrap(); + assert_eq!(original, parsed); + } + + #[test] + fn fault_id_from_code_rejects_invalid_hex() { + assert!(fault_id_from_code("0xGGGG").is_err()); + } + + #[test] + fn fault_id_from_code_invalid_uuid_falls_back_to_text() { + // 
36 chars with dashes but invalid hex → falls back to Text + let code = "ZZZZZZZZ-ZZZZ-ZZZZ-ZZZZ-ZZZZZZZZZZZZ"; + let id = fault_id_from_code(code).unwrap(); + assert!(matches!(id, fault::FaultId::Text(_))); + } +} + +#[cfg(test)] +mod sovd_manager_tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + )] + + use crate::dfm_test_utils::*; + use crate::fault_catalog_registry::FaultCatalogRegistry; + use crate::fault_record_processor::FaultRecordProcessor; + use crate::sovd_fault_manager::{Error, SovdFaultManager}; + use crate::sovd_fault_storage::SovdFaultStateStorage; + use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; + use common::fault::*; + use common::types::to_static_short_string; + use std::sync::Arc; + + fn make_processor_with_registry(storage: Arc, registry: Arc) -> FaultRecordProcessor { + FaultRecordProcessor::new(storage, registry, make_cycle_tracker()) + } + + // ============================================================================ + // SovdFaultManager query tests + // ============================================================================ + + /// SovdFaultManager get_all_faults returns stored faults. 
+ #[test] + fn sovd_manager_get_all_faults() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + + // Pre-populate via processor + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("test_entity"); + + let record1 = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + let record2 = make_record(FaultId::Text(to_static_short_string("fault_b").unwrap()), LifecycleStage::Passed); + processor.process_record(&path, &record1); + processor.process_record(&path, &record2); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity"); + assert!(faults.is_ok()); + assert_eq!(faults.unwrap().len(), 2); + } + + /// SovdFaultManager returns error for empty path. + #[test] + fn sovd_manager_handles_empty_entity() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_registry(); + let manager = SovdFaultManager::new(storage, registry); + + let result = manager.get_all_faults("nonexistent"); + // Should return Ok with empty or Err - either is acceptable + if let Ok(faults) = result { + assert!(faults.is_empty()); + } + } + + /// FaultCatalogRegistry lookup by path. + #[test] + fn catalog_registry_lookup() { + let config = FaultCatalogConfig { + id: "my_entity".into(), + version: 1, + faults: vec![], + }; + let catalog = FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + let registry = FaultCatalogRegistry::new(vec![catalog]); + + assert!(registry.get("my_entity").is_some()); + assert!(registry.get("nonexistent").is_none()); + } + + /// get_fault returns NotFound for a fault ID not in the catalog. 
+ #[test] + fn get_fault_missing_id_returns_not_found() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let manager = SovdFaultManager::new(storage, registry); + + let result = manager.get_fault("test_entity", "nonexistent_fault"); + assert_eq!(result, Err(Error::NotFound)); + } + + /// get_fault returns BadArgument for a nonexistent path (entity). + #[test] + fn get_fault_bad_path_returns_bad_argument() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let manager = SovdFaultManager::new(storage, registry); + + let result = manager.get_fault("nonexistent_entity", "fault_a"); + assert_eq!(result, Err(Error::BadArgument)); + } + + // ============================================================================ + // SovdFault typed_status and counters tests (Phase 6) + // ============================================================================ + + /// SovdFault includes typed_status with all flags populated. + #[test] + fn sovd_fault_includes_typed_status() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "fault_a").unwrap(); + + assert!(fault.typed_status.is_some()); + let status = fault.typed_status.as_ref().unwrap(); + assert_eq!(status.test_failed, Some(true)); + assert_eq!(status.confirmed_dtc, Some(true)); + assert!(status.mask.is_some()); + } + + /// SovdFault status includes mask field in HashMap. 
+ #[test] + fn sovd_fault_status_hashmap_includes_mask() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "fault_a").unwrap(); + + assert!(fault.status.contains_key("mask")); + // testFailed=1, testFailedThisOpCycle=1, confirmedDTC=1, testFailedSinceLastClear=1 + // -> 0x01 | 0x02 | 0x08 | 0x20 = 0x2B + assert_eq!(fault.status.get("mask"), Some(&"0x2B".to_string())); + } + + /// SovdFault includes occurrence counter (defaults to 0 for new faults). + #[test] + fn sovd_fault_includes_counters() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "fault_a").unwrap(); + + // occurrence_counter incremented on Failed + assert_eq!(fault.occurrence_counter, Some(1)); + assert_eq!(fault.aging_counter, Some(0)); + assert_eq!(fault.healing_counter, Some(0)); + } + + /// SovdFault preserves existing fields (symptom, schema, translation_id). 
+ #[test] + fn sovd_fault_preserves_existing_fields() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("test_entity"); + + let record = make_record(FaultId::Text(to_static_short_string("fault_a").unwrap()), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "fault_a").unwrap(); + + // Core fields + assert_eq!(fault.code, "fault_a"); + assert_eq!(fault.fault_name, "Fault A"); + assert_eq!(fault.scope, "ecu"); + + // Translation fields + assert!(!fault.fault_translation_id.is_empty()); + } + + /// All three FaultId variants (Text, Numeric, UUID) work through the full + /// SOVD pipeline: process → store → query. + #[test] + fn sovd_manager_mixed_fault_id_variants() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_mixed_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("mixed_entity"); + + // Process one record per variant + let record_text = make_record(FaultId::Text(to_static_short_string("fault_text").unwrap()), LifecycleStage::Failed); + let record_numeric = make_record(FaultId::Numeric(0x1001), LifecycleStage::Failed); + let record_uuid = make_record( + FaultId::Uuid([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + ]), + LifecycleStage::Failed, + ); + processor.process_record(&path, &record_text); + processor.process_record(&path, &record_numeric); + processor.process_record(&path, &record_uuid); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("mixed_entity").unwrap(); + assert_eq!(faults.len(), 3); + + // Text fault → code is the 
literal text + let text_fault = faults.iter().find(|f| f.code == "fault_text").unwrap(); + assert_eq!(text_fault.fault_name, "Text Fault"); + assert_eq!(text_fault.typed_status.as_ref().unwrap().test_failed, Some(true)); + + // Numeric fault → code is hex-formatted + let numeric_fault = faults.iter().find(|f| f.code == "0x1001").unwrap(); + assert_eq!(numeric_fault.fault_name, "Numeric Fault"); + + // UUID fault → code is standard UUID format + let uuid_fault = faults.iter().find(|f| f.code == "01020304-0506-0708-090a-0b0c0d0e0f10").unwrap(); + assert_eq!(uuid_fault.fault_name, "UUID Fault"); + } + + /// get_fault with numeric code "0x1001" correctly resolves to Numeric variant. + #[test] + fn sovd_manager_get_fault_numeric_code() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_mixed_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("mixed_entity"); + + let record = make_record(FaultId::Numeric(0x1001), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let (fault, _env) = manager.get_fault("mixed_entity", "0x1001").unwrap(); + assert_eq!(fault.code, "0x1001"); + assert_eq!(fault.fault_name, "Numeric Fault"); + } + + /// get_fault with UUID code correctly resolves to Uuid variant. 
+ #[test] + fn sovd_manager_get_fault_uuid_code() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_mixed_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("mixed_entity"); + + let record = make_record( + FaultId::Uuid([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + ]), + LifecycleStage::Failed, + ); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let (fault, _env) = manager.get_fault("mixed_entity", "01020304-0506-0708-090a-0b0c0d0e0f10").unwrap(); + assert_eq!(fault.code, "01020304-0506-0708-090a-0b0c0d0e0f10"); + assert_eq!(fault.fault_name, "UUID Fault"); + } + + /// delete_fault with numeric code removes the correct entry. + #[test] + fn sovd_manager_delete_fault_numeric() { + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_mixed_registry(); + let mut processor = make_processor_with_registry(storage.clone(), registry.clone()); + let path = make_path("mixed_entity"); + + let record = make_record(FaultId::Numeric(0x1001), LifecycleStage::Failed); + processor.process_record(&path, &record); + + let manager = SovdFaultManager::new(storage, registry); + let result = manager.delete_fault("mixed_entity", "0x1001"); + assert!(result.is_ok()); + } + + /// SovdFault includes ISO 8601 timestamps when occurrence data is present. 
+ #[test] + fn sovd_fault_timestamp_iso8601_format() { + use crate::sovd_fault_storage::SovdFaultState; + + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_text_registry(); + + // Manually inject a state with known timestamps + let state = SovdFaultState { + test_failed: true, + confirmed_dtc: true, + first_occurrence_secs: 946684800, // 2000-01-01T00:00:00Z + last_occurrence_secs: 1705312200, // 2024-01-15T09:50:00Z + ..Default::default() + }; + storage + .put("test_entity", &FaultId::Text(to_static_short_string("fault_a").unwrap()), state) + .unwrap(); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("test_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "fault_a").unwrap(); + + assert_eq!(fault.first_occurrence.as_deref(), Some("2000-01-01T00:00:00Z")); + assert_eq!(fault.last_occurrence.as_deref(), Some("2024-01-15T09:50:00Z")); + } + + /// SovdFault symptom field comes from descriptor summary. + #[test] + fn sovd_fault_symptom_from_descriptor_summary() { + use crate::sovd_fault_storage::SovdFaultState; + + let storage = Arc::new(InMemoryStorage::new()); + let registry = make_mixed_registry(); + + // Inject a state for a descriptor that has a summary + let state = SovdFaultState { + test_failed: true, + confirmed_dtc: true, + ..Default::default() + }; + storage.put("mixed_entity", &FaultId::Numeric(0x1001), state).unwrap(); + + let manager = SovdFaultManager::new(storage, registry); + let faults = manager.get_all_faults("mixed_entity").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x1001").unwrap(); + + assert_eq!(fault.symptom.as_deref(), Some("A numeric DTC-like fault")); + assert!(fault.symptom_translation_id.is_some()); + } +} diff --git a/src/dfm_lib/src/sovd_fault_storage.rs b/src/dfm_lib/src/sovd_fault_storage.rs new file mode 100644 index 0000000..313d8d0 --- /dev/null +++ b/src/dfm_lib/src/sovd_fault_storage.rs @@ -0,0 +1,585 @@ +// Copyright (c) 2026 
// Copyright (c) 2026 Contributors to the Eclipse Foundation
//
// See the NOTICE file(s) distributed with this work for additional
// information regarding copyright ownership.
//
// This program and the accompanying materials are made available under the
// terms of the Apache License Version 2.0 which is available at
// https://www.apache.org/licenses/LICENSE-2.0
//
// SPDX-License-Identifier: Apache-2.0
//

//! Persistent storage layer for SOVD fault state.
//!
//! Defines the [`SovdFaultStateStorage`] trait and a `rust_kvs`-backed
//! implementation for durable fault-state persistence across DFM restarts.
//! Fault states, environment snapshots, and DTC status bits are stored
//! and retrieved through this abstraction.

use crate::sovd_fault_manager::SovdEnvData;
use common::{fault, types::ShortString};
use rust_kvs::prelude::*;
use std::{collections::HashMap, path::Path, sync::Mutex};

/// Errors that can occur during fault state storage operations.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
    /// Serialization of fault state data failed.
    #[error("serialization failed: {0}")]
    Serialization(String),

    /// Deserialization of fault state data failed.
    #[error("deserialization failed: {0}")]
    Deserialization(String),

    /// Backend storage operation failed.
    #[error("storage backend error: {0}")]
    Backend(String),

    /// Initialization of storage backend failed.
    #[error("storage initialization failed: {0}")]
    Init(String),

    /// Requested entry was not found.
    #[error("entry not found")]
    NotFound,
}

/// Per-fault persisted state: DTC-style status bits, the captured
/// environment-data snapshot, and aging/occurrence bookkeeping.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct SovdFaultState {
    // Individual boolean fields are used instead of a packed integer for type
    // safety and readability. The integer representation is available via
    // `SovdFaultStatus::compute_mask()` when needed (e.g. for UDS/ISO 14229).
    pub(crate) test_failed: bool,
    pub(crate) test_failed_this_operation_cycle: bool,
    pub(crate) test_failed_since_last_clear: bool,
    pub(crate) test_not_completed_this_operation_cycle: bool,
    pub(crate) test_not_completed_since_last_clear: bool,
    pub(crate) pending_dtc: bool,
    pub(crate) confirmed_dtc: bool,
    pub(crate) warning_indicator_requested: bool,
    // Environment snapshot captured with the fault; serialized as a
    // string→string map (see `KvsSerialize::to_kvs`).
    pub(crate) env_data: SovdEnvData,

    // Aging counters (for reset/healing tracking)
    /// Number of times this fault has occurred.
    pub(crate) occurrence_counter: u32,
    /// Number of aging cycles passed (used for aging logic).
    pub(crate) aging_counter: u32,
    /// Number of times this fault was healed/reset.
    pub(crate) healing_counter: u32,
    /// Unix timestamp (secs) of first occurrence. 0 means "never seen"
    /// (see `record_occurrence`).
    pub(crate) first_occurrence_secs: u64,
    /// Unix timestamp (secs) of most recent occurrence.
    pub(crate) last_occurrence_secs: u64,
}

impl SovdFaultState {
    /// Record a new fault occurrence: increment counter and update timestamps.
+ pub(crate) fn record_occurrence(&mut self, now_secs: u64) { + self.occurrence_counter = self.occurrence_counter.saturating_add(1); + if self.first_occurrence_secs == 0 { + self.first_occurrence_secs = now_secs; + } + self.last_occurrence_secs = now_secs; + } +} + +impl KvsSerialize for SovdFaultState { + type Error = ErrorCode; + + fn to_kvs(&self) -> Result { + let mut map = KvsMap::new(); + map.insert("test_failed".to_string(), self.test_failed.to_kvs()?); + map.insert( + "test_failed_this_operation_cycle".to_string(), + self.test_failed_this_operation_cycle.to_kvs()?, + ); + map.insert("test_failed_since_last_clear".to_string(), self.test_failed_since_last_clear.to_kvs()?); + map.insert( + "test_not_completed_this_operation_cycle".to_string(), + self.test_not_completed_this_operation_cycle.to_kvs()?, + ); + map.insert( + "test_not_completed_since_last_clear".to_string(), + self.test_not_completed_since_last_clear.to_kvs()?, + ); + map.insert("pending_dtc".to_string(), self.pending_dtc.to_kvs()?); + map.insert("confirmed_dtc".to_string(), self.confirmed_dtc.to_kvs()?); + map.insert("warning_indicator_requested".to_string(), self.warning_indicator_requested.to_kvs()?); + let kvs_env_data = self + .env_data + .iter() + .map(|(k, v)| (k.clone(), KvsValue::from(v.as_str()))) + .collect::(); + map.insert("env_data".to_string(), kvs_env_data.to_kvs()?); + + // Aging counters — propagate error on out-of-range values rather + // than silently clamping. Silent fallback could mask data corruption + // in long-running automotive systems. 
+ map.insert("occurrence_counter".to_string(), KvsValue::from(i64::from(self.occurrence_counter))); + map.insert("aging_counter".to_string(), KvsValue::from(i64::from(self.aging_counter))); + map.insert("healing_counter".to_string(), KvsValue::from(i64::from(self.healing_counter))); + map.insert( + "first_occurrence_secs".to_string(), + KvsValue::from( + i64::try_from(self.first_occurrence_secs) + .map_err(|_| ErrorCode::SerializationFailed("first_occurrence_secs out of i64 range".to_string()))?, + ), + ); + map.insert( + "last_occurrence_secs".to_string(), + KvsValue::from( + i64::try_from(self.last_occurrence_secs) + .map_err(|_| ErrorCode::SerializationFailed("last_occurrence_secs out of i64 range".to_string()))?, + ), + ); + + map.to_kvs() + } +} + +impl KvsDeserialize for SovdFaultState { + type Error = ErrorCode; + + fn from_kvs(kvs_value: &KvsValue) -> Result { + if let KvsValue::Object(map) = kvs_value { + let kvs_env_data = KvsMap::from_kvs(map.get("env_data").ok_or(ErrorCode::DeserializationFailed("env_data".to_string()))?)?; + let mut env_data = HashMap::new(); + + for (k, v) in kvs_env_data { + env_data.insert(k, String::from_kvs(&v)?); + } + + // Helper to extract u32/u64 with TryFrom — propagates error + // on negative or overflowing values to surface data corruption. + // Missing keys default to 0 for backward compatibility with + // storage created before these fields existed. 
+ fn get_u32(map: &KvsMap, key: &str) -> Result { + match map.get(key) { + Some(v) => { + let i = i64::from_kvs(v).map_err(|_| ErrorCode::DeserializationFailed(format!("{key}: not an i64")))?; + u32::try_from(i).map_err(|_| ErrorCode::DeserializationFailed(format!("{key}: value {i} out of u32 range"))) + } + None => Ok(0), + } + } + fn get_u64(map: &KvsMap, key: &str) -> Result { + match map.get(key) { + Some(v) => { + let i = i64::from_kvs(v).map_err(|_| ErrorCode::DeserializationFailed(format!("{key}: not an i64")))?; + u64::try_from(i).map_err(|_| ErrorCode::DeserializationFailed(format!("{key}: value {i} out of u64 range"))) + } + None => Ok(0), + } + } + + Ok(SovdFaultState { + test_failed: bool::from_kvs( + map.get("test_failed") + .ok_or(ErrorCode::DeserializationFailed("test_failed".to_string()))?, + )?, + test_failed_this_operation_cycle: bool::from_kvs( + map.get("test_failed_this_operation_cycle") + .ok_or(ErrorCode::DeserializationFailed("test_failed_this_operation_cycle".to_string()))?, + )?, + test_failed_since_last_clear: bool::from_kvs( + map.get("test_failed_since_last_clear") + .ok_or(ErrorCode::DeserializationFailed("test_failed_since_last_clear".to_string()))?, + )?, + test_not_completed_this_operation_cycle: bool::from_kvs( + map.get("test_not_completed_this_operation_cycle") + .ok_or(ErrorCode::DeserializationFailed("test_not_completed_this_operation_cycle".to_string()))?, + )?, + test_not_completed_since_last_clear: bool::from_kvs( + map.get("test_not_completed_since_last_clear") + .ok_or(ErrorCode::DeserializationFailed("test_not_completed_since_last_clear".to_string()))?, + )?, + pending_dtc: bool::from_kvs( + map.get("pending_dtc") + .ok_or(ErrorCode::DeserializationFailed("pending_dtc".to_string()))?, + )?, + confirmed_dtc: bool::from_kvs( + map.get("confirmed_dtc") + .ok_or(ErrorCode::DeserializationFailed("confirmed_dtc".to_string()))?, + )?, + warning_indicator_requested: bool::from_kvs( + map.get("warning_indicator_requested") + 
.ok_or(ErrorCode::DeserializationFailed("warning_indicator_requested".to_string()))?, + )?, + env_data, + // Aging counters — propagate deserialization errors + occurrence_counter: get_u32(map, "occurrence_counter")?, + aging_counter: get_u32(map, "aging_counter")?, + healing_counter: get_u32(map, "healing_counter")?, + first_occurrence_secs: get_u64(map, "first_occurrence_secs")?, + last_occurrence_secs: get_u64(map, "last_occurrence_secs")?, + }) + } else { + Err(ErrorCode::DeserializationFailed( + "expected KvsValue::Object for SovdFaultState".to_string(), + )) + } + } +} + +pub trait SovdFaultStateStorage: Send + Sync { + fn put(&self, path: &str, fault_id: &fault::FaultId, state: SovdFaultState) -> Result<(), StorageError>; + fn get_all(&self, path: &str) -> Result, StorageError>; + fn get(&self, path: &str, fault_id: &fault::FaultId) -> Result, StorageError>; + fn delete_all(&self, path: &str) -> Result<(), StorageError>; + fn delete(&self, path: &str, fault_id: &fault::FaultId) -> Result<(), StorageError>; +} + +pub struct KvsSovdFaultStateStorage { + kvs: Mutex, +} + +impl KvsSovdFaultStateStorage { + /// Create a new KVS-backed storage at the given path. + /// + /// # Instance pool + /// + /// KVS uses a process-global instance pool with a hard limit of + /// `KVS_MAX_INSTANCES` (currently 10). Each `instance` ID must be + /// unique within the process. Exceeding the limit or reusing an + /// instance ID with a different backend path causes + /// `InstanceParametersMismatch` errors. 
+ pub fn new(dir: &Path, instance: usize) -> Result { + let builder = KvsBuilder::new(InstanceId(instance)) + .backend(Box::new(JsonBackendBuilder::new().working_dir(dir.to_path_buf()).build())) + .kvs_load(KvsLoad::Optional); + let kvs = builder.build().map_err(|e| StorageError::Init(format!("{e:?}")))?; + + Ok(Self { kvs: Mutex::new(kvs) }) + } +} + +impl SovdFaultStateStorage for KvsSovdFaultStateStorage { + fn put(&self, path: &str, fault_id: &fault::FaultId, state: SovdFaultState) -> Result<(), StorageError> { + let kvs = self.kvs.lock().map_err(|e| StorageError::Backend(format!("lock poisoned: {e}")))?; + let mut states = kvs.get_value_as::(path).unwrap_or_default(); + states.insert( + fault_id_to_key(fault_id), + state.to_kvs().map_err(|e| StorageError::Serialization(format!("{e:?}")))?, + ); + kvs.set_value(path, states).map_err(|e| StorageError::Backend(format!("{e:?}")))?; + Ok(()) + } + + fn get_all(&self, path: &str) -> Result, StorageError> { + let kvs = self.kvs.lock().map_err(|e| StorageError::Backend(format!("lock poisoned: {e}")))?; + let states = match kvs.get_value_as::(path) { + Ok(s) => s, + Err(_) => return Ok(Vec::new()), + }; + let mut result = Vec::new(); + for (fault_id_key, state) in &states { + let fault_state = SovdFaultState::from_kvs(state).map_err(|e| StorageError::Deserialization(format!("{e:?}")))?; + result.push((fault_id_from_key(fault_id_key)?, fault_state)); + } + Ok(result) + } + + fn get(&self, path: &str, fault_id: &fault::FaultId) -> Result, StorageError> { + let kvs = self.kvs.lock().map_err(|e| StorageError::Backend(format!("lock poisoned: {e}")))?; + let states = match kvs.get_value_as::(path) { + Ok(s) => s, + Err(_) => return Ok(None), + }; + match states.get(&fault_id_to_key(fault_id)) { + Some(state) => { + let fault_state = SovdFaultState::from_kvs(state).map_err(|e| StorageError::Deserialization(format!("{e:?}")))?; + Ok(Some(fault_state)) + } + None => Ok(None), + } + } + + fn delete_all(&self, path: &str) -> 
Result<(), StorageError> { + let kvs = self.kvs.lock().map_err(|e| StorageError::Backend(format!("lock poisoned: {e}")))?; + kvs.remove_key(path).map_err(|e| StorageError::Backend(format!("{e:?}")))?; + Ok(()) + } + + fn delete(&self, path: &str, fault_id: &fault::FaultId) -> Result<(), StorageError> { + let kvs = self.kvs.lock().map_err(|e| StorageError::Backend(format!("lock poisoned: {e}")))?; + let mut states = kvs.get_value_as::(path).map_err(|e| StorageError::Backend(format!("{e:?}")))?; + let key = fault_id_to_key(fault_id); + if states.remove(&key).is_some() { + kvs.set_value(path, states).map_err(|e| StorageError::Backend(format!("{e:?}")))?; + Ok(()) + } else { + Err(StorageError::NotFound) + } + } +} + +/// Encode a FaultId as a typed storage key. +/// +/// Format: `n:` for Numeric, `t:` for Text, `u:` for Uuid. +/// This preserves variant type information for lossless roundtrips. +fn fault_id_to_key(fault_id: &fault::FaultId) -> String { + match fault_id { + fault::FaultId::Numeric(x) => format!("n:{x}"), + fault::FaultId::Text(t) => format!("t:{t}"), + fault::FaultId::Uuid(u) => { + let hex: String = u.iter().map(|b| format!("{b:02x}")).collect(); + format!("u:{hex}") + } + } +} + +/// Decode a storage key back to a FaultId. +/// +/// Supports typed prefix format (`n:`, `t:`, `u:`) for new entries. +/// Unrecognized keys (from older storage) are treated as `FaultId::Text` +/// for backward compatibility. 
#[allow(clippy::arithmetic_side_effects)]
fn fault_id_from_key(key: &str) -> Result<fault::FaultId, StorageError> {
    if let Some(num_str) = key.strip_prefix("n:") {
        // The integer width is inferred from the `FaultId::Numeric`
        // constructor, so the key parses into exactly the variant's type.
        let n = num_str
            .parse()
            .map_err(|_| StorageError::Deserialization(format!("invalid numeric key: {key}")))?;
        return Ok(fault::FaultId::Numeric(n));
    }
    if let Some(hex_str) = key.strip_prefix("u:") {
        if hex_str.len() != 32 {
            return Err(StorageError::Deserialization(format!(
                "invalid uuid key length (expected 32 hex chars): {key}"
            )));
        }
        let mut bytes = [0u8; 16];
        for (i, byte) in bytes.iter_mut().enumerate() {
            // Two hex chars per byte; the slice arithmetic cannot go out of
            // bounds because the length was checked to be exactly 32 above.
            *byte =
                u8::from_str_radix(&hex_str[i * 2..i * 2 + 2], 16).map_err(|_| StorageError::Deserialization(format!("invalid uuid hex: {key}")))?;
        }
        return Ok(fault::FaultId::Uuid(bytes));
    }
    if let Some(text) = key.strip_prefix("t:") {
        let short = ShortString::try_from(text).map_err(|_| StorageError::Deserialization(format!("fault id key too long: {key}")))?;
        return Ok(fault::FaultId::Text(short));
    }
    // Backward compatibility: untyped keys from older storage are treated as Text.
    let short = ShortString::try_from(key).map_err(|_| StorageError::Deserialization(format!("fault id key too long: {key}")))?;
    Ok(fault::FaultId::Text(short))
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// A fully-populated state must roundtrip losslessly through KVS.
    #[test]
    fn to_and_from_kvs() {
        let state = SovdFaultState {
            test_failed: true,
            test_failed_this_operation_cycle: false,
            test_failed_since_last_clear: true,
            test_not_completed_this_operation_cycle: true,
            test_not_completed_since_last_clear: false,
            pending_dtc: false,
            confirmed_dtc: false,
            warning_indicator_requested: true,
            env_data: SovdEnvData::from([
                ("key1".into(), "val1".into()),
                ("key2".into(), "val2".into()),
                ("key3".into(), "val3".into()),
            ]),
            occurrence_counter: 5,
            aging_counter: 2,
            healing_counter: 1,
            first_occurrence_secs: 1700000000,
            last_occurrence_secs: 1700001000,
        };

        let state_to_kvs = state.to_kvs().unwrap();
        let state_from_kvs = SovdFaultState::from_kvs(&state_to_kvs).unwrap();

        assert_eq!(state, state_from_kvs);
    }

    // ==================== Typed key roundtrip tests ====================

    #[test]
    fn fault_id_key_roundtrip_numeric() {
        let id = fault::FaultId::Numeric(42);
        let key = fault_id_to_key(&id);
        assert_eq!(key, "n:42");
        let parsed = fault_id_from_key(&key).unwrap();
        assert_eq!(parsed, id);
    }

    #[test]
    fn fault_id_key_roundtrip_text() {
        let id = fault::FaultId::Text(ShortString::try_from("test_fault").unwrap());
        let key = fault_id_to_key(&id);
        assert_eq!(key, "t:test_fault");
        let parsed = fault_id_from_key(&key).unwrap();
        assert_eq!(parsed, id);
    }

    #[test]
    fn fault_id_key_roundtrip_uuid() {
        let id = fault::FaultId::Uuid([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
        let key = fault_id_to_key(&id);
        assert_eq!(key, "u:0102030405060708090a0b0c0d0e0f10");
        let parsed = fault_id_from_key(&key).unwrap();
        assert_eq!(parsed, id);
    }

    #[test]
    fn
    fault_id_key_backward_compat_untyped() {
        // Old-format keys without a type prefix are treated as Text.
        let parsed = fault_id_from_key("old_fault_key").unwrap();
        assert_eq!(parsed, fault::FaultId::Text(ShortString::try_from("old_fault_key").unwrap()));
    }

    /// A `u:` key with the wrong number of hex chars must be rejected.
    #[test]
    fn fault_id_key_invalid_uuid_length() {
        let result = fault_id_from_key("u:0102");
        assert!(result.is_err());
    }

    /// An `n:` key whose payload is not a number must be rejected.
    #[test]
    fn fault_id_key_invalid_numeric() {
        let result = fault_id_from_key("n:not_a_number");
        assert!(result.is_err());
    }

    /// Deserializing pre-aging-era data must succeed with zeroed counters.
    #[test]
    fn backward_compat_missing_aging_fields() {
        // Simulate old KVS data without aging counters
        let mut map = KvsMap::new();
        map.insert("test_failed".to_string(), true.to_kvs().unwrap());
        map.insert("test_failed_this_operation_cycle".to_string(), false.to_kvs().unwrap());
        map.insert("test_failed_since_last_clear".to_string(), false.to_kvs().unwrap());
        map.insert("test_not_completed_this_operation_cycle".to_string(), false.to_kvs().unwrap());
        map.insert("test_not_completed_since_last_clear".to_string(), false.to_kvs().unwrap());
        map.insert("pending_dtc".to_string(), false.to_kvs().unwrap());
        map.insert("confirmed_dtc".to_string(), true.to_kvs().unwrap());
        map.insert("warning_indicator_requested".to_string(), false.to_kvs().unwrap());
        map.insert("env_data".to_string(), KvsMap::new().to_kvs().unwrap());

        let kvs_value = map.to_kvs().unwrap();
        let state = SovdFaultState::from_kvs(&kvs_value).unwrap();

        // New aging fields should default to 0
        assert_eq!(state.occurrence_counter, 0);
        assert_eq!(state.aging_counter, 0);
        assert_eq!(state.healing_counter, 0);
        assert_eq!(state.first_occurrence_secs, 0);
        assert_eq!(state.last_occurrence_secs, 0);
        // Old fields should still work
        assert!(state.test_failed);
        assert!(state.confirmed_dtc);
    }
}

#[cfg(test)]
mod storage_tests {
    #![allow(clippy::unwrap_used, clippy::expect_used)]

    use crate::dfm_test_utils::InMemoryStorage;
    use crate::sovd_fault_storage::{SovdFaultState, SovdFaultStateStorage};
    use common::fault::*;

    /// Storage stores and retrieves fault state correctly.
    #[test]
    fn storage_put_and_get() {
        let storage = InMemoryStorage::new();

        let state = SovdFaultState {
            test_failed: true,
            confirmed_dtc: true,
            ..Default::default()
        };

        storage.put("entity/1", &FaultId::Numeric(42), state).unwrap();

        let retrieved = storage.get("entity/1", &FaultId::Numeric(42)).unwrap();
        assert!(retrieved.is_some());
        let retrieved = retrieved.unwrap();
        assert!(retrieved.test_failed);
        assert!(retrieved.confirmed_dtc);
    }

    /// Storage returns None for non-existent entries.
    #[test]
    fn storage_get_nonexistent() {
        let storage = InMemoryStorage::new();
        assert!(storage.get("entity/1", &FaultId::Numeric(42)).unwrap().is_none());
    }

    /// Storage get_all returns all faults for a path.
    #[test]
    fn storage_get_all() {
        let storage = InMemoryStorage::new();

        storage
            .put(
                "entity/1",
                &FaultId::Numeric(1),
                SovdFaultState {
                    test_failed: true,
                    ..Default::default()
                },
            )
            .unwrap();
        storage
            .put(
                "entity/1",
                &FaultId::Numeric(2),
                SovdFaultState {
                    test_failed: false,
                    ..Default::default()
                },
            )
            .unwrap();

        let all = storage.get_all("entity/1").unwrap();
        assert_eq!(all.len(), 2);
    }

    /// Storage isolates different paths.
    #[test]
    fn storage_path_isolation() {
        let storage = InMemoryStorage::new();

        storage.put("entity/1", &FaultId::Numeric(1), SovdFaultState::default()).unwrap();
        storage.put("entity/2", &FaultId::Numeric(2), SovdFaultState::default()).unwrap();

        let e1 = storage.get_all("entity/1").unwrap();
        let e2 = storage.get_all("entity/2").unwrap();
        assert_eq!(e1.len(), 1);
        assert_eq!(e2.len(), 1);

        assert!(storage.get_all("entity/3").unwrap().is_empty());
    }

    /// Storage delete_all removes all faults for a path.
+ #[test] + fn storage_delete_all() { + let storage = InMemoryStorage::new(); + + storage.put("entity/1", &FaultId::Numeric(1), SovdFaultState::default()).unwrap(); + storage.put("entity/1", &FaultId::Numeric(2), SovdFaultState::default()).unwrap(); + + storage.delete_all("entity/1").unwrap(); + assert!(storage.get_all("entity/1").unwrap().is_empty()); + } + + /// Storage delete removes a single fault. + #[test] + fn storage_delete_single() { + let storage = InMemoryStorage::new(); + + storage.put("entity/1", &FaultId::Numeric(1), SovdFaultState::default()).unwrap(); + storage.put("entity/1", &FaultId::Numeric(2), SovdFaultState::default()).unwrap(); + + storage.delete("entity/1", &FaultId::Numeric(1)).unwrap(); + assert!(storage.get("entity/1", &FaultId::Numeric(1)).unwrap().is_none()); + assert!(storage.get("entity/1", &FaultId::Numeric(2)).unwrap().is_some()); + } +} diff --git a/src/dfm_lib/src/transport.rs b/src/dfm_lib/src/transport.rs new file mode 100644 index 0000000..23df364 --- /dev/null +++ b/src/dfm_lib/src/transport.rs @@ -0,0 +1,69 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Transport abstraction for DFM ↔ FaultLib communication. +//! +//! [`DfmTransport`] decouples the DFM run-loop from the concrete IPC +//! mechanism (iceoryx2). Implementations can use shared-memory IPC, +//! in-process channels, or any other messaging backend. +//! +//! The default production implementation is +//! [`Iceoryx2Transport`](crate::fault_lib_communicator::Iceoryx2Transport), +//! which uses iceoryx2 zero-copy shared memory. 
+ +use common::enabling_condition::EnablingConditionNotification; +use common::sink_error::SinkError; +use common::types::DiagnosticEvent; +use core::time::Duration; + +/// Abstraction over the DFM-side IPC transport. +/// +/// The DFM run-loop calls these methods each iteration to receive events +/// from reporter applications, publish hash-check responses, and broadcast +/// enabling-condition notifications. +/// +/// # Implementing a custom transport +/// +/// ```rust,ignore +/// use dfm_lib::transport::DfmTransport; +/// +/// struct MyTransport { /* ... */ } +/// +/// impl DfmTransport for MyTransport { +/// fn receive_event(&self) -> Result, SinkError> { /* ... */ } +/// fn publish_hash_response(&self, response: bool) -> Result<(), SinkError> { /* ... */ } +/// fn publish_ec_notification(&self, notification: EnablingConditionNotification) -> Result<(), SinkError> { /* ... */ } +/// fn wait(&self, timeout: Duration) -> Result { /* ... */ } +/// } +/// ``` +pub trait DfmTransport: Send + 'static { + /// Receive the next diagnostic event, if available. + /// + /// Returns `Ok(None)` when no events are pending (non-blocking). + /// Returns `Ok(Some(event))` for each queued event. + /// Returns `Err` on transport failure. + fn receive_event(&self) -> Result, SinkError>; + + /// Publish a catalog hash-check response back to reporters. + fn publish_hash_response(&self, response: bool) -> Result<(), SinkError>; + + /// Broadcast an enabling-condition status notification to all FaultLib + /// subscribers. + fn publish_ec_notification(&self, notification: EnablingConditionNotification) -> Result<(), SinkError>; + + /// Wait/sleep for one DFM cycle iteration. + /// + /// Returns `Ok(true)` if the node is still alive and the loop should + /// continue. Returns `Ok(false)` if the node died and the loop should + /// exit. Returns `Err` on transport failure. 
+ fn wait(&self, timeout: Duration) -> Result; +} From 690a9ad428c59012619ee66fc5e5a920974a28f0 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:28:10 +0100 Subject: [PATCH 5/7] test: add integration test suite for fault-lib and DFM interaction E2E tests covering lifecycle transitions, debounce/aging/cycles, persistent storage, concurrent access, boundary values, error paths, multi-catalog, JSON catalog loading, IPC query/clear, and report-and-query flow. --- tests/fault_catalog.json | 53 +++ tests/integration/BUILD | 37 ++ tests/integration/Cargo.toml | 22 + tests/integration/src/helpers.rs | 210 ++++++++++ tests/integration/src/lib.rs | 57 +++ tests/integration/src/test_boundary_values.rs | 378 +++++++++++++++++ .../integration/src/test_concurrent_access.rs | 254 ++++++++++++ .../src/test_debounce_aging_cycles_ec.rs | 388 ++++++++++++++++++ tests/integration/src/test_error_paths.rs | 304 ++++++++++++++ tests/integration/src/test_ipc_query.rs | 271 ++++++++++++ tests/integration/src/test_json_catalog.rs | 183 +++++++++ .../src/test_lifecycle_transitions.rs | 184 +++++++++ tests/integration/src/test_multi_catalog.rs | 176 ++++++++ .../src/test_persistent_storage.rs | 136 ++++++ .../integration/src/test_report_and_query.rs | 119 ++++++ 15 files changed, 2772 insertions(+) create mode 100644 tests/fault_catalog.json create mode 100644 tests/integration/BUILD create mode 100644 tests/integration/Cargo.toml create mode 100644 tests/integration/src/helpers.rs create mode 100644 tests/integration/src/lib.rs create mode 100644 tests/integration/src/test_boundary_values.rs create mode 100644 tests/integration/src/test_concurrent_access.rs create mode 100644 tests/integration/src/test_debounce_aging_cycles_ec.rs create mode 100644 tests/integration/src/test_error_paths.rs create mode 100644 tests/integration/src/test_ipc_query.rs create mode 100644 tests/integration/src/test_json_catalog.rs create mode 100644 
tests/integration/src/test_lifecycle_transitions.rs create mode 100644 tests/integration/src/test_multi_catalog.rs create mode 100644 tests/integration/src/test_persistent_storage.rs create mode 100644 tests/integration/src/test_report_and_query.rs diff --git a/tests/fault_catalog.json b/tests/fault_catalog.json new file mode 100644 index 0000000..29a5b95 --- /dev/null +++ b/tests/fault_catalog.json @@ -0,0 +1,53 @@ +{ + "id": "hvac", + "version": 3, + "faults": [ + { + "id": { + "Text": "d1" + }, + "name": "Descriptor 1", + "summary": null, + "category": "Software", + "severity": "Debug", + "compliance": [ + "EmissionRelevant", + "SafetyCritical" + ], + "reporter_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + }, + { + "id": { + "Text": "d2" + }, + "name": "Descriptor 2", + "summary": "Human-readable summary", + "category": "Configuration", + "severity": "Warn", + "compliance": [ + "SecurityRelevant" + ], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": { + "EdgeWithCooldown": { + "cooldown": { + "secs": 0, + "nanos": 100000000 + } + } + }, + "manager_side_reset": null + } + ] +} \ No newline at end of file diff --git a/tests/integration/BUILD b/tests/integration/BUILD new file mode 100644 index 0000000..f612f2e --- /dev/null +++ b/tests/integration/BUILD @@ -0,0 +1,37 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_rust//rust:defs.bzl", "rust_test") + +filegroup( + name = "integration_srcs", + srcs = glob(["src/**/*.rs"]), +) + +# E2E integration tests exercising the full fault-lib → DFM pipeline +# without iceoryx2 IPC (in-process wiring only). +rust_test( + name = "tests", + srcs = [":integration_srcs"], + edition = "2024", + deps = [ + "//src/common", + "//src/dfm_lib", + "//src/fault_lib", + "@score_fault_lib_crates//:env_logger", + "@score_fault_lib_crates//:log", + "@score_fault_lib_crates//:serde_json", + "@score_fault_lib_crates//:serial_test", + "@score_fault_lib_crates//:tempfile", + ], +) diff --git a/tests/integration/Cargo.toml b/tests/integration/Cargo.toml new file mode 100644 index 0000000..846d50a --- /dev/null +++ b/tests/integration/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "integration_tests" +version.workspace = true +edition.workspace = true +publish = false + +[lib] +name = "integration_tests" +path = "src/lib.rs" + +[lints] +workspace = true + +[dev-dependencies] +common = { path = "../../src/common" } +dfm_lib = { path = "../../src/dfm_lib" } +fault_lib = { path = "../../src/fault_lib" } +tempfile = "3.20" +env_logger.workspace = true +log.workspace = true +serial_test.workspace = true +serde_json.workspace = true diff --git a/tests/integration/src/helpers.rs b/tests/integration/src/helpers.rs new file mode 100644 index 0000000..167c99a --- /dev/null +++ b/tests/integration/src/helpers.rs @@ -0,0 +1,210 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Shared helpers for integration tests. +//! +//! Provides a convenience [`TestHarness`] that wires up all DFM components +//! (catalog, registry, processor, storage, SOVD manager) in a single call, +//! matching the real deployment topology minus IPC transport. + +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::debounce::DebounceMode; +use common::fault::*; +use common::types::*; +use dfm_lib::fault_catalog_registry::FaultCatalogRegistry; +use dfm_lib::fault_record_processor::FaultRecordProcessor; +use dfm_lib::operation_cycle::OperationCycleTracker; +use dfm_lib::sovd_fault_manager::SovdFaultManager; +use dfm_lib::sovd_fault_storage::KvsSovdFaultStateStorage; +use std::path::Path; +use std::sync::{Arc, LazyLock, RwLock}; +use std::time::Duration; +use tempfile::TempDir; + +/// Shared KVS storage directory used by **all** integration tests. +/// +/// KVS uses a process-wide global pool (`KVS_MAX_INSTANCES = 10`) that +/// binds each instance ID to a specific backend path on first use. +/// Subsequent calls with the same instance ID but a *different* path +/// return `InstanceParametersMismatch`. +/// +/// To work around this, every test uses the **same** directory for KVS +/// instance 0. Tests are serialized with `#[serial]` to prevent parallel +/// data corruption, and each test cleans the data via `delete_all_faults`. +static SHARED_STORAGE_DIR: LazyLock = LazyLock::new(|| TempDir::new().expect("failed to create shared storage dir")); + +/// Returns the path of the shared KVS storage directory. +pub fn shared_storage_path() -> &'static Path { + SHARED_STORAGE_DIR.path() +} + +/// All-in-one test harness wiring DFM components together. +/// +/// All instances share the same KVS backend directory (process-wide +/// constraint). 
Tests must run serially (`#[serial]`). +pub struct TestHarness { + pub processor: FaultRecordProcessor, + pub manager: SovdFaultManager, +} + +impl TestHarness { + /// Build a harness from one or more [`FaultCatalogConfig`]s. + /// + /// Each config represents a separate reporter application's fault catalog + /// (e.g., HVAC, IVI), mirroring how multiple apps register with a single DFM. + pub fn new(configs: Vec) -> Self { + Self::with_storage_path(configs, shared_storage_path()) + } + + /// Build a harness using an explicit storage path. Useful for persistence + /// tests where the same directory is reused across harness instances. + pub fn with_storage_path(configs: Vec, storage_path: &Path) -> Self { + let storage = Arc::new(KvsSovdFaultStateStorage::new(storage_path, 0).expect("storage init")); + let catalogs: Vec<_> = configs + .into_iter() + .map(|cfg| FaultCatalogBuilder::new().cfg_struct(cfg).expect("builder config").build()) + .collect(); + let registry = Arc::new(FaultCatalogRegistry::new(catalogs)); + let cycle_tracker = Arc::new(RwLock::new(OperationCycleTracker::new())); + + let processor = FaultRecordProcessor::new(Arc::clone(&storage), Arc::clone(®istry), cycle_tracker); + let manager = SovdFaultManager::new(storage, registry); + + Self { processor, manager } + } + + /// Clean all fault data from the shared storage. + /// + /// Call this at the start of each test to ensure a clean slate + /// (defence-in-depth alongside `#[serial]`). + pub fn clean_catalogs(&mut self, paths: &[&str]) { + for path in paths { + // Ignore errors — the path may not have data yet. 
+ let _ = self.manager.delete_all_faults(path); + } + } +} + +// ============================================================================ +// Catalog configs +// ============================================================================ + +/// HVAC subsystem catalog with two faults: +/// - `CabinTempSensorStuck` (Numeric 0x7001, reporter-side HoldTime debounce) +/// - `BlowerSpeedMismatch` (Text, manager-side EdgeWithCooldown debounce) +pub fn hvac_catalog_config() -> FaultCatalogConfig { + FaultCatalogConfig { + id: "hvac".into(), + version: 3, + faults: vec![ + FaultDescriptor { + id: FaultId::Numeric(0x7001), + name: to_static_short_string("CabinTempSensorStuck").unwrap(), + summary: None, + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::try_from(&[ComplianceTag::EmissionRelevant][..]).unwrap(), + reporter_side_debounce: Some(DebounceMode::HoldTime { + duration: Duration::from_secs(60).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Text(to_static_short_string("hvac.blower.speed_sensor_mismatch").unwrap()), + name: to_static_short_string("BlowerSpeedMismatch").unwrap(), + summary: Some(to_static_long_string("Blower motor speed does not match commanded value").unwrap()), + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::try_from(&[ComplianceTag::SecurityRelevant, ComplianceTag::SafetyCritical][..]).unwrap(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(100).into(), + }), + manager_side_reset: None, + }, + ], + } +} + +/// IVI (In-Vehicle Infotainment) catalog with a single software fault. 
+pub fn ivi_catalog_config() -> FaultCatalogConfig { + FaultCatalogConfig { + id: "ivi".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Text(to_static_short_string("ivi.display.init_timeout").unwrap()), + name: to_static_short_string("DisplayInitTimeout").unwrap(), + summary: Some(to_static_long_string("Display initialization exceeded 5s timeout").unwrap()), + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + } +} + +// ============================================================================ +// Record builders +// ============================================================================ + +/// Build a [`FaultRecord`] simulating what a reporter would create. +pub fn make_fault_record(fault_id: FaultId, stage: LifecycleStage) -> FaultRecord { + FaultRecord { + id: fault_id, + time: IpcTimestamp::default(), + source: common::SourceId { + entity: to_static_short_string("test_reporter").unwrap(), + ecu: Some(to_static_short_string("ECU-A").unwrap()), + domain: Some(to_static_short_string("body").unwrap()), + sw_component: Some(to_static_short_string("hvac_ctrl").unwrap()), + instance: Some(to_static_short_string("0").unwrap()), + }, + lifecycle_phase: LifecyclePhase::Running, + lifecycle_stage: stage, + env_data: MetadataVec::new(), + } +} + +/// Build a [`FaultRecord`] with environment data attached. 
+pub fn make_fault_record_with_env(fault_id: FaultId, stage: LifecycleStage, env: &[(&str, &str)]) -> FaultRecord { + let env_data = MetadataVec::try_from( + &env.iter() + .map(|(k, v)| (to_static_short_string(k).unwrap(), to_static_short_string(v).unwrap())) + .collect::>()[..], + ) + .unwrap(); + + FaultRecord { + id: fault_id, + time: IpcTimestamp::default(), + source: common::SourceId { + entity: to_static_short_string("test_reporter").unwrap(), + ecu: Some(to_static_short_string("ECU-A").unwrap()), + domain: Some(to_static_short_string("body").unwrap()), + sw_component: Some(to_static_short_string("hvac_ctrl").unwrap()), + instance: Some(to_static_short_string("0").unwrap()), + }, + lifecycle_phase: LifecyclePhase::Running, + lifecycle_stage: stage, + env_data, + } +} + +/// Helper to create a [`LongString`] path for DFM routing. +pub fn make_path(path: &str) -> LongString { + LongString::from_str_truncated(path).unwrap() +} diff --git a/tests/integration/src/lib.rs b/tests/integration/src/lib.rs new file mode 100644 index 0000000..2d29b42 --- /dev/null +++ b/tests/integration/src/lib.rs @@ -0,0 +1,57 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +#![cfg_attr( + test, + allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::std_instead_of_core, + clippy::std_instead_of_alloc, + clippy::arithmetic_side_effects + ) +)] +//! Integration tests demonstrating the fault-lib ↔ DFM end-to-end flow. +//! +//! These tests exercise the full pipeline without IPC (iceoryx2), using +//! in-process wiring instead: +//! +//! 1. Build a `FaultCatalog` from JSON config +//! 2. 
Create a `FaultRecordProcessor` (DFM core) with persistent storage +//! 3. Simulate reporter-side record creation +//! 4. Feed records through the processor +//! 5. Query results via `SovdFaultManager` +//! +//! This mirrors a real deployment where fault-lib reporters publish to DFM +//! over IPC, but tests the logic without shared-memory transport. + +#[cfg(test)] +mod helpers; +#[cfg(test)] +mod test_boundary_values; +#[cfg(test)] +mod test_concurrent_access; +#[cfg(test)] +mod test_debounce_aging_cycles_ec; +#[cfg(test)] +mod test_error_paths; +#[cfg(test)] +mod test_ipc_query; +#[cfg(test)] +mod test_json_catalog; +#[cfg(test)] +mod test_lifecycle_transitions; +#[cfg(test)] +mod test_multi_catalog; +#[cfg(test)] +mod test_persistent_storage; +#[cfg(test)] +mod test_report_and_query; diff --git a/tests/integration/src/test_boundary_values.rs b/tests/integration/src/test_boundary_values.rs new file mode 100644 index 0000000..53af366 --- /dev/null +++ b/tests/integration/src/test_boundary_values.rs @@ -0,0 +1,378 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Boundary value integration tests. +//! +//! Tests on extreme/edge values: max-length fault IDs, max faults in a +//! catalog, many catalogs, env data limits, and Unicode in identifiers. + +use crate::helpers::*; +use common::catalog::FaultCatalogConfig; +use common::fault::*; +use common::types::*; +use serial_test::serial; + +// ============================================================================ +// 1. 
Max-length fault IDs +// ============================================================================ + +/// A text FaultId at the maximum ShortString length (64 bytes) should +/// work correctly through the full pipeline. +#[test] +#[serial] +fn max_length_text_fault_id() { + // ShortString is 64 bytes; use a string that fills it exactly + let max_id = "a".repeat(63); // 63 chars + null = 64 bytes in StaticString + let fault_id = FaultId::Text(to_static_short_string(&max_id).unwrap()); + + let config = FaultCatalogConfig { + id: "boundary_text".into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id.clone(), + name: to_static_short_string("MaxLenFault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + + let mut harness = TestHarness::new(vec![config]); + harness.clean_catalogs(&["boundary_text"]); + + let path = make_path("boundary_text"); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("boundary_text").unwrap(); + assert_eq!(faults.len(), 1); + let fault = &faults[0]; + assert_eq!(fault.code, max_id); + assert_eq!(fault.typed_status.as_ref().unwrap().test_failed, Some(true)); +} + +/// UUID FaultId (all zeros) boundary value works through pipeline. 
+#[test] +#[serial] +fn uuid_fault_id_all_zeros() { + let fault_id = FaultId::Uuid([0u8; 16]); + + let config = FaultCatalogConfig { + id: "boundary_uuid".into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id.clone(), + name: to_static_short_string("ZeroUuid").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + + let mut harness = TestHarness::new(vec![config]); + harness.clean_catalogs(&["boundary_uuid"]); + + let path = make_path("boundary_uuid"); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("boundary_uuid").unwrap(); + assert_eq!(faults.len(), 1); + assert_eq!(faults[0].code, "00000000-0000-0000-0000-000000000000"); +} + +/// UUID FaultId (all 0xFF) boundary value works through pipeline. 
+#[test] +#[serial] +fn uuid_fault_id_all_max() { + let fault_id = FaultId::Uuid([0xFF; 16]); + + let config = FaultCatalogConfig { + id: "boundary_uuid_max".into(), + version: 1, + faults: vec![FaultDescriptor { + id: fault_id.clone(), + name: to_static_short_string("MaxUuid").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + + let mut harness = TestHarness::new(vec![config]); + harness.clean_catalogs(&["boundary_uuid_max"]); + + let path = make_path("boundary_uuid_max"); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("boundary_uuid_max").unwrap(); + assert_eq!(faults.len(), 1); + assert_eq!(faults[0].code, "ffffffff-ffff-ffff-ffff-ffffffffffff"); +} + +// ============================================================================ +// 2. Max faults in a catalog +// ============================================================================ + +/// A catalog with many faults (50) builds and queries correctly. 
+#[test] +#[serial] +fn catalog_with_many_faults() { + let faults: Vec = (0..50u32) + .map(|i| FaultDescriptor { + id: FaultId::Numeric(i), + name: to_static_short_string(format!("Fault_{i}")).unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }) + .collect(); + + let config = FaultCatalogConfig { + id: "many_faults".into(), + version: 1, + faults, + }; + + let mut harness = TestHarness::new(vec![config]); + harness.clean_catalogs(&["many_faults"]); + + // Process a record for every fault + let path = make_path("many_faults"); + for i in 0..50u32 { + let record = make_fault_record(FaultId::Numeric(i), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + } + + let faults = harness.manager.get_all_faults("many_faults").unwrap(); + assert_eq!(faults.len(), 50, "All 50 faults should be queryable"); + + // Verify all are marked as failed + for fault in &faults { + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(true), + "Fault {} should be marked as failed", + fault.code + ); + } +} + +// ============================================================================ +// 3. Max catalogs +// ============================================================================ + +/// Multiple catalogs (5) registered simultaneously work correctly. 
+#[test] +#[serial] +fn many_catalogs_registered() { + let configs: Vec = (0..5) + .map(|i| FaultCatalogConfig { + id: format!("catalog_{i}").into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Numeric(i * 100), + name: to_static_short_string(format!("Fault_C{i}")).unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }) + .collect(); + + let catalog_ids: Vec = (0..5).map(|i| format!("catalog_{i}")).collect(); + let catalog_refs: Vec<&str> = catalog_ids.iter().map(|s| s.as_str()).collect(); + + let mut harness = TestHarness::new(configs); + harness.clean_catalogs(&catalog_refs); + + // Process one fault per catalog + for i in 0..5u32 { + let path = make_path(&format!("catalog_{i}")); + let record = make_fault_record(FaultId::Numeric(i * 100), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + } + + // Each catalog should have exactly 1 fault + for i in 0..5 { + let faults = harness.manager.get_all_faults(&format!("catalog_{i}")).unwrap(); + assert_eq!(faults.len(), 1, "catalog_{i} should have 1 fault"); + assert_eq!(faults[0].typed_status.as_ref().unwrap().test_failed, Some(true)); + } +} + +// ============================================================================ +// 4. Env data limits +// ============================================================================ + +/// Fault with maximum env data entries (MetadataVec capacity = 8) works. 
#[test]
#[serial]
fn fault_with_max_env_data_entries() {
    let mut harness = TestHarness::new(vec![hvac_catalog_config()]);
    harness.clean_catalogs(&["hvac"]);

    // Fill MetadataVec to its capacity of 8 entries: key0/val0 .. key7/val7.
    let owned: Vec<(String, String)> = (0..8).map(|i| (format!("key{i}"), format!("val{i}"))).collect();
    let env: Vec<(&str, &str)> = owned.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect();

    let record = make_fault_record_with_env(FaultId::Numeric(0x7001), LifecycleStage::Failed, &env);
    harness.processor.process_record(&make_path("hvac"), &record);

    let (fault, env_data) = harness.manager.get_fault("hvac", "0x7001").unwrap();
    assert_eq!(fault.typed_status.as_ref().unwrap().test_failed, Some(true));
    assert_eq!(env_data.len(), 8, "All 8 env data entries should be preserved");
    assert_eq!(env_data.get("key0"), Some(&"val0".to_string()));
    assert_eq!(env_data.get("key7"), Some(&"val7".to_string()));
}

// ============================================================================
// 5. Unicode in fault IDs and env data
// ============================================================================

/// Non-ASCII (Unicode) characters in text FaultIds are rejected by
/// `StaticString` which only accepts ASCII. This is a known API boundary.
#[test]
fn unicode_in_text_fault_id_rejected() {
    assert!(
        to_static_short_string("sensor.温度.stuck").is_err(),
        "StaticString should reject non-ASCII characters"
    );
}

/// ASCII-only text FaultIds with special characters work correctly.
#[test]
#[serial]
fn special_ascii_chars_in_text_fault_id() {
    let special_id = "sensor.temp-123_v2.stuck";
    let fault_id = FaultId::Text(to_static_short_string(special_id).unwrap());

    let descriptor = FaultDescriptor {
        id: fault_id.clone(),
        name: to_static_short_string("SpecialAscii").unwrap(),
        summary: None,
        category: FaultType::Software,
        severity: FaultSeverity::Warn,
        compliance: ComplianceVec::new(),
        reporter_side_debounce: None,
        reporter_side_reset: None,
        manager_side_debounce: None,
        manager_side_reset: None,
    };
    let config = FaultCatalogConfig {
        id: "ascii_special".into(),
        version: 1,
        faults: vec![descriptor],
    };

    let mut harness = TestHarness::new(vec![config]);
    harness.clean_catalogs(&["ascii_special"]);

    let record = make_fault_record(fault_id, LifecycleStage::Failed);
    harness.processor.process_record(&make_path("ascii_special"), &record);

    let faults = harness.manager.get_all_faults("ascii_special").unwrap();
    assert_eq!(faults.len(), 1);
    assert_eq!(faults[0].code, special_id);
    assert_eq!(faults[0].typed_status.as_ref().unwrap().test_failed, Some(true));
}

/// Non-ASCII (Unicode) env data values are rejected by StaticString.
/// This documents the API boundary for IPC-safe types.
#[test]
fn unicode_in_env_data_rejected() {
    assert!(
        to_static_short_string("エンジンルーム").is_err(),
        "StaticString should reject non-ASCII env data values"
    );
}

/// ASCII env data with special characters works correctly.
#[test]
#[serial]
fn special_ascii_in_env_data() {
    let mut harness = TestHarness::new(vec![hvac_catalog_config()]);
    harness.clean_catalogs(&["hvac"]);

    let env = [("location", "engine-room_v2"), ("status", "fault:active")];
    let record = make_fault_record_with_env(FaultId::Numeric(0x7001), LifecycleStage::Failed, &env);
    harness.processor.process_record(&make_path("hvac"), &record);

    let (_, env_data) = harness.manager.get_fault("hvac", "0x7001").unwrap();
    assert_eq!(env_data.get("location"), Some(&"engine-room_v2".to_string()));
    assert_eq!(env_data.get("status"), Some(&"fault:active".to_string()));
}

/// Numeric FaultId boundary: u32::MAX works through the pipeline.
#[test]
#[serial]
fn numeric_fault_id_max_u32() {
    let fault_id = FaultId::Numeric(u32::MAX);

    let descriptor = FaultDescriptor {
        id: fault_id.clone(),
        name: to_static_short_string("MaxNumeric").unwrap(),
        summary: None,
        category: FaultType::Software,
        severity: FaultSeverity::Warn,
        compliance: ComplianceVec::new(),
        reporter_side_debounce: None,
        reporter_side_reset: None,
        manager_side_debounce: None,
        manager_side_reset: None,
    };
    let config = FaultCatalogConfig {
        id: "boundary_u32".into(),
        version: 1,
        faults: vec![descriptor],
    };

    let mut harness = TestHarness::new(vec![config]);
    harness.clean_catalogs(&["boundary_u32"]);

    let record = make_fault_record(fault_id, LifecycleStage::Failed);
    harness.processor.process_record(&make_path("boundary_u32"), &record);

    let faults = harness.manager.get_all_faults("boundary_u32").unwrap();
    assert_eq!(faults.len(), 1);
    assert_eq!(faults[0].code, "0xFFFFFFFF");
    assert_eq!(faults[0].typed_status.as_ref().unwrap().test_failed, Some(true));
}

// ---------------------------------------------------------------------------
// File: tests/integration/src/test_concurrent_access.rs
// ---------------------------------------------------------------------------
@@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Concurrent access integration tests. +//! +//! These tests verify thread-safety under contention: concurrent +//! process_record + get_fault, concurrent delete + process, and +//! multi-catalog concurrent access. + +use crate::helpers::*; +use common::fault::*; +use serial_test::serial; +use std::sync::Arc; +use std::thread; + +// ============================================================================ +// 1. Concurrent process_record + get_fault on the same fault +// ============================================================================ + +/// Concurrent writes (process_record) and reads (get_all_faults) +/// on the same catalog must not panic or corrupt data. +/// +/// The outer `Mutex` is architecturally necessary because +/// `FaultRecordProcessor` requires `&mut self`. This test validates +/// that the lock acquisition + operation sequence does not deadlock +/// or panic under thread contention. 
#[test]
#[serial]
fn concurrent_process_and_query_does_not_panic() {
    let shared = Arc::new(std::sync::Mutex::new(TestHarness::new(vec![hvac_catalog_config()])));

    // Start from a clean slate.
    shared.lock().unwrap().clean_catalogs(&["hvac"]);

    let path = make_path("hvac");
    let fault_id = FaultId::Numeric(0x7001);

    // Pre-populate with a known state.
    {
        let seed = make_fault_record(fault_id.clone(), LifecycleStage::Failed);
        shared.lock().unwrap().processor.process_record(&path, &seed);
    }

    let mut workers = Vec::new();

    // Writers: cycle the fault through several lifecycle stages.
    for _ in 0..5 {
        let shared = Arc::clone(&shared);
        let fault_id = fault_id.clone();
        workers.push(thread::spawn(move || {
            for stage in [LifecycleStage::Failed, LifecycleStage::Passed, LifecycleStage::PreFailed] {
                let record = make_fault_record(fault_id.clone(), stage);
                shared.lock().unwrap().processor.process_record(&path, &record);
            }
        }));
    }

    // Readers: query the catalog while the writers are busy.
    for _ in 0..5 {
        let shared = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            for _ in 0..3 {
                let guard = shared.lock().unwrap();
                let result = guard.manager.get_all_faults("hvac");
                assert!(result.is_ok(), "get_all_faults should not fail under contention");
                assert!(!result.unwrap().is_empty(), "Should always have faults in catalog");
            }
        }));
    }

    for worker in workers {
        worker.join().expect("Thread should not panic");
    }

    // Final state should be valid
    let guard = shared.lock().unwrap();
    let faults = guard.manager.get_all_faults("hvac").unwrap();
    assert_eq!(faults.len(), 2, "HVAC catalog has 2 descriptors");
}

// ============================================================================
// 1b. Concurrent reads on SovdFaultManager without outer Mutex
// ============================================================================

/// Concurrent reads on `SovdFaultManager` without an outer Mutex.
///
/// Unlike the test above, this exercises real concurrency: `SovdFaultManager`
/// is internally thread-safe (uses `Arc` where `S: SovdFaultStateStorage`
/// provides its own `Mutex`), so sharing it via `Arc` across threads
/// without an outer lock is safe and tests the actual concurrent code path.
#[test]
#[serial]
fn concurrent_reads_on_manager_does_not_panic() {
    let mut harness = TestHarness::new(vec![hvac_catalog_config()]);
    harness.clean_catalogs(&["hvac"]);

    // Pre-populate with known state (single-threaded setup).
    let path = make_path("hvac");
    let seed = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
    harness.processor.process_record(&path, &seed);

    // Share only the manager (internally thread-safe) via Arc.
    let manager = Arc::new(harness.manager);

    let readers: Vec<_> = (0..10)
        .map(|_| {
            let mgr = Arc::clone(&manager);
            thread::spawn(move || {
                for _ in 0..20 {
                    let result = mgr.get_all_faults("hvac");
                    assert!(result.is_ok(), "get_all_faults should not fail under concurrent reads");
                    assert!(!result.unwrap().is_empty(), "Should always have faults in catalog");
                }
            })
        })
        .collect();

    for reader in readers {
        reader.join().expect("Reader thread should not panic");
    }
}

// ============================================================================
// 2. Concurrent delete + process on the same fault
// ============================================================================

/// Concurrent delete_all_faults and process_record must not panic.
/// The storage layer must handle interleaved writes and deletes gracefully.
#[test]
#[serial]
fn concurrent_delete_and_process_does_not_panic() {
    let shared = Arc::new(std::sync::Mutex::new(TestHarness::new(vec![hvac_catalog_config()])));
    shared.lock().unwrap().clean_catalogs(&["hvac"]);

    let path = make_path("hvac");

    // Pre-populate
    {
        let seed = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
        shared.lock().unwrap().processor.process_record(&path, &seed);
    }

    let mut workers = Vec::new();

    // Writers: repeatedly process records
    for _ in 0..3 {
        let shared = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            for _ in 0..5 {
                let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
                shared.lock().unwrap().processor.process_record(&path, &record);
            }
        }));
    }

    // Deleters: repeatedly delete all faults
    for _ in 0..3 {
        let shared = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            for _ in 0..5 {
                let _ = shared.lock().unwrap().manager.delete_all_faults("hvac");
            }
        }));
    }

    for worker in workers {
        worker.join().expect("Thread should not panic");
    }
}

// ============================================================================
// 3. Multi-catalog concurrent access
// ============================================================================

/// Concurrent access to different catalogs (HVAC and IVI) must maintain
/// isolation — writes to one catalog must not affect the other.
#[test]
#[serial]
fn multi_catalog_concurrent_access_maintains_isolation() {
    let shared = Arc::new(std::sync::Mutex::new(TestHarness::new(vec![
        hvac_catalog_config(),
        ivi_catalog_config(),
    ])));
    shared.lock().unwrap().clean_catalogs(&["hvac", "ivi"]);

    let mut workers = Vec::new();

    // HVAC writer thread
    {
        let shared = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            let path = make_path("hvac");
            for _ in 0..10 {
                let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
                shared.lock().unwrap().processor.process_record(&path, &record);
            }
        }));
    }

    // IVI writer thread
    {
        let shared = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            let path = make_path("ivi");
            for _ in 0..10 {
                let record = make_fault_record(
                    FaultId::Text(common::types::to_static_short_string("ivi.display.init_timeout").unwrap()),
                    LifecycleStage::Failed,
                );
                shared.lock().unwrap().processor.process_record(&path, &record);
            }
        }));
    }

    for worker in workers {
        worker.join().expect("Thread should not panic");
    }

    // Verify isolation: each catalog has its own fault counts
    let guard = shared.lock().unwrap();
    let hvac_faults = guard.manager.get_all_faults("hvac").unwrap();
    let ivi_faults = guard.manager.get_all_faults("ivi").unwrap();

    assert_eq!(hvac_faults.len(), 2, "HVAC catalog has 2 descriptors");
    assert_eq!(ivi_faults.len(), 1, "IVI catalog has 1 descriptor");

    // HVAC fault should be marked as failed
    let hvac_fault = hvac_faults.iter().find(|f| f.code == "0x7001").unwrap();
    assert_eq!(
        hvac_fault.typed_status.as_ref().unwrap().test_failed,
        Some(true),
        "HVAC fault should be Failed"
    );

    // IVI fault should be marked as failed
    let ivi_fault = ivi_faults.iter().find(|f| f.code == "ivi.display.init_timeout").unwrap();
    assert_eq!(
        ivi_fault.typed_status.as_ref().unwrap().test_failed,
        Some(true),
        "IVI fault should be Failed"
    );
}
a/tests/integration/src/test_debounce_aging_cycles_ec.rs b/tests/integration/src/test_debounce_aging_cycles_ec.rs new file mode 100644 index 0000000..f75409e --- /dev/null +++ b/tests/integration/src/test_debounce_aging_cycles_ec.rs @@ -0,0 +1,388 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! E2E tests for debounce, aging, operation cycles, and enabling conditions. +//! +//! These integration tests exercise the full DFM pipeline for each +//! fault-management flow, including edge cases and boundary conditions. + +use crate::helpers::*; +use common::catalog::FaultCatalogConfig; +use common::config::{ResetPolicy, ResetTrigger}; +use common::debounce::DebounceMode; +use common::fault::*; +use common::types::to_static_short_string; +use dfm_lib::enabling_condition_registry::EnablingConditionRegistry; +use dfm_lib::operation_cycle::OperationCycleTracker; +use serial_test::serial; +use std::sync::{Arc, RwLock}; +use std::time::Duration; + +// ============================================================================ +// Helper: build a catalog with manager-side debounce +// ============================================================================ + +fn debounce_catalog_config(mode: DebounceMode) -> FaultCatalogConfig { + FaultCatalogConfig { + id: "debounce_test".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Numeric(0xD001), + name: to_static_short_string("DebouncedFault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: Some(mode), + 
manager_side_reset: None, + }], + } +} + +fn aging_catalog_config(policy: ResetPolicy) -> FaultCatalogConfig { + FaultCatalogConfig { + id: "aging_test".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Numeric(0xA001), + name: to_static_short_string("AgingFault").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: Some(policy), + }], + } +} + +// ============================================================================ +// 1. Debounce: CountWithinWindow E2E +// ============================================================================ + +/// CountWithinWindow debounce: events below min_count are suppressed. +/// Only the N-th event within the window fires the fault through. +#[test] +#[serial] +fn debounce_count_within_window_suppresses_below_threshold() { + let mode = DebounceMode::CountWithinWindow { + min_count: 3, + window: Duration::from_secs(60).into(), + }; + let mut harness = TestHarness::new(vec![debounce_catalog_config(mode)]); + harness.clean_catalogs(&["debounce_test"]); + + let path = make_path("debounce_test"); + let fault_id = FaultId::Numeric(0xD001); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + + // Events 1 & 2: suppressed by debounce + harness.processor.process_record(&path, &record); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("debounce_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xD001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(false), + "First 2 events should be suppressed" + ); + + // Event 3: fires (min_count reached) + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("debounce_test").unwrap(); + let fault = faults.iter().find(|f| 
f.code == "0xD001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(true), + "Third event should pass debounce" + ); + assert_eq!( + fault.typed_status.as_ref().unwrap().confirmed_dtc, + Some(true), + "Confirmed DTC should be set" + ); +} + +// ============================================================================ +// 2. Debounce: HoldTime E2E +// ============================================================================ + +/// HoldTime debounce: first event starts timer, second within hold time +/// is suppressed, event after hold time elapses fires. +/// +/// NOTE: HoldTime's on_event() checks wall-clock Instant. In integration +/// tests we can only verify that the first event is always suppressed. +#[test] +#[serial] +fn debounce_holdtime_first_event_always_suppressed() { + let mode = DebounceMode::HoldTime { + duration: Duration::from_secs(60).into(), // long hold time + }; + let mut harness = TestHarness::new(vec![debounce_catalog_config(mode)]); + harness.clean_catalogs(&["debounce_test"]); + + let path = make_path("debounce_test"); + let fault_id = FaultId::Numeric(0xD001); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + + // First event: starts HoldTime timer, suppressed + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("debounce_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xD001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(false), + "First event in HoldTime should be suppressed (timer just started)" + ); +} + +// ============================================================================ +// 3. Debounce: EdgeWithCooldown E2E +// ============================================================================ + +/// EdgeWithCooldown: first event fires immediately, subsequent events +/// within cooldown are suppressed. 
+#[test] +#[serial] +fn debounce_edge_with_cooldown_first_fires_then_suppresses() { + let mode = DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_secs(60).into(), // long cooldown + }; + let mut harness = TestHarness::new(vec![debounce_catalog_config(mode)]); + harness.clean_catalogs(&["debounce_test"]); + + let path = make_path("debounce_test"); + let fault_id = FaultId::Numeric(0xD001); + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + + // First event: fires (edge trigger) + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("debounce_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xD001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(true), + "Edge trigger should fire on first event" + ); +} + +// ============================================================================ +// 4. Aging: PowerCycles E2E +// ============================================================================ + +/// Aging with PowerCycles: confirmed_dtc stays latched after Passed until +/// the required number of power cycles elapse. 
+#[test] +#[serial] +fn aging_power_cycles_clears_after_threshold() { + let policy = ResetPolicy { + trigger: ResetTrigger::PowerCycles(2), + min_operating_cycles_before_clear: None, + }; + let config = aging_catalog_config(policy); + + // Build harness with custom cycle tracker + let storage_dir = shared_storage_path(); + let storage = Arc::new(dfm_lib::sovd_fault_storage::KvsSovdFaultStateStorage::new(storage_dir, 0).expect("storage init")); + let catalog = common::catalog::FaultCatalogBuilder::new().cfg_struct(config).unwrap().build(); + let registry = Arc::new(dfm_lib::fault_catalog_registry::FaultCatalogRegistry::new(vec![catalog])); + let cycle_tracker = Arc::new(RwLock::new(OperationCycleTracker::new())); + let mut processor = + dfm_lib::fault_record_processor::FaultRecordProcessor::new(Arc::clone(&storage), Arc::clone(®istry), Arc::clone(&cycle_tracker)); + let manager = dfm_lib::sovd_fault_manager::SovdFaultManager::new(storage, registry); + let _ = manager.delete_all_faults("aging_test"); + + let path = make_path("aging_test"); + let fault_id = FaultId::Numeric(0xA001); + + // Step 1: Fault occurs and then passes + let failed = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + let passed = make_fault_record(fault_id.clone(), LifecycleStage::Passed); + processor.process_record(&path, &failed); + processor.process_record(&path, &passed); + + let faults = manager.get_all_faults("aging_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xA001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().confirmed_dtc, + Some(true), + "confirmed_dtc should stay latched after Passed (aging policy)" + ); + + // Step 2: Only 1 power cycle — not enough + cycle_tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + + let faults = manager.get_all_faults("aging_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xA001").unwrap(); + assert_eq!( + 
fault.typed_status.as_ref().unwrap().confirmed_dtc, + Some(true), + "Only 1/2 power cycles — not enough" + ); + + // Step 3: Second power cycle — aging threshold met + cycle_tracker.write().unwrap().increment("power"); + processor.process_record(&path, &passed); + + let faults = manager.get_all_faults("aging_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xA001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().confirmed_dtc, + Some(false), + "2/2 power cycles — confirmed_dtc should be cleared by aging" + ); +} + +// ============================================================================ +// 5. Operation Cycles: new cycle resets per-cycle flags +// ============================================================================ + +/// Operation cycle boundary resets `*_this_operation_cycle` flags +/// for all faults at the given path. +#[test] +#[serial] +fn operation_cycle_boundary_resets_per_cycle_flags() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let path = make_path("hvac"); + let fault_id = FaultId::Numeric(0x7001); + + // Report a fault — sets test_failed_this_operation_cycle + let record = make_fault_record(fault_id.clone(), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert_eq!(fault.typed_status.as_ref().unwrap().test_failed_this_operation_cycle, Some(true)); + + // New operation cycle boundary + harness.processor.on_new_operation_cycle("hvac"); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed_this_operation_cycle, + Some(false), + "test_failed_this_operation_cycle should reset on new cycle" + ); + // Other flags preserved + assert_eq!( + 
fault.typed_status.as_ref().unwrap().test_failed, + Some(true), + "test_failed should be preserved across cycles" + ); +} + +// ============================================================================ +// 6. Enabling Conditions: status tracking +// ============================================================================ + +/// Enabling condition registry correctly tracks status transitions. +/// This is a unit-level integration test verifying the EC workflow. +#[test] +fn enabling_condition_status_transitions() { + use common::enabling_condition::EnablingConditionStatus; + + let mut registry = EnablingConditionRegistry::new(); + + // Register new condition — starts Inactive + let initial = registry.register("vehicle.speed.valid"); + assert_eq!(initial, EnablingConditionStatus::Inactive); + + // Activate + let changed = registry.update_status("vehicle.speed.valid", EnablingConditionStatus::Active); + assert_eq!(changed, Some(EnablingConditionStatus::Active)); + + // Same status again — no change + let no_change = registry.update_status("vehicle.speed.valid", EnablingConditionStatus::Active); + assert_eq!(no_change, None, "Same status should return None (no change)"); + + // Deactivate + let deactivated = registry.update_status("vehicle.speed.valid", EnablingConditionStatus::Inactive); + assert_eq!(deactivated, Some(EnablingConditionStatus::Inactive)); + + // Unknown condition is auto-registered + let auto = registry.update_status("engine.temp.valid", EnablingConditionStatus::Active); + assert_eq!(auto, Some(EnablingConditionStatus::Active)); + assert_eq!(registry.len(), 2); +} + +// ============================================================================ +// Edge cases +// ============================================================================ + +/// Rapid events within a CountWithinWindow debounce: +/// sending many events quickly should eventually fire. 
+#[test]
+#[serial]
+fn debounce_rapid_events_eventually_fire() {
+    let mode = DebounceMode::CountWithinWindow {
+        min_count: 5,
+        window: Duration::from_secs(60).into(),
+    };
+    let mut harness = TestHarness::new(vec![debounce_catalog_config(mode)]);
+    harness.clean_catalogs(&["debounce_test"]);
+
+    let path = make_path("debounce_test");
+    let record = make_fault_record(FaultId::Numeric(0xD001), LifecycleStage::Failed);
+
+    // Send 4 events: suppressed
+    for _ in 0..4 {
+        harness.processor.process_record(&path, &record);
+    }
+
+    let faults = harness.manager.get_all_faults("debounce_test").unwrap();
+    let fault = faults.iter().find(|f| f.code == "0xD001").unwrap();
+    assert_eq!(fault.typed_status.as_ref().unwrap().test_failed, Some(false));
+
+    // 5th event: fires
+    harness.processor.process_record(&path, &record);
+
+    let faults = harness.manager.get_all_faults("debounce_test").unwrap();
+    let fault = faults.iter().find(|f| f.code == "0xD001").unwrap();
+    assert_eq!(fault.typed_status.as_ref().unwrap().test_failed, Some(true));
+}
+
+/// Minimum-count debounce (min_count = 1) should fire on the first event
+/// (not a realistic production config, but it exercises the boundary).
+#[test] +#[serial] +fn debounce_min_count_one_fires_immediately() { + let mode = DebounceMode::CountWithinWindow { + min_count: 1, + window: Duration::from_secs(60).into(), + }; + let mut harness = TestHarness::new(vec![debounce_catalog_config(mode)]); + harness.clean_catalogs(&["debounce_test"]); + + let path = make_path("debounce_test"); + let record = make_fault_record(FaultId::Numeric(0xD001), LifecycleStage::Failed); + + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("debounce_test").unwrap(); + let fault = faults.iter().find(|f| f.code == "0xD001").unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().test_failed, + Some(true), + "min_count=1 should fire on first event" + ); +} diff --git a/tests/integration/src/test_error_paths.rs b/tests/integration/src/test_error_paths.rs new file mode 100644 index 0000000..dacce17 --- /dev/null +++ b/tests/integration/src/test_error_paths.rs @@ -0,0 +1,304 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Error-path integration tests. +//! +//! These tests exercise fault-lib error handling and validation at the +//! integration level, complementing the happy-path tests in other modules. +//! +//! Scenarios covered: +//! - Processing records with unknown fault IDs +//! - Empty catalog behavior +//! - KVS storage initialization failures +//! - Catalog builder double-configuration detection +//! 
- Duplicate FaultId detection in catalog configs + +use crate::helpers::*; +use common::catalog::{CatalogBuildError, FaultCatalogBuilder, FaultCatalogConfig}; +use common::fault::*; +use common::types::to_static_short_string; +use dfm_lib::sovd_fault_storage::KvsSovdFaultStateStorage; +use serial_test::serial; + +// ============================================================================ +// 1. Invalid Fault ID +// ============================================================================ + +/// Processing a record with a fault ID not in any registered catalog +/// must not panic. The processor handles unrecognised faults gracefully +/// and existing catalog faults remain queryable. +#[test] +#[serial] +fn process_record_with_unknown_fault_id_does_not_panic() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let path = make_path("hvac"); + let unknown_id = FaultId::Numeric(0xDEAD); + let record = make_fault_record(unknown_id, LifecycleStage::Failed); + + // Must not panic or corrupt state + harness.processor.process_record(&path, &record); + + // Known catalog faults remain intact + let faults = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(faults.len(), 2, "Known catalog faults should still be returned"); +} + +/// An unknown fault ID processed multiple times does not accumulate +/// unexpected state or cause the processor to misbehave. 
+#[test] +#[serial] +fn repeated_unknown_fault_id_does_not_corrupt_state() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let path = make_path("hvac"); + let unknown_id = FaultId::Text(to_static_short_string("nonexistent.sensor").unwrap()); + + // Process several records with the unknown ID + for stage in [LifecycleStage::Failed, LifecycleStage::Passed, LifecycleStage::Failed] { + let record = make_fault_record(unknown_id.clone(), stage); + harness.processor.process_record(&path, &record); + } + + // Catalog faults are unaffected + let faults = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(faults.len(), 2); +} + +// ============================================================================ +// 2. Empty Catalog +// ============================================================================ + +/// An empty fault catalog (zero descriptors) is valid but yields no faults. +#[test] +#[serial] +fn empty_catalog_query_returns_no_faults() { + let empty_config = FaultCatalogConfig { + id: "empty_test".into(), + version: 1, + faults: vec![], + }; + let mut harness = TestHarness::new(vec![empty_config]); + harness.clean_catalogs(&["empty_test"]); + + let faults = harness.manager.get_all_faults("empty_test").unwrap(); + assert!(faults.is_empty(), "Empty catalog should return no faults, got: {}", faults.len()); +} + +/// Processing a record against an empty catalog does not panic. 
+#[test] +#[serial] +fn process_record_against_empty_catalog_does_not_panic() { + let empty_config = FaultCatalogConfig { + id: "empty_process".into(), + version: 1, + faults: vec![], + }; + let mut harness = TestHarness::new(vec![empty_config]); + harness.clean_catalogs(&["empty_process"]); + + let path = make_path("empty_process"); + let record = make_fault_record(FaultId::Numeric(42), LifecycleStage::Failed); + + // Must not panic even though the fault ID is not in the empty catalog + harness.processor.process_record(&path, &record); +} + +// ============================================================================ +// 3. Corrupt KVS +// ============================================================================ + +/// KVS storage rejects re-initialisation of an already-bound instance ID +/// with a different path, returning a `StorageError`. +/// +/// The process-wide KVS pool binds each instance ID to a specific backend +/// path. Attempting to reuse an instance with a different path must fail. +#[test] +#[serial] +fn kvs_rejects_instance_id_reuse_with_different_path() { + // Ensure instance 0 is bound to the shared storage path + let _harness = TestHarness::new(vec![hvac_catalog_config()]); + + // Try to rebind instance 0 to a completely different path + let other_dir = tempfile::TempDir::new().unwrap(); + let result = KvsSovdFaultStateStorage::new(other_dir.path(), 0); + assert!(result.is_err(), "KVS should reject reuse of instance 0 with a different path"); +} + +/// KVS storage should fail when given a regular file instead of a directory. +#[test] +fn kvs_storage_rejects_file_as_storage_path() { + let tmpfile = tempfile::NamedTempFile::new().unwrap(); + // tmpfile.path() points to a regular file, not a directory + let result = KvsSovdFaultStateStorage::new(tmpfile.path(), 7); + assert!(result.is_err(), "KVS should reject a regular file as storage path"); +} + +// ============================================================================ +// 4. 
Double Init (Catalog Builder) +// ============================================================================ + +/// The builder rejects a second `cfg_struct()` call with +/// `AlreadyConfigured`. +#[test] +fn catalog_builder_rejects_double_configure() { + let config = FaultCatalogConfig { + id: "double_init".into(), + version: 1, + faults: vec![], + }; + + let result = FaultCatalogBuilder::new() + .cfg_struct(config.clone()) + .expect("first cfg_struct should succeed") + .cfg_struct(config); + + let err = result.err().expect("second cfg_struct should fail"); + assert!( + matches!(err, CatalogBuildError::AlreadyConfigured), + "Second cfg_struct() should return AlreadyConfigured, got: {err:?}" + ); +} + +/// Building without any configuration returns `MissingConfig`. +#[test] +fn catalog_builder_rejects_no_config() { + let result = FaultCatalogBuilder::new().try_build(); + assert!( + matches!(result, Err(CatalogBuildError::MissingConfig)), + "Building with no config should return MissingConfig, got: {result:?}" + ); +} + +/// Invalid JSON string triggers `InvalidJson`. +#[test] +fn catalog_builder_rejects_invalid_json() { + let result = FaultCatalogBuilder::new() + .json_string("{ not valid json }") + .expect("json_string accepts any string") + .try_build(); + + assert!( + matches!(result, Err(CatalogBuildError::InvalidJson(_))), + "Invalid JSON should return InvalidJson, got: {result:?}" + ); +} + +/// Non-existent JSON file path triggers `Io` error. +#[test] +fn catalog_builder_rejects_missing_json_file() { + let result = FaultCatalogBuilder::new() + .json_file(std::path::PathBuf::from("/nonexistent/path/catalog.json")) + .expect("json_file accepts any path") + .try_build(); + + assert!( + matches!(result, Err(CatalogBuildError::Io(_))), + "Missing file should return Io error, got: {result:?}" + ); +} + +// ============================================================================ +// 5. 
Duplicate FaultId +// ============================================================================ + +/// A catalog config containing two faults with the same numeric ID must +/// be rejected with `DuplicateFaultId`. +#[test] +fn catalog_builder_rejects_duplicate_numeric_fault_id() { + let dup_config = FaultCatalogConfig { + id: "dup_numeric".into(), + version: 1, + faults: vec![ + FaultDescriptor { + id: FaultId::Numeric(0x1234), + name: to_static_short_string("Fault A").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Numeric(0x1234), // Duplicate! + name: to_static_short_string("Fault B").unwrap(), + summary: None, + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + ], + }; + + let result = FaultCatalogBuilder::new() + .cfg_struct(dup_config) + .expect("cfg_struct accepts any config") + .try_build(); + + assert!( + matches!(result, Err(CatalogBuildError::DuplicateFaultId(_))), + "Should return DuplicateFaultId, got: {result:?}" + ); +} + +/// Duplicate text-based FaultIds are also caught. 
+#[test] +fn catalog_builder_rejects_duplicate_text_fault_id() { + let dup_config = FaultCatalogConfig { + id: "dup_text".into(), + version: 1, + faults: vec![ + FaultDescriptor { + id: FaultId::Text(to_static_short_string("sensor.temp.stuck").unwrap()), + name: to_static_short_string("TempStuck A").unwrap(), + summary: None, + category: FaultType::Software, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Text(to_static_short_string("sensor.temp.stuck").unwrap()), + name: to_static_short_string("TempStuck B").unwrap(), + summary: None, + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + ], + }; + + let result = FaultCatalogBuilder::new() + .cfg_struct(dup_config) + .expect("cfg_struct accepts any config") + .try_build(); + + assert!( + matches!(result, Err(CatalogBuildError::DuplicateFaultId(_))), + "Should return DuplicateFaultId, got: {result:?}" + ); +} diff --git a/tests/integration/src/test_ipc_query.rs b/tests/integration/src/test_ipc_query.rs new file mode 100644 index 0000000..1f12c48 --- /dev/null +++ b/tests/integration/src/test_ipc_query.rs @@ -0,0 +1,271 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Integration tests for the DFM query/clear API. +//! +//! **Part 1 - DirectDfmQuery baseline:** process faults via `FaultRecordProcessor`, +//! 
then query/clear via `SovdFaultManager`. Covers get_all, get_fault, delete_single, +//! delete_all, bad_path, not_found. +//! +//! **Part 2 - IPC E2E:** `DiagnosticFaultManager::with_query_server()` + `Iceoryx2DfmQuery` +//! over real iceoryx2 shared-memory transport. + +use common::catalog::FaultCatalogBuilder; +use common::fault::{FaultId, LifecycleStage}; +use dfm_lib::diagnostic_fault_manager::DiagnosticFaultManager; +use dfm_lib::fault_catalog_registry::FaultCatalogRegistry; +use dfm_lib::fault_record_processor::FaultRecordProcessor; +use dfm_lib::operation_cycle::OperationCycleTracker; +use dfm_lib::query_api::DfmQueryApi; +use dfm_lib::query_ipc::Iceoryx2DfmQuery; +use dfm_lib::sovd_fault_manager::Error; +use dfm_lib::sovd_fault_storage::KvsSovdFaultStateStorage; +use serial_test::serial; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, RwLock}; +use std::time::Duration; + +use crate::helpers::*; + +/// Global counter for unique KVS instance IDs. +/// Starts at 2 to avoid conflict with instance 0 (shared tests) and 1. 
+static KVS_INSTANCE_COUNTER: AtomicUsize = AtomicUsize::new(2); + +#[test] +#[serial] +fn direct_query_baseline() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + let path = make_path("hvac"); + harness.processor.process_record(&path, &record); + + let query = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(query.len(), 2); // 2 descriptors in hvac catalog + + let fault = query.iter().find(|f| f.code == "0x7001").unwrap(); + assert!(fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + assert_eq!(fault.occurrence_counter, Some(1)); +} + +#[test] +#[serial] +fn direct_query_get_fault_with_env_data() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let record = make_fault_record_with_env(FaultId::Numeric(0x7001), LifecycleStage::Failed, &[("temp", "42"), ("pressure", "1013")]); + let path = make_path("hvac"); + harness.processor.process_record(&path, &record); + + let (fault, env) = harness.manager.get_fault("hvac", "0x7001").unwrap(); + assert_eq!(fault.code, "0x7001"); + assert_eq!(env.get("temp"), Some(&"42".into())); +} + +#[test] +#[serial] +fn direct_query_delete_single_fault() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + let path = make_path("hvac"); + harness.processor.process_record(&path, &record); + + harness.manager.delete_fault("hvac", "0x7001").unwrap(); + + // Fault still appears (descriptor exists) but status is cleared + let (fault, _) = harness.manager.get_fault("hvac", "0x7001").unwrap(); + assert!(!fault.typed_status.as_ref().unwrap().test_failed.unwrap()); +} + +#[test] +#[serial] +fn direct_query_delete_all_faults() { + let mut harness = 
TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + let path = make_path("hvac"); + harness.processor.process_record(&path, &record); + + harness.manager.delete_all_faults("hvac").unwrap(); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + // All faults show default/cleared status + for fault in &faults { + assert!(!fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + } +} + +#[test] +#[serial] +fn direct_query_bad_path_returns_bad_argument() { + let harness = TestHarness::new(vec![hvac_catalog_config()]); + assert_eq!(harness.manager.get_all_faults("nonexistent"), Err(Error::BadArgument)); +} + +#[test] +#[serial] +fn direct_query_not_found_fault_code() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + assert_eq!(harness.manager.get_fault("hvac", "0xFFFF"), Err(Error::NotFound)); +} + +// ============================================================================ +// Part 2: IPC E2E tests (DiagnosticFaultManager + Iceoryx2DfmQuery) +// ============================================================================ + +/// Helper: create a pre-populated KVS storage with faults processed, +/// then build a `DiagnosticFaultManager::with_query_server()` on top. +/// +/// Returns the DFM (must be kept alive for the server to run). +/// Uses a dedicated temp dir + KVS instance to avoid conflicts. 
+fn start_dfm_with_faults( + configs: Vec, + faults: &[(FaultId, LifecycleStage)], + entity: &str, +) -> (DiagnosticFaultManager, tempfile::TempDir) { + let dir = tempfile::TempDir::new().expect("temp dir"); + let instance_id = KVS_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed); + let storage = Arc::new(KvsSovdFaultStateStorage::new(dir.path(), instance_id).expect("storage")); + let catalogs: Vec<_> = configs + .iter() + .map(|cfg| FaultCatalogBuilder::new().cfg_struct(cfg.clone()).expect("builder").build()) + .collect(); + let registry = Arc::new(FaultCatalogRegistry::new(catalogs)); + let cycle_tracker = Arc::new(RwLock::new(OperationCycleTracker::new())); + + // Process faults into storage, then drop processor + registry + tracker + // to release Arc references so we can unwrap the storage. + { + let mut processor = FaultRecordProcessor::new(Arc::clone(&storage), Arc::clone(®istry), cycle_tracker); + let path = make_path(entity); + for (id, stage) in faults { + let record = make_fault_record(id.clone(), *stage); + processor.process_record(&path, &record); + } + } + + // Unwrap Arc to pass owned storage to DiagnosticFaultManager + drop(registry); + let storage = Arc::try_unwrap(storage).ok().expect("all Arc refs dropped"); + + let dfm_registry = FaultCatalogRegistry::new( + configs + .into_iter() + .map(|cfg| FaultCatalogBuilder::new().cfg_struct(cfg).expect("builder").build()) + .collect(), + ); + + let dfm = DiagnosticFaultManager::with_query_server(storage, dfm_registry); + + // Wait for query server to be available (retry with timeout instead of fixed sleep) + let deadline = std::time::Instant::now() + Duration::from_secs(2); + loop { + match Iceoryx2DfmQuery::new() { + Ok(_) => break, + Err(_) if std::time::Instant::now() < deadline => { + std::thread::sleep(Duration::from_millis(10)); + } + Err(e) => panic!("DFM query server not ready within 2s: {e}"), + } + } + // Allow DFM worker to process queued fault events after IPC is ready + 
std::thread::sleep(Duration::from_millis(50)); + + (dfm, dir) +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_query_all_faults() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[(FaultId::Numeric(0x7001), LifecycleStage::Failed)], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + let faults = client.get_all_faults("hvac").unwrap(); + assert_eq!(faults.len(), 2); // 2 descriptors in hvac catalog + + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert!(fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + assert_eq!(fault.occurrence_counter, Some(1)); + + drop(dfm); +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_get_single_fault() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[(FaultId::Numeric(0x7001), LifecycleStage::Failed)], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + let (fault, _env) = client.get_fault("hvac", "0x7001").unwrap(); + assert_eq!(fault.code, "0x7001"); + assert!(fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + + drop(dfm); +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_delete_single_fault() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[(FaultId::Numeric(0x7001), LifecycleStage::Failed)], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + client.delete_fault("hvac", "0x7001").unwrap(); + + let (fault, _) = client.get_fault("hvac", "0x7001").unwrap(); + assert!(!fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + + drop(dfm); +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_delete_all_faults() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[(FaultId::Numeric(0x7001), LifecycleStage::Failed)], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + client.delete_all_faults("hvac").unwrap(); + + let faults = client.get_all_faults("hvac").unwrap(); + for fault in &faults { + 
assert!(!fault.typed_status.as_ref().unwrap().test_failed.unwrap()); + } + + drop(dfm); +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_bad_path_returns_bad_argument() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + assert_eq!(client.get_all_faults("nonexistent"), Err(Error::BadArgument)); + + drop(dfm); +} + +#[test] +#[serial(ipc)] +fn ipc_e2e_not_found_fault_code() { + let (dfm, _dir) = start_dfm_with_faults(vec![hvac_catalog_config()], &[], "hvac"); + + let client = Iceoryx2DfmQuery::new().expect("IPC client"); + assert_eq!(client.get_fault("hvac", "0xFFFF"), Err(Error::NotFound)); + + drop(dfm); +} diff --git a/tests/integration/src/test_json_catalog.rs b/tests/integration/src/test_json_catalog.rs new file mode 100644 index 0000000..ac491b5 --- /dev/null +++ b/tests/integration/src/test_json_catalog.rs @@ -0,0 +1,183 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! E2E test for JSON catalog loading. +//! +//! Verifies that `FaultCatalogBuilder::json_file()` and `json_string()` +//! correctly load catalog configurations and produce a working catalog +//! that can be used in the DFM pipeline. + +use crate::helpers::*; +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::fault::*; +use serial_test::serial; +use std::io::Write; + +// ============================================================================ +// JSON catalog fixture +// ============================================================================ + +/// Returns a valid JSON string representing a catalog config. 
+fn sample_json_catalog() -> &'static str { + r#"{ + "id": "json_catalog", + "version": 2, + "faults": [ + { + "id": { "Numeric": 1001 }, + "name": "JsonFaultA", + "summary": "First fault loaded from JSON", + "category": "Software", + "severity": "Warn", + "compliance": [], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + }, + { + "id": { "Numeric": 1002 }, + "name": "JsonFaultB", + "summary": null, + "category": "Communication", + "severity": "Error", + "compliance": ["EmissionRelevant"], + "reporter_side_debounce": null, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": null + } + ] + }"# +} + +// ============================================================================ +// 1. JSON string → catalog → DFM pipeline +// ============================================================================ + +/// Load a catalog from a JSON string and verify it works end-to-end. +#[test] +#[serial] +fn json_string_catalog_e2e() { + let catalog = FaultCatalogBuilder::new() + .json_string(sample_json_catalog()) + .expect("json_string should accept valid JSON") + .build(); + + assert_eq!(catalog.id.as_ref(), "json_catalog"); + assert_eq!(catalog.len(), 2); + assert!(catalog.descriptor(&FaultId::Numeric(1001)).is_some()); + assert!(catalog.descriptor(&FaultId::Numeric(1002)).is_some()); + + // Build config to also test via TestHarness + let config: FaultCatalogConfig = serde_json::from_str(sample_json_catalog()).unwrap(); + let mut harness = TestHarness::new(vec![config]); + harness.clean_catalogs(&["json_catalog"]); + + let path = make_path("json_catalog"); + let record = make_fault_record(FaultId::Numeric(1001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("json_catalog").unwrap(); + assert_eq!(faults.len(), 2, "JSON catalog should have 2 faults"); + + let fault_a = 
faults.iter().find(|f| f.code == "0x3E9").unwrap(); // 1001 = 0x3E9 + assert_eq!(fault_a.typed_status.as_ref().unwrap().test_failed, Some(true)); + assert_eq!(fault_a.fault_name, "JsonFaultA"); + assert_eq!(fault_a.symptom.as_deref(), Some("First fault loaded from JSON")); +} + +// ============================================================================ +// 2. JSON file → catalog → DFM pipeline +// ============================================================================ + +/// Load a catalog from a JSON file and verify it works end-to-end. +#[test] +#[serial] +fn json_file_catalog_e2e() { + // Write JSON to a temporary file + let mut tmpfile = tempfile::NamedTempFile::new().unwrap(); + tmpfile.write_all(sample_json_catalog().as_bytes()).unwrap(); + tmpfile.flush().unwrap(); + + let catalog = FaultCatalogBuilder::new() + .json_file(tmpfile.path().to_path_buf()) + .expect("json_file should accept valid path") + .build(); + + assert_eq!(catalog.id.as_ref(), "json_catalog"); + assert_eq!(catalog.len(), 2); + assert!(catalog.descriptor(&FaultId::Numeric(1001)).is_some()); + assert!(catalog.descriptor(&FaultId::Numeric(1002)).is_some()); + + // Verify descriptor details + let desc = catalog.descriptor(&FaultId::Numeric(1002)).unwrap(); + assert_eq!(desc.name.to_string(), "JsonFaultB"); + assert_eq!(desc.category, FaultType::Communication); + assert_eq!(desc.severity, FaultSeverity::Error); + assert!(desc.compliance.iter().any(|c| matches!(c, ComplianceTag::EmissionRelevant))); +} + +// ============================================================================ +// 3. JSON string with all fields populated +// ============================================================================ + +/// JSON with debounce and reset policies deserializes correctly. 
+#[test] +fn json_string_with_debounce_and_reset() { + let json = r#"{ + "id": "full_json", + "version": 5, + "faults": [ + { + "id": { "Numeric": 2001 }, + "name": "FullFault", + "summary": "Full featured fault", + "category": "Hardware", + "severity": "Fatal", + "compliance": ["SafetyCritical", "EmissionRelevant"], + "reporter_side_debounce": { "HoldTime": { "duration": { "secs": 30, "nanos": 0 } } }, + "reporter_side_reset": null, + "manager_side_debounce": null, + "manager_side_reset": { + "trigger": { "PowerCycles": 3 }, + "min_operating_cycles_before_clear": null + } + } + ] + }"#; + + let catalog = FaultCatalogBuilder::new().json_string(json).unwrap().build(); + + assert_eq!(catalog.id.as_ref(), "full_json"); + assert_eq!(catalog.len(), 1); + + let desc = catalog.descriptor(&FaultId::Numeric(2001)).unwrap(); + assert_eq!(desc.severity, FaultSeverity::Fatal); + assert!(desc.reporter_side_debounce.is_some()); + assert!(desc.manager_side_reset.is_some()); +} + +// ============================================================================ +// 4. Empty JSON faults array +// ============================================================================ + +/// JSON with empty faults array produces an empty catalog. +#[test] +fn json_string_empty_faults() { + let json = r#"{ "id": "empty_json", "version": 1, "faults": [] }"#; + + let catalog = FaultCatalogBuilder::new().json_string(json).unwrap().build(); + + assert_eq!(catalog.id.as_ref(), "empty_json"); + assert!(catalog.is_empty()); + assert_eq!(catalog.len(), 0); +} diff --git a/tests/integration/src/test_lifecycle_transitions.rs b/tests/integration/src/test_lifecycle_transitions.rs new file mode 100644 index 0000000..3f52ee1 --- /dev/null +++ b/tests/integration/src/test_lifecycle_transitions.rs @@ -0,0 +1,184 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+//! Fault lifecycle transition tests.
+//!
+//! Validates the complete fault lifecycle as seen through the SOVD interface:
+//! NotTested → PreFailed → Failed → PrePassed → Passed, and back.
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+
+use crate::helpers::*;
+use common::fault::*;
+use serial_test::serial;
+
+/// **Scenario**: Full lifecycle: NotTested → Failed → Passed.
+///
+/// A newly detected fault transitions through the standard lifecycle.
+/// After being reported as `Failed`, the DTC flags are set. When the
+/// fault clears (`Passed`), the flags reset.
+#[test]
+#[serial]
+fn full_lifecycle_failed_then_passed() {
+    let mut harness = TestHarness::new(vec![hvac_catalog_config()]);
+    harness.clean_catalogs(&["hvac"]);
+    let path = make_path("hvac");
+
+    // Step 1: Report fault as Failed.
+    let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
+    harness.processor.process_record(&path, &record);
+
+    let faults = harness.manager.get_all_faults("hvac").unwrap();
+    let fault = faults.iter().find(|f| f.code == "0x7001").unwrap();
+    let status = fault.typed_status.as_ref().unwrap();
+    assert_eq!(status.test_failed, Some(true));
+    assert_eq!(status.confirmed_dtc, Some(true));
+
+    // Step 2: Fault clears — report as Passed.
+ let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Passed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + assert_eq!(status.test_failed, Some(false), "Passed clears test_failed"); + assert_eq!(status.confirmed_dtc, Some(false), "Passed clears confirmed_dtc (no aging policy)"); +} + +/// **Scenario**: Pre-stages set pending DTC without confirming. +/// +/// `PreFailed` indicates the fault condition is developing but not yet +/// confirmed. The `pending_dtc` flag is set, but `confirmed_dtc` remains +/// false until a `Failed` event arrives. +#[test] +#[serial] +fn prefailed_sets_pending_without_confirming() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::PreFailed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + + assert_eq!(status.test_failed, Some(true), "PreFailed sets test_failed"); + assert_eq!(status.pending_dtc, Some(true), "PreFailed sets pending_dtc"); + assert_eq!(status.confirmed_dtc, Some(false), "PreFailed does NOT set confirmed_dtc"); +} + +/// **Scenario**: PreFailed → Failed confirms the DTC. +/// +/// The standard diagnostic flow: a developing fault (PreFailed) is +/// confirmed (Failed). The pending flag clears and confirmed is set. 
+#[test] +#[serial] +fn prefailed_then_failed_confirms_dtc() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + // PreFailed + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::PreFailed); + harness.processor.process_record(&path, &record); + + // Failed (confirms) + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + + assert_eq!(status.test_failed, Some(true)); + assert_eq!(status.pending_dtc, Some(false), "Failed clears pending_dtc"); + assert_eq!(status.confirmed_dtc, Some(true), "Failed sets confirmed_dtc"); +} + +/// **Scenario**: NotTested marks the test-not-completed flag. +/// +/// When a diagnostic monitor cannot complete (e.g., preconditions not met), +/// it reports `NotTested`. This sets the ISO 14229 bit for +/// "test not completed this operation cycle". +#[test] +#[serial] +fn not_tested_sets_incomplete_flag() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::NotTested); + harness.processor.process_record(&path, &record); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + + assert_eq!( + status.test_not_completed_this_operation_cycle, + Some(true), + "NotTested sets test_not_completed_this_operation_cycle" + ); +} + +/// **Scenario**: Intermittent fault — Failed → Passed → Failed again. +/// +/// An intermittent fault clears and then re-occurs. 
The SOVD status +/// should reflect the latest state after each transition. +#[test] +#[serial] +fn intermittent_fault_toggles_correctly() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + // First occurrence + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + let fault = harness + .manager + .get_all_faults("hvac") + .unwrap() + .into_iter() + .find(|f| f.code == "0x7001") + .unwrap(); + assert_eq!(fault.typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // Clears + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Passed); + harness.processor.process_record(&path, &record); + let fault = harness + .manager + .get_all_faults("hvac") + .unwrap() + .into_iter() + .find(|f| f.code == "0x7001") + .unwrap(); + assert_eq!(fault.typed_status.as_ref().unwrap().confirmed_dtc, Some(false)); + + // Re-occurs + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + let fault = harness + .manager + .get_all_faults("hvac") + .unwrap() + .into_iter() + .find(|f| f.code == "0x7001") + .unwrap(); + assert_eq!( + fault.typed_status.as_ref().unwrap().confirmed_dtc, + Some(true), + "Re-confirmed after re-occurrence" + ); + assert_eq!(fault.typed_status.as_ref().unwrap().test_failed, Some(true)); +} diff --git a/tests/integration/src/test_multi_catalog.rs b/tests/integration/src/test_multi_catalog.rs new file mode 100644 index 0000000..43f51ac --- /dev/null +++ b/tests/integration/src/test_multi_catalog.rs @@ -0,0 +1,176 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+//
+// This program and the accompanying materials are made available under the
+// terms of the Apache License Version 2.0 which is available at
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+//! Multi-catalog integration tests.
+//!
+//! Validates that multiple reporter applications (each with their own fault
+//! catalog) can coexist within a single DFM instance without interference.
+//! This is the primary multi-tenant scenario in a vehicle ECU.
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+
+use crate::helpers::*;
+use common::fault::*;
+use common::types::*;
+use serial_test::serial;
+
+/// **Scenario**: Two catalogs (HVAC + IVI) registered, faults are isolated.
+///
+/// When two subsystems register their catalogs with DFM, fault records
+/// from one subsystem must not affect the other. SOVD queries are scoped
+/// by the catalog path (e.g., "hvac" or "ivi").
+#[test]
+#[serial]
+fn multi_catalog_fault_isolation() {
+    let mut harness = TestHarness::new(vec![hvac_catalog_config(), ivi_catalog_config()]);
+    harness.clean_catalogs(&["hvac", "ivi"]);
+
+    // HVAC reporter sends a Failed fault.
+    let hvac_path = make_path("hvac");
+    let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed);
+    harness.processor.process_record(&hvac_path, &record);
+
+    // IVI reporter sends a different Failed fault.
+    let ivi_path = make_path("ivi");
+    let record = make_fault_record(
+        FaultId::Text(to_static_short_string("ivi.display.init_timeout").unwrap()),
+        LifecycleStage::Failed,
+    );
+    harness.processor.process_record(&ivi_path, &record);
+
+    // HVAC faults — only HVAC fault should show as failed.
+ let hvac_faults = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(hvac_faults.len(), 2, "HVAC catalog has 2 faults"); + let cabin_temp = hvac_faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert_eq!(cabin_temp.typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // IVI faults — only IVI fault should show as failed. + let ivi_faults = harness.manager.get_all_faults("ivi").unwrap(); + assert_eq!(ivi_faults.len(), 1, "IVI catalog has 1 fault"); + let display = &ivi_faults[0]; + assert_eq!(display.code, "ivi.display.init_timeout"); + assert_eq!(display.typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // Cross-check: HVAC blower fault should NOT be affected by IVI. + let blower = hvac_faults.iter().find(|f| f.code == "hvac.blower.speed_sensor_mismatch").unwrap(); + assert_eq!( + blower.typed_status.as_ref().unwrap().confirmed_dtc, + Some(false), + "HVAC blower fault should not be affected by IVI fault" + ); +} + +/// **Scenario**: Same fault ID in different catalogs stays independent. +/// +/// Two subsystems may define the same fault ID (e.g., both use text ID "d1"). +/// DFM must keep their states separate because they are scoped by path. +#[test] +#[serial] +fn same_fault_id_different_catalogs_independent() { + use common::catalog::FaultCatalogConfig; + + // Two catalogs both defining a fault with text ID "comm_error". 
+ let config_a = FaultCatalogConfig { + id: "subsystem_a".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Text(to_static_short_string("comm_error").unwrap()), + name: to_static_short_string("CommErrorA").unwrap(), + summary: None, + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + + let config_b = FaultCatalogConfig { + id: "subsystem_b".into(), + version: 1, + faults: vec![FaultDescriptor { + id: FaultId::Text(to_static_short_string("comm_error").unwrap()), + name: to_static_short_string("CommErrorB").unwrap(), + summary: None, + category: FaultType::Communication, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::new(), + reporter_side_debounce: None, + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }], + }; + + let mut harness = TestHarness::new(vec![config_a, config_b]); + harness.clean_catalogs(&["subsystem_a", "subsystem_b"]); + + // Only subsystem_a reports the fault. + let path_a = make_path("subsystem_a"); + let record = make_fault_record(FaultId::Text(to_static_short_string("comm_error").unwrap()), LifecycleStage::Failed); + harness.processor.process_record(&path_a, &record); + + // Subsystem A: comm_error is confirmed. + let faults_a = harness.manager.get_all_faults("subsystem_a").unwrap(); + assert_eq!(faults_a.len(), 1); + assert_eq!(faults_a[0].fault_name, "CommErrorA"); + assert_eq!(faults_a[0].typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // Subsystem B: same fault ID, but should have default (unfailed) state. 
+ let faults_b = harness.manager.get_all_faults("subsystem_b").unwrap(); + assert_eq!(faults_b.len(), 1); + assert_eq!(faults_b[0].fault_name, "CommErrorB"); + assert_eq!( + faults_b[0].typed_status.as_ref().unwrap().confirmed_dtc, + Some(false), + "subsystem_b's comm_error should NOT be affected by subsystem_a" + ); +} + +/// **Scenario**: Delete faults in one catalog does not affect another. +#[test] +#[serial] +fn delete_faults_scoped_to_catalog() { + let mut harness = TestHarness::new(vec![hvac_catalog_config(), ivi_catalog_config()]); + harness.clean_catalogs(&["hvac", "ivi"]); + + // Report faults in both catalogs. + let hvac_path = make_path("hvac"); + let ivi_path = make_path("ivi"); + + harness + .processor + .process_record(&hvac_path, &make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed)); + harness.processor.process_record( + &ivi_path, + &make_fault_record( + FaultId::Text(to_static_short_string("ivi.display.init_timeout").unwrap()), + LifecycleStage::Failed, + ), + ); + + // Delete all HVAC faults. + harness.manager.delete_all_faults("hvac").unwrap(); + + // HVAC faults are cleared. + let hvac_faults = harness.manager.get_all_faults("hvac").unwrap(); + let cabin_temp = hvac_faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert_eq!(cabin_temp.typed_status.as_ref().unwrap().confirmed_dtc, Some(false)); + + // IVI fault is unaffected. 
+ let ivi_faults = harness.manager.get_all_faults("ivi").unwrap(); + assert_eq!( + ivi_faults[0].typed_status.as_ref().unwrap().confirmed_dtc, + Some(true), + "IVI faults should be unaffected by HVAC delete" + ); +} diff --git a/tests/integration/src/test_persistent_storage.rs b/tests/integration/src/test_persistent_storage.rs new file mode 100644 index 0000000..e53000c --- /dev/null +++ b/tests/integration/src/test_persistent_storage.rs @@ -0,0 +1,136 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Persistent storage tests. +//! +//! Verifies that fault state survives across DFM restarts by using the +//! real `KvsSovdFaultStateStorage` (backed by JSON files on disk). +//! This is analogous to persistency module's CIT flush/reload tests. +//! +//! NOTE: All tests use the shared KVS storage directory (process-wide KVS pool +//! constraint) and must be annotated with `#[serial]`. +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use crate::helpers::*; +use common::fault::*; +use serial_test::serial; + +/// **Scenario**: Fault state persists across DFM restart (storage reload). +/// +/// Steps: +/// 1. First DFM instance processes a Failed fault → flush to disk +/// 2. Drop the first instance (simulates DFM shutdown) +/// 3. Create a second DFM instance pointing to the same storage dir +/// 4. Query via SOVD — the fault state should still be present +/// +/// This mirrors the real deployment where DFM crashes or ECU reboots, +/// and fault history must survive. 
+/// +/// Because the KVS global pool retains instance 0 for the process lifetime, +/// the second harness transparently picks up the same KVS data that was +/// flushed to disk by the first harness. +#[test] +#[serial] +fn fault_state_survives_dfm_restart() { + // --- First DFM lifetime --- + { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + // Verify it's stored. + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + assert_eq!(fault.typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // harness dropped here — DFM shuts down. + } + + // --- Second DFM lifetime (restart) --- + { + let harness = TestHarness::new(vec![hvac_catalog_config()]); + + // Query — the fault state should be recovered from the shared KVS. + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + + assert_eq!(status.test_failed, Some(true), "test_failed should persist across restart"); + assert_eq!(status.confirmed_dtc, Some(true), "confirmed_dtc should persist across restart"); + } +} + +/// **Scenario**: Delete individual fault clears it from persistent storage. +/// +/// After clearing a specific fault via the SOVD delete API, subsequent +/// queries should return default (cleared) status for that fault. +#[test] +#[serial] +fn delete_fault_clears_persistent_state() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + // Report fault. 
+ let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + // Confirm fault is stored. + let (fault, _) = harness.manager.get_fault("hvac", "0x7001").unwrap(); + assert_eq!(fault.typed_status.as_ref().unwrap().confirmed_dtc, Some(true)); + + // Delete the fault. + harness.manager.delete_fault("hvac", "0x7001").unwrap(); + + // Query again — should return default status (all flags cleared). + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let fault = faults.iter().find(|f| f.code == "0x7001").unwrap(); + let status = fault.typed_status.as_ref().unwrap(); + assert_eq!(status.test_failed, Some(false), "Deleted fault should have default status"); + assert_eq!(status.confirmed_dtc, Some(false)); +} + +/// **Scenario**: Delete all faults clears the entire catalog's persistent state. +#[test] +#[serial] +fn delete_all_faults_clears_persistent_state() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + // Report both HVAC faults. + let record1 = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + let record2 = make_fault_record( + FaultId::Text(common::types::to_static_short_string("hvac.blower.speed_sensor_mismatch").unwrap()), + LifecycleStage::Failed, + ); + harness.processor.process_record(&path, &record1); + harness.processor.process_record(&path, &record2); + + // Verify both are stored. + let faults = harness.manager.get_all_faults("hvac").unwrap(); + let all_confirmed = faults.iter().all(|f| f.typed_status.as_ref().unwrap().confirmed_dtc == Some(true)); + assert!(all_confirmed, "Both faults should be confirmed"); + + // Delete all. + harness.manager.delete_all_faults("hvac").unwrap(); + + // Query — all faults should have default (cleared) status. 
+ let faults = harness.manager.get_all_faults("hvac").unwrap(); + for fault in &faults { + let status = fault.typed_status.as_ref().unwrap(); + assert_eq!(status.test_failed, Some(false), "Fault {} should be cleared", fault.code); + assert_eq!(status.confirmed_dtc, Some(false), "Fault {} should be cleared", fault.code); + } +} diff --git a/tests/integration/src/test_report_and_query.rs b/tests/integration/src/test_report_and_query.rs new file mode 100644 index 0000000..ebdf25e --- /dev/null +++ b/tests/integration/src/test_report_and_query.rs @@ -0,0 +1,119 @@ +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// +//! Basic report → process → query flow. +//! +//! Demonstrates the primary use case: a reporter detects a fault condition, +//! publishes a FaultRecord, DFM processes it, and the SOVD manager can +//! query the resulting fault status. +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use crate::helpers::*; +use common::fault::*; +use common::types::*; +use serial_test::serial; + +/// **Scenario**: Reporter reports a single fault, DFM stores it, SOVD returns it. +/// +/// Steps: +/// 1. Build HVAC catalog with known fault descriptors +/// 2. Reporter creates a `Failed` record for fault 0x7001 +/// 3. DFM processor ingests the record +/// 4. SOVD manager returns the fault with `test_failed=true`, `confirmed_dtc=true` +#[test] +#[serial] +fn report_single_fault_and_query_via_sovd() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + // Reporter side: create and send a Failed record. 
+ let record = make_fault_record(FaultId::Numeric(0x7001), LifecycleStage::Failed); + harness.processor.process_record(&path, &record); + + // SOVD query side: retrieve all faults for HVAC. + let faults = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(faults.len(), 2, "HVAC catalog has 2 registered faults"); + + // Find the fault we reported. + let cabin_temp = faults.iter().find(|f| f.code == "0x7001").expect("fault 0x7001 should exist"); + + assert_eq!(cabin_temp.fault_name, "CabinTempSensorStuck"); + assert_eq!(cabin_temp.severity, FaultSeverity::Error as u32); + + let status = cabin_temp.typed_status.as_ref().expect("typed_status should be populated"); + assert_eq!(status.test_failed, Some(true), "Failed stage sets test_failed"); + assert_eq!(status.confirmed_dtc, Some(true), "Failed stage sets confirmed_dtc"); +} + +/// **Scenario**: Reporter reports a fault with environment data, SOVD returns it. +/// +/// Environment data (e.g., sensor readings, temperatures) provides diagnostic +/// context for troubleshooting. This test verifies env_data flows through the +/// full pipeline. +#[test] +#[serial] +fn report_fault_with_env_data_and_query() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + let path = make_path("hvac"); + + let record = make_fault_record_with_env( + FaultId::Text(to_static_short_string("hvac.blower.speed_sensor_mismatch").unwrap()), + LifecycleStage::Failed, + &[("cabin_temp_c", "42"), ("target_temp_c", "22")], + ); + harness.processor.process_record(&path, &record); + + // Query individual fault with env_data. + let (fault, env_data) = harness.manager.get_fault("hvac", "hvac.blower.speed_sensor_mismatch").unwrap(); + + assert_eq!(fault.fault_name, "BlowerSpeedMismatch"); + assert_eq!(fault.symptom.as_deref(), Some("Blower motor speed does not match commanded value")); + + // Verify env_data round-trips through the pipeline. 
+ assert_eq!(env_data.get("cabin_temp_c"), Some(&"42".to_string())); + assert_eq!(env_data.get("target_temp_c"), Some(&"22".to_string())); +} + +/// **Scenario**: Querying an unknown catalog path returns an error. +/// +/// The SOVD manager should reject queries for paths not registered +/// in the fault catalog registry. +#[test] +#[serial] +fn query_unknown_path_returns_error() { + let harness = TestHarness::new(vec![hvac_catalog_config()]); + + let result = harness.manager.get_all_faults("nonexistent_ecu"); + assert!(result.is_err(), "Unknown path should return error"); +} + +/// **Scenario**: Faults that were never reported still appear with default status. +/// +/// SOVD specification requires all registered faults to be visible, +/// even if no fault record was ever received for them. +#[test] +#[serial] +fn unreported_faults_have_default_status() { + let mut harness = TestHarness::new(vec![hvac_catalog_config()]); + harness.clean_catalogs(&["hvac"]); + + let faults = harness.manager.get_all_faults("hvac").unwrap(); + assert_eq!(faults.len(), 2, "Both HVAC faults should be listed"); + + for fault in &faults { + let status = fault.typed_status.as_ref().expect("typed_status should exist"); + assert_eq!(status.test_failed, Some(false), "Default: test_failed=false"); + assert_eq!(status.confirmed_dtc, Some(false), "Default: confirmed_dtc=false"); + assert_eq!(status.pending_dtc, Some(false), "Default: pending_dtc=false"); + } +} From d8983a3db2078364d2ade6682a2eba4090243110 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:38:31 +0100 Subject: [PATCH 6/7] ci: add GitHub Actions workflows and PR/issue templates Workflows: build/test, clippy lint, rustfmt, miri, coverage, copyright header check, cargo audit (pinned to SHA), Bazel format check. All workflows set permissions: contents: read. 
--- .github/ISSUE_TEMPLATE/bug_fix.md | 11 +++ .github/ISSUE_TEMPLATE/improvement.md | 11 +++ .github/PULL_REQUEST_TEMPLATE/bug_fix.md | 19 ++++ .github/PULL_REQUEST_TEMPLATE/improvement.md | 19 ++++ .github/workflows/audit.yml | 47 ++++++++++ .github/workflows/build_test.yml | 77 +++++++++++++++ .github/workflows/copyright.yml | 46 +++++++++ .github/workflows/coverage.yml | 98 ++++++++++++++++++++ .github/workflows/format.yml | 61 ++++++++++++ .github/workflows/lint.yml | 56 +++++++++++ .github/workflows/miri.yml | 62 +++++++++++++ 11 files changed, 507 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_fix.md create mode 100644 .github/ISSUE_TEMPLATE/improvement.md create mode 100644 .github/PULL_REQUEST_TEMPLATE/bug_fix.md create mode 100644 .github/PULL_REQUEST_TEMPLATE/improvement.md create mode 100644 .github/workflows/audit.yml create mode 100644 .github/workflows/build_test.yml create mode 100644 .github/workflows/copyright.yml create mode 100644 .github/workflows/coverage.yml create mode 100644 .github/workflows/format.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/miri.yml diff --git a/.github/ISSUE_TEMPLATE/bug_fix.md b/.github/ISSUE_TEMPLATE/bug_fix.md new file mode 100644 index 0000000..c9c4f35 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_fix.md @@ -0,0 +1,11 @@ +--- +name: Bugfix +about: 'Issue to track a bugfix' +title: 'Bugfix: Your bugfix title' +labels: 'codeowner_review' +assignees: '' + +--- + +> [!IMPORTANT] +> Make sure to link this issue with the PR for your bugfix. 
diff --git a/.github/ISSUE_TEMPLATE/improvement.md b/.github/ISSUE_TEMPLATE/improvement.md
new file mode 100644
index 0000000..fd2c171
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/improvement.md
@@ -0,0 +1,11 @@
+---
+name: Improvement
+about: 'Issue to track an improvement'
+title: 'Improvement: Your improvement title'
+labels: 'codeowner_review'
+assignees: ''
+
+---
+
+> [!IMPORTANT]
+> Make sure to link this issue with the PR for your improvement.
diff --git a/.github/PULL_REQUEST_TEMPLATE/bug_fix.md b/.github/PULL_REQUEST_TEMPLATE/bug_fix.md
new file mode 100644
index 0000000..8341f51
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE/bug_fix.md
@@ -0,0 +1,19 @@
+# Bugfix
+
+> [!IMPORTANT]
+> Use this template only for bugfixes that do not influence topics covered by contribution requests or improvements.
+
+> [!CAUTION]
+> Make sure to submit your pull-request as **Draft** until you are ready to have it reviewed by the Committers.
+
+## Description
+
+[A short description of the bug being fixed by the contribution.]
+
+## Related ticket
+
+> [!IMPORTANT]
+> Please replace `[ISSUE-NUMBER]` with the issue-number that tracks this bug fix. If there is no such
+> ticket yet, create one via [this issue template](../ISSUE_TEMPLATE/new?template=bug_fix.md).
+
+closes [ISSUE-NUMBER] (bugfix ticket)
diff --git a/.github/PULL_REQUEST_TEMPLATE/improvement.md b/.github/PULL_REQUEST_TEMPLATE/improvement.md
new file mode 100644
index 0000000..090ad43
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE/improvement.md
@@ -0,0 +1,19 @@
+# Improvement
+
+> [!IMPORTANT]
+> Use this template only for improvements that do not influence topics covered by contribution requests or bug fixes.
+
+> [!CAUTION]
+> Make sure to submit your pull-request as **Draft** until you are ready to have it reviewed by the Committers.
+
+## Description
+
+[A short description of the improvement being addressed by the contribution.]
+ +## Related ticket + +> [!IMPORTANT] +> Please replace `[ISSUE-NUMBER]` with the issue-number that tracks this bug fix. If there is no such +> ticket yet, create one via [this issue template](../ISSUE_TEMPLATE/new?template=improvement.md). + +closes [ISSUE-NUMBER] (improvement ticket) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml new file mode 100644 index 0000000..84fe5b5 --- /dev/null +++ b/.github/workflows/audit.yml @@ -0,0 +1,47 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Security Audit + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + schedule: + # Run weekly on Monday at 06:00 UTC to catch newly disclosed advisories + - cron: "0 6 * * 1" + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + cargo-audit: + name: Cargo Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install cargo-audit + uses: taiki-e/install-action@23db74cab27cd77071e076a00b569de9549cf0dd # v2.48.8 + with: + tool: cargo-audit@0.22.1 + + - name: Run cargo audit + run: cargo audit diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 0000000..a9d213b --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,77 @@ +# ******************************************************************************* +# 
Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Build & Test + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + cargo-build-test: + name: Cargo Build & Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust nightly + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly-2025-07-14 + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-test-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-test- + + - name: Build all crates + run: cargo build --workspace + + - name: Run tests (IPC tests run serially) + run: cargo test --workspace --all-targets + + bazel-build-test: + name: Bazel Build & Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel + uses: bazel-contrib/setup-bazel@0.14.0 + with: + bazelisk-cache: true + disk-cache: ${{ github.workflow }}-bazel + repository-cache: true + + - name: Bazel build (excluding docs — score_plantuml requires local runfiles) + run: bazel build //src/... //tests/... + + - name: Bazel test (excluding docs) + run: bazel test //src/... //tests/... 
diff --git a/.github/workflows/copyright.yml b/.github/workflows/copyright.yml new file mode 100644 index 0000000..8bba9a7 --- /dev/null +++ b/.github/workflows/copyright.yml @@ -0,0 +1,46 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Copyright Check + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + copyright: + name: Copyright Header Validation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel + uses: bazel-contrib/setup-bazel@0.14.0 + with: + bazelisk-cache: true + disk-cache: ${{ github.workflow }}-bazel + repository-cache: true + + - name: Check copyright headers + run: bazel run //:copyright.check diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..197f575 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,98 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Coverage + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + COVERAGE_THRESHOLD: 90 + +jobs: + coverage: + name: Code Coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust nightly + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly-2025-07-14 + components: llvm-tools-preview + + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-cov-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-cov- + + - name: Generate coverage (LCOV) + run: cargo llvm-cov --workspace --exclude xtask --ignore-filename-regex 'test_utils|dfm_test_utils' --lcov --output-path lcov.info + + - name: Generate coverage summary + id: coverage + run: | + # Extract total line coverage percentage from cargo-llvm-cov text output + REPORT=$(cargo llvm-cov --workspace --exclude xtask --ignore-filename-regex 'test_utils|dfm_test_utils' --no-run 2>&1) + echo "$REPORT" + + # Parse the TOTAL line for line coverage + TOTAL_LINE_COV=$(echo "$REPORT" | grep '^TOTAL' | awk '{print $(NF-3)}' | tr -d '%') + echo "total_coverage=$TOTAL_LINE_COV" >> "$GITHUB_OUTPUT" + echo "### Coverage Report" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "**Total line coverage: ${TOTAL_LINE_COV}%**" >> "$GITHUB_STEP_SUMMARY" + echo "" 
>> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + echo "$REPORT" >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + + - name: Upload coverage artifact + uses: actions/upload-artifact@v4 + with: + name: coverage-lcov + path: lcov.info + retention-days: 30 + + - name: Enforce coverage threshold + run: | + COVERAGE="${{ steps.coverage.outputs.total_coverage }}" + THRESHOLD="${{ env.COVERAGE_THRESHOLD }}" + echo "Coverage: ${COVERAGE}%, Threshold: ${THRESHOLD}%" + if (( $(echo "$COVERAGE < $THRESHOLD" | bc -l) )); then + echo "::error::Coverage ${COVERAGE}% is below threshold ${THRESHOLD}%" + exit 1 + fi + echo "Coverage ${COVERAGE}% meets threshold ${THRESHOLD}%" diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 0000000..fa8ff5d --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,61 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Format Check + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + rustfmt: + name: Rust Format (rustfmt) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust nightly with rustfmt + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly-2025-07-14 + components: rustfmt + + - name: Check formatting + run: cargo fmt --all -- --check + + bazel-format: + name: Bazel Format Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel + uses: bazel-contrib/setup-bazel@0.14.0 + with: + bazelisk-cache: true + disk-cache: ${{ github.workflow }}-bazel + repository-cache: true + + - name: Check Bazel formatting + run: bazel test //:format.check diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..6144640 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,56 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Lint + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + clippy: + name: Clippy Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust nightly with clippy + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly-2025-07-14 + components: clippy + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-clippy-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-clippy- + + - name: Run Clippy (deny warnings + safety lints) + run: cargo clippy --workspace --all-targets -- -D warnings diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml new file mode 100644 index 0000000..92208ce --- /dev/null +++ b/.github/workflows/miri.yml @@ -0,0 +1,62 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +name: Miri + +on: + pull_request: + branches: [main] + push: + branches: [main] + merge_group: + types: [checks_requested] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + cargo-miri: + name: Miri (Undefined Behavior Check) + runs-on: ubuntu-latest + env: + MIRIFLAGS: "-Zmiri-disable-isolation" + steps: + - uses: actions/checkout@v4 + + - name: Install Rust nightly with Miri + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly-2025-07-14 + components: miri, rust-src + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-miri-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-miri- + + - name: Run Miri + # --exclude integration_tests: integration tests use iceoryx2 IPC (shared memory + # + system calls) which Miri cannot emulate — they'd fail with unsupported-syscall errors. + # --exclude xtask: build helper binary with no unsafe code; excluding it avoids + # pulling in proc-macro and build-script dependencies that Miri cannot interpret. + run: cargo +nightly-2025-07-14 miri test --workspace --exclude integration_tests --exclude xtask From e2ea0a22fbee851377b92dd2a4da4498c57d8c26 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 25 Feb 2026 15:38:41 +0100 Subject: [PATCH 7/7] docs: add architecture documentation, PlantUML diagrams, and API reference Architecture overview, fault catalog/reporter/DFM sequence diagrams, library architecture drawing, Sphinx docs scaffold, and HVAC component design reference example. 
--- README.md | 214 +++++++++++-- docs/conf.py | 65 ++++ docs/design/design.md | 53 +++- docs/drawings/lib_arch.drawio | 146 +++++++++ docs/drawings/lib_arch.svg | 1 + .../hvac_component_design_reference.rs | 221 +++++++++++++ docs/index.rst | 293 ++++++++++++++++++ .../Registering new fault in the system.svg | 1 + docs/puml/enable_condition_ntf.puml | 58 ++++ docs/puml/enable_condition_ntf.svg | 70 +++++ docs/puml/fault_catalog.puml | 45 +++ docs/puml/fault_catalog.svg | 57 ++++ docs/puml/generate_svg.sh | 21 ++ docs/puml/local_enable_condition_ntf.puml | 54 ++++ docs/puml/local_enable_condition_ntf.svg | 66 ++++ docs/puml/new_enable_condition.puml | 43 +++ docs/puml/new_enable_condition.svg | 55 ++++ docs/puml/new_fault.puml | 68 ++++ docs/puml/new_fault.svg | 80 +++++ docs/puml/query_clear.puml | 83 +++++ 20 files changed, 1655 insertions(+), 39 deletions(-) create mode 100644 docs/conf.py create mode 100644 docs/drawings/lib_arch.drawio create mode 100644 docs/drawings/lib_arch.svg create mode 100644 docs/examples/hvac_component_design_reference.rs create mode 100644 docs/index.rst create mode 100644 docs/puml/Registering new fault in the system.svg create mode 100644 docs/puml/enable_condition_ntf.puml create mode 100644 docs/puml/enable_condition_ntf.svg create mode 100644 docs/puml/fault_catalog.puml create mode 100644 docs/puml/fault_catalog.svg create mode 100755 docs/puml/generate_svg.sh create mode 100644 docs/puml/local_enable_condition_ntf.puml create mode 100644 docs/puml/local_enable_condition_ntf.svg create mode 100644 docs/puml/new_enable_condition.puml create mode 100644 docs/puml/new_enable_condition.svg create mode 100644 docs/puml/new_fault.puml create mode 100644 docs/puml/new_fault.svg create mode 100644 docs/puml/query_clear.puml diff --git a/README.md b/README.md index c830476..2d89b01 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,189 @@ - - -# Fault Library - -OpenSOVD Fault Library - -## Design - -The high-level design can be found 
here: [OpenSOVD Design](https://github.com/eclipse-opensovd/opensovd/blob/main/docs/design/design.md) - -The Fault Lib design can be found here: [Fault Lib Design](docs/design/design.md) +# Diagnostic Fault Library + +A Rust library for managing diagnostic fault reporting, processing, and querying +in Software-Defined Vehicles. The library implements the **fault lifecycle** +defined by ISO 14229 (UDS) and exposes fault state through an +[OpenSOVD](https://covesa.github.io/OpenSOVD/)-compatible interface. + +--- + +## 📂 Project Structure + +``` +fault-lib/ +├── src/ +│ ├── common/ # Shared types: FaultId, FaultRecord, FaultCatalog, debounce … +│ ├── fault_lib/ # Reporter-side API (fault reporting, enabling conditions) +│ ├── dfm_lib/ # Diagnostic Fault Manager (processing, SOVD, KVS storage) +│ └── xtask/ # Developer automation (cargo xtask …) +├── tests/ +│ └── integration/ # Integration tests (fault-lib ↔ DFM end-to-end) +├── examples/ # Runnable examples (DFM, SOVD fault manager, tst_app) +├── docs/ # Sphinx / mdBook documentation +│ └── design/ # Architecture & design decisions +├── .github/workflows/ # CI/CD pipelines +├── .vscode/ # Recommended VS Code settings +├── BUILD # Root Bazel targets +├── MODULE.bazel # Bazel module dependencies (bzlmod) +├── Cargo.toml # Rust workspace root +├── project_config.bzl # Project metadata for Bazel macros +├── LICENSE # Apache-2.0 +└── README.md # This file +``` + +### Source Crates + +| Crate | Description | +|-------|-------------| +| `common` | Foundational types shared by reporters and DFM: `FaultId`, `FaultRecord`, `FaultCatalog`, `DebounceMode`, compliance tags, IPC timestamps. | +| `fault_lib` | **Reporter-side API.** Applications use `Reporter` to publish fault records to DFM via iceoryx2 IPC. Includes enabling-condition guards and builder-pattern catalog configuration. 
| +| `dfm_lib` | **Diagnostic Fault Manager.** Receives records via `FaultRecordProcessor`, manages lifecycle state, persists to KVS (`KvsSovdFaultStateStorage`), and exposes faults through `SovdFaultManager`. | +| `xtask` | Developer automation tasks (e.g., code generation helpers). | + +--- + +## 🚀 Getting Started + +### 1️⃣ Clone the Repository + +```sh +git clone https://github.com/eclipse-opensovd/fault-lib.git +cd fault-lib +``` + +### 2️⃣ Build + +```sh +# Cargo (all crates) +cargo build --workspace + +# Bazel (all targets) +bazel build //src/... +``` + +### 3️⃣ Run Examples + +Start the Diagnostic Fault Manager (DFM): + +```sh +cargo run -p dfm_lib --example dfm +``` + +The `dfm` process uses hardcoded fault catalogs matching the JSON files in +`src/fault_lib/tests/data/`. When ready you will see: + +``` +[INFO dfm_lib::fault_lib_communicator] FaultLibCommunicator listening... +``` + +In a **separate terminal**, start a reporter application: + +```sh +cargo run -p fault_lib --features testutils --example tst_app -- -c src/fault_lib/tests/data/hvac_fault_catalog.json +# or +cargo run -p fault_lib --features testutils --example tst_app -- -c src/fault_lib/tests/data/ivi_fault_catalog.json +``` + +The `tst_app` reporter loops 20 times over every fault in the catalog, +alternating between `Failed` and `Passed` with a 200 ms delay. + +### 4️⃣ Run Tests + +```sh +# All workspace tests (unit + integration) +cargo test --workspace + +# Integration tests only +cargo test -p integration_tests + +# Bazel tests +bazel test //... 
+``` + +#### Integration Tests + +The `tests/integration/` crate contains a comprehensive end-to-end test suite +exercising the full fault-lib - DFM - SOVD pipeline **without IPC**: + +| Module | What it covers | +|--------|----------------| +| `test_report_and_query` | Basic report - process - SOVD query flow | +| `test_lifecycle_transitions` | Full lifecycle: NotTested - PreFailed - Failed - Passed | +| `test_persistent_storage` | KVS persistence across DFM restart, delete operations | +| `test_multi_catalog` | Multi-tenant catalog isolation, cross-catalog independence | +| `test_debounce_aging_cycles_ec` | Debounce, aging policies, operation cycles, enabling conditions | +| `test_error_paths` | Error handling, validation, edge cases | +| `test_boundary_values` | Boundary conditions, limits, overflow scenarios | +| `test_concurrent_access` | Thread safety, concurrent publish/query | +| `test_ipc_query` | IPC-based SOVD query/clear protocol | +| `test_json_catalog` | JSON catalog parsing, validation | + +--- + +## 🛠 Quality Gates + +CI automatically runs these checks on every PR and push to `main`. +To run them locally before pushing: + +### Format check + +```sh +cargo fmt --all -- --check +bazel test //:format.check +``` + +### Lint + +```sh +cargo clippy --workspace --all-targets -- \ + -D warnings -D clippy::unwrap_used -D clippy::expect_used \ + -D clippy::todo -D clippy::unimplemented -A clippy::new_without_default +``` + +### Build & test + +```sh +cargo build --workspace +cargo test --workspace +bazel build //src/... +bazel test //... 
+``` + +### Miri (Undefined Behavior) + +```sh +cargo +nightly miri test --workspace +``` + +### Copyright headers + +```sh +bazel test //:copyright.check +``` + +--- + +## 📖 Documentation + +| Document | Description | +|----------|-------------| +| [Design](docs/design/design.md) | Architecture and design decisions | + +To run a live preview of the documentation locally: + +```sh +bazel run //docs:live_preview +``` + +--- + +## ⚙️ `project_config.bzl` + +Project-specific metadata used by Bazel macros (e.g., `dash_license_checker`): + +```python +PROJECT_CONFIG = { + "asil_level": "QM", + "source_code": ["rust"], +} +``` diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..07135a0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,65 @@ +# ******************************************************************************* +# Copyright (c) 2024 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "fault-lib" +project_url = "https://eclipse-opensovd.github.io/fault-lib/" +project_prefix = "FLIB_" +author = "S-CORE" +version = "0.1.0" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + + +extensions = [ + "sphinx_design", + "sphinx_needs", + "sphinxcontrib.plantuml", + "score_plantuml", + "score_metamodel", + "score_draw_uml_funcs", + "score_source_code_linker", + "score_layout", +] + +myst_enable_extensions = ["colon_fence"] + +exclude_patterns = [ + # The following entries are not required when building the documentation via 'bazel + # build //docs:docs', as that command runs in a sandboxed environment. However, when + # building the documentation via 'bazel run //docs:incremental' or esbonio, these + # entries are required to prevent the build from failing. + "bazel-*", + ".venv_docs", +] + +# Enable markdown rendering +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + + +templates_path = ["templates"] + +# Enable numref +numfig = True diff --git a/docs/design/design.md b/docs/design/design.md index 31ca3f0..500bef2 100644 --- a/docs/design/design.md +++ b/docs/design/design.md @@ -162,32 +162,57 @@ sequenceDiagram ## Rust API Draft -An example can be found here: [Example Component](../../tests/hvac_component.rs) +An example can be found here: [Example Component](../examples/hvac_component_design_reference.rs) -Here’s how a component ends up talking to the library: +Here's how a component ends up talking to the library: -1. 
Define a handful of `FaultDescriptor`s (the `fault_descriptor!` macro keeps them readable) and park them inside a `'static` `FaultCatalog { id, version, descriptors }`. Components still embed that slice at build time, while the DFM loads the same artifact through `FaultCatalog::from_config` so updates land via JSON/YAML config instead of rebuilding the manager. -2. Spin up a `FaultApi` with an `Arc` that knows how to reach the DFM and an `Arc` that mirrors events into your logging stack. -3. Initialize the singleton FaultApi once (`FaultApi::new(sink, logger)`), then create one `Reporter` per fault ID using `Reporter::new(&catalog, config, &fault_id)`. Each reporter is bound to a single fault and holds static config for that fault. -4. At runtime, create a mutable `FaultRecord` from the bound `Reporter` using `reporter.create_record()`. Update the record in place (e.g., `add_environment_data`, `update_stage`, `update_severity`). -5. Publish the record via the bound reporter: `reporter.publish(&record)`. This enqueues the record to the configured FaultSink and is non-blocking for the caller. +1. Build a `FaultCatalog` using `FaultCatalogBuilder`. The builder accepts a `FaultCatalogConfig` struct, a JSON string, or a JSON file path. Each `FaultDescriptor` is a plain struct literal with fields for ID, name, category, severity, compliance tags, and optional debounce/reset policies. +2. Initialise the singleton `FaultApi` once via `FaultApi::new(catalog)` (or `try_new` for the fallible variant). This creates the IPC sink to the DFM and stores the catalog in process-global state. Optionally register a `LogHook` via `FaultApi::set_log_hook(hook)` before creating reporters. +3. Create one `Reporter` per fault ID using `Reporter::new(&fault_id, config)` (via the `ReporterApi` trait). Each reporter looks up its descriptor in the global catalog and binds the IPC sink — callers never handle the sink directly. +4. 
At runtime, create a `FaultRecord` from the bound reporter: `reporter.create_record(LifecycleStage::Failed)`. The lifecycle stage is set at creation and the timestamp is captured automatically. +5. Publish the record via the bound reporter: `reporter.publish("service/path", record)`. This enqueues the record to the IPC sink and is non-blocking for the caller. -Each `FaultRecord` contains only runtime-mutable data (fault_id, time, severity, source, lifecycle_phase, stage, environment_data). All static configuration (name, default severity, compliance, debounce, reset, etc.) lives in the `FaultDescriptor` held by the `Reporter`. +Each `FaultRecord` contains only runtime data (id, time, source, lifecycle_phase, lifecycle_stage, env_data). All static configuration (name, severity, compliance, debounce, reset, etc.) lives in the `FaultDescriptor` held by the `Reporter` and is not sent over IPC. Separate traits are used for logging and fault reporting mainly due to separation of concerns (transport to DFM vs. observability (logging)). -Additional reasons include: different failure domains (IPC vs logging), different performance expactations, user-control and clarity (maybe a logging system is already used directly by the user) and cleaner mocking of transport (just mock faultsink trait). +Additional reasons include: different failure domains (IPC vs logging), different performance expectations, user-control and clarity (maybe a logging system is already used directly by the user) and cleaner mocking of transport (just mock `FaultSinkApi` trait). ## Design Decisions & Trade-offs -- **Static catalogs + runtime config:** Components still ship `'static` descriptors for zero-cost lookup, while the DFM consumes the same artifact via `FaultCatalog::from_config` so policy changes land via JSON/YAML config instead of a rebuild. This keeps deployment fast with only a light runtime copy cost on the DFM side. 
+- **Builder-based catalogs + runtime config:** Components build `FaultCatalog` via `FaultCatalogBuilder` (from a `FaultCatalogConfig`, JSON string, or JSON file). The DFM loads the same artifact so policy changes land via config updates instead of a rebuild. - **Minimal runtime records:** `FaultRecord` contains only runtime-mutable data. All static configuration (descriptor, debounce, compliance, etc.) is held by the `Reporter` and not sent over IPC. -- **Explicit lifecycle states (test-centric):** `FaultLifecycleStage` uses `NotTested`, `PreFailed`, `Failed`, `PrePassed`, `Passed` to track raw test outcomes and debounce stabilization. DTC lifecycle (pending, confirmed, aging) is not represented here; it is derived by the DFM from these stages. -- **Non-blocking publish path:** `Reporter::publish` enqueues the record to the FaultSink and returns immediately; it does not block on DFM or transport. +- **Explicit lifecycle states (test-centric):** `LifecycleStage` uses `NotTested`, `PreFailed`, `Failed`, `PrePassed`, `Passed` to track raw test outcomes and debounce stabilization. DTC lifecycle (pending, confirmed, aging) is not represented here; it is derived by the DFM from these stages. +- **Symmetric IPC abstraction:** The reporter side uses `FaultSinkApi` to send events; the DFM side consumes them through `DfmTransport`. Both are traits with default iceoryx2 implementations (`FaultManagerSink` / `Iceoryx2Transport`) and in-memory test doubles, so integration tests can run without shared-memory infrastructure. +- **Non-blocking publish path:** `Reporter::publish` enqueues the record to the `FaultSinkApi` and returns immediately; it does not block on DFM or transport. - **Declarative policies:** Debounce and aging (reset) logic ride on enums (`DebounceMode`, `ResetTrigger`) to handle typical cases. Debounce variants: `CountWithinWindow { min_count, window }`, `HoldTime { duration }`, `EdgeWithCooldown { cooldown }`, `CountThreshold { min_count }`. 
Reset triggers: `OperationCycles { kind, min_cycles, cycle_ref }`, `StableFor(duration)`, `ToolOnly`. `cycle_ref` links the aging policy to a concrete cycle counter identity (e.g. `"ignition.main"`, `"drive.standard"`) so the DFM can correlate counts from different domains. Clarification: Debouncing can occur in Fault Lib and/or DFM (if central aggregation needed) while aging (reset) is performed in DFM. -- **Panic on missing descriptors:** If a caller asks for a fault that isn’t in the catalog we `expect(...)` and crash. That flushes out drift early, so production flows should generate the catalog and component code together. +- **Panic on missing descriptors:** If a caller asks for a fault that isn't in the catalog we `expect(...)` and crash. That flushes out drift early, so production flows should generate the catalog and component code together. ## Open Topics Open Topics to be addressed during development: -- [ ] define time source for faults and fault lib. Time source can be from application, from fault lib or both. +- [ ] **Define time source for faults and fault lib.** + - **Current state:** The library uses `std::time::SystemTime::now()` (wall-clock) + to timestamp fault records (`reporter.rs:create_record`). The + `duration_since(UNIX_EPOCH)` call uses `unwrap_or_default()`, which means a + pre-epoch clock (e.g. after a severe NTP correction) silently produces + `IpcTimestamp { seconds_since_epoch: 0, nanoseconds: 0 }` - an epoch-zero + timestamp indistinguishable from "no timestamp". + - **Implications:** + - **NTP drift/jumps:** `SystemTime` is not monotonic. NTP step corrections + can cause duplicate or out-of-order timestamps across fault records. + - **Clock set before epoch:** The `unwrap_or_default()` fallback silently + zeros the timestamp instead of signaling an error. + - **Cross-ECU ordering:** Wall-clock timestamps from different ECUs are not + comparable without a shared time synchronization protocol. 
+ - **Why this is an open point:** Automotive diagnostics often require monotonic + or synchronized time bases (e.g., ECU uptime, AUTOSAR `StbM` time) to + guarantee consistent ordering across ECUs and to survive clock corrections. + Using `SystemTime` is sufficient for prototyping but may violate ordering + guarantees in production. + - **Decision needed:** Should the timestamp be (a) provided by the calling + application, (b) sourced from a configurable `TimeProvider` trait inside + fault-lib, or (c) a combination (library default + optional override per + report)? The chosen approach must work across IPC boundaries (`IpcTimestamp` + is already `#[repr(C)]` and time-source agnostic). + - **Decision owner:** Project architect / platform team. diff --git a/docs/drawings/lib_arch.drawio b/docs/drawings/lib_arch.drawio new file mode 100644 index 0000000..4b56bf9 --- /dev/null +++ b/docs/drawings/lib_arch.drawio @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/drawings/lib_arch.svg b/docs/drawings/lib_arch.svg new file mode 100644 index 0000000..8a52b23 --- /dev/null +++ b/docs/drawings/lib_arch.svg @@ -0,0 +1 @@ +
FaultMgrClient
FaultMgrClient
Fault Lib
Fault Lib
Diagnostic Fault Manager 
Diagnostic Fault Man...
Diagnostic DB
Diagnostic...
SOVD Gateway
SOVD Gateway
App
App
Fault Monitor
Fault Monitor
Enabling
Condition
Enabling...
Activity
Activity
Fault Lib
Fault Lib
.json
.json
.json
.json
.json
.json
.json
.json
Fault Catalog
Cfg
Fault Cata...
Faults Catalog
Faults Catalog
.json
.json
Fault Catalog
Cfg
Fault Cata...
Fault
Fault
Diagnostic
Entity
Diagnostic...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/examples/hvac_component_design_reference.rs b/docs/examples/hvac_component_design_reference.rs new file mode 100644 index 0000000..669a676 --- /dev/null +++ b/docs/examples/hvac_component_design_reference.rs @@ -0,0 +1,221 @@ +/* +* Copyright (c) 2026 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) +* +* See the NOTICE file(s) distributed with this work for additional +* information regarding copyright ownership. +* +* This program and the accompanying materials are made available under the +* terms of the Apache License Version 2.0 which is available at +* https://www.apache.org/licenses/LICENSE-2.0 +* +* SPDX-License-Identifier: Apache-2.0 +*/ + +//! **DESIGN REFERENCE** — illustrates the fault-lib API surface described in +//! `docs/design/design.md`. All types and method signatures match the current +//! implementation. +//! +//! This file is **not** compiled as part of any workspace crate. It serves as +//! living documentation that is kept in sync with the actual API. + +use std::sync::Arc; +use core::time::Duration; + +// --- common crate types --- +use common::catalog::{FaultCatalogBuilder, FaultCatalogConfig}; +use common::config::ResetPolicy; +use common::debounce::DebounceMode; +use common::fault::*; +use common::ids::SourceId; +use common::sink_error::SinkError; +use common::types::*; + +// --- fault_lib crate API --- +use fault_lib::FaultApi; +use fault_lib::reporter::{Reporter, ReporterApi, ReporterConfig}; +use fault_lib::sink::LogHook; + +// ============================================================================ +// 1. Build a FaultCatalog from a FaultCatalogConfig +// ============================================================================ +// +// In production the catalog is typically loaded from a JSON file via +// `FaultCatalogBuilder::new().json_file(path)?.build()`. Here we construct +// the config in code for clarity. 
+ +fn build_hvac_catalog() -> common::catalog::FaultCatalog { + let config = FaultCatalogConfig { + id: "hvac".into(), + version: 3, + faults: vec![ + FaultDescriptor { + id: FaultId::Numeric(0x7001), + name: to_static_short_string("CabinTempSensorStuck").unwrap(), + summary: Some(to_static_long_string( + "Cabin temperature sensor delivered the same sample for >60 s", + ).unwrap()), + category: FaultType::Hardware, + severity: FaultSeverity::Warn, + compliance: ComplianceVec::try_from( + &[ComplianceTag::SafetyCritical][..], + ).unwrap(), + reporter_side_debounce: Some(DebounceMode::HoldTime { + duration: Duration::from_secs(60).into(), + }), + reporter_side_reset: Some(ResetPolicy { + trigger: common::config::ResetTrigger::StableFor( + Duration::from_secs(900).into(), + ), + min_operating_cycles_before_clear: Some(5), + }), + manager_side_debounce: None, + manager_side_reset: None, + }, + FaultDescriptor { + id: FaultId::Text(to_static_short_string( + "hvac.blower.speed_mismatch", + ).unwrap()), + name: to_static_short_string("BlowerSpeedMismatch").unwrap(), + summary: Some(to_static_long_string( + "Commanded and measured blower speeds diverged beyond tolerance", + ).unwrap()), + category: FaultType::Communication, + severity: FaultSeverity::Error, + compliance: ComplianceVec::try_from( + &[ComplianceTag::EmissionRelevant][..], + ).unwrap(), + reporter_side_debounce: Some(DebounceMode::EdgeWithCooldown { + cooldown: Duration::from_millis(500).into(), + }), + reporter_side_reset: None, + manager_side_debounce: None, + manager_side_reset: None, + }, + ], + }; + + FaultCatalogBuilder::new() + .cfg_struct(config) + .expect("builder input") + .build() +} + +// ============================================================================ +// 2. LogHook — observability bridge to your logging stack +// ============================================================================ + +/// Minimal log hook. Real implementations would forward to DLT, tracing, etc. 
+struct StdoutLogHook; + +impl LogHook for StdoutLogHook { + fn on_publish(&self, record: &FaultRecord) { + println!( + "[fault-log] id={:?} stage={:?} source={}", + record.id, record.lifecycle_stage, record.source, + ); + } + + fn on_error(&self, record: &FaultRecord, error: &SinkError) { + eprintln!( + "[fault-log] FAILED id={:?} error={error}", + record.id, + ); + } +} + +// ============================================================================ +// 3. Application wiring: one Reporter per fault ID +// ============================================================================ + +struct HvacApp { + #[allow(dead_code)] + temp_sensor_fault: Reporter, + blower_fault: Reporter, +} + +impl HvacApp { + /// Bind reporters at startup. + /// + /// `FaultApi` must be initialised before calling `Reporter::new`; + /// each reporter looks up its descriptor in the global catalog and + /// obtains a handle to the IPC sink. + pub fn new(reporter_cfg: ReporterConfig) -> Self { + Self { + temp_sensor_fault: Reporter::new( + &FaultId::Numeric(0x7001), + reporter_cfg.clone(), + ) + .expect("descriptor 0x7001 must exist in catalog"), + + blower_fault: Reporter::new( + &FaultId::Text( + to_static_short_string("hvac.blower.speed_mismatch").unwrap(), + ), + reporter_cfg, + ) + .expect("descriptor 'hvac.blower.speed_mismatch' must exist in catalog"), + } + } + + /// Simulate a control-loop iteration. + pub fn step(&mut self) { + self.handle_blower_fault(0.6, 0.9); + } + + /// 4. At runtime: create a record, set lifecycle stage, publish. + /// + /// `create_record` captures the current timestamp. + /// `publish` enqueues the record to the IPC sink (non-blocking). + fn handle_blower_fault(&mut self, measured_rpm: f32, commanded_rpm: f32) { + let _measured = measured_rpm; + let _commanded = commanded_rpm; + + // Create a record stamped with the current wall-clock time. + // The lifecycle stage (Failed / Passed / …) is set at creation. 
+ let record = self.blower_fault.create_record(LifecycleStage::Failed); + + // Publish to DFM via the IPC sink. + // `path` identifies the IPC channel (e.g. service name). + if let Err(err) = self.blower_fault.publish("hvac/blower", record) { + eprintln!("failed to enqueue blower mismatch fault: {err}"); + } + } +} + +// ============================================================================ +// Putting it all together +// ============================================================================ + +#[allow(dead_code)] +fn main() { + // --- Startup --- + + // 1. Build the catalog (from config struct, JSON string, or JSON file). + let catalog = build_hvac_catalog(); + + // 2. Initialise the global FaultApi singleton (creates IPC sink). + // Must happen exactly once before any Reporter is created. + let _api = FaultApi::new(catalog); + + // 3. (Optional) Register a log hook for observability. + FaultApi::set_log_hook(Arc::new(StdoutLogHook)).ok(); + + // 4. Create per-component ReporterConfig. + let reporter_cfg = ReporterConfig { + source: SourceId { + entity: to_static_short_string("HVAC.Controller").unwrap(), + ecu: Some(ShortString::from_bytes(b"CCU-SoC-A").unwrap()), + domain: Some(to_static_short_string("HVAC").unwrap()), + sw_component: Some(to_static_short_string("ClimateManager").unwrap()), + instance: None, + }, + lifecycle_phase: LifecyclePhase::Running, + default_env_data: MetadataVec::new(), + }; + + // 5. Create the application with bound reporters. + let mut app = HvacApp::new(reporter_cfg); + + // --- Runtime --- + app.step(); +} diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..1737b4c --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,293 @@ +.. + # ************************************************************************** + # Copyright (c) 2024 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. 
+ # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ************************************************************************** + + +Diagnostic Fault Library Documentation +======================================= + +This documentation describes the structure, usage and configuration of the +Diagonostic Fault Library. + +.. contents:: Table of Contents + :depth: 4 + :local: + + + +Abbreviations +------------- + ++-------------+---------------------------+ +| **Abbrev.** | **Meaning** | ++=============+===========================+ +| FL | Fault Library | ++-------------+---------------------------+ +| DFM | Diagnostic Fault Manager | ++-------------+---------------------------+ +| FOTA | Flashing over the air | ++-------------+---------------------------+ +| IPC | Inter Process Com. | ++-------------+---------------------------+ +| HPC | High-Performance Computer | ++-------------+---------------------------+ + +Overview +-------- + +The diagnostic fault library should provide S-CORE applications with an API for reporting results of the diagnostic tests. +For more information on the SOVD context, see `S-CORE Diagnostic and Fault Management `_ + +Every application which is able to test health states of part or the complete HPC, it's submodules, hardware etc., needs +possibility to report results of those test back to the car environment, so the other application +or SOVD clients can access them. The Fault Library enables this possibility. + +The results of the tests reported to the FaultLib are send to the Diagnostic Fault Manager which stores or update them in the Diagnostic Data Base. + +.. 
image:: drawings/lib_arch.svg + :alt: Fault monitor + :width: 800px + :align: center + + +Historical name mapping (pre-v0.1): + +- FaultMonitor -> Reporter +- FaultMgrClient -> FaultSinkApi +- FaultApi -> FaultLib + + +Fault-lib and fault diagnostic manager +-------------------------------------- + +The fault diagnostic manager is a proxy between the apps reporting faults and the SOVD server. +Beside that it collects all faults in the system and manage persisten storage of their states. +According to the SOVD specification (chapter 4.3.1), faults can be reported by: + +- SOVD Server itself +- Component +- an App + + +Design Decisons & Trade-offs +---------------------------- + +Fault Catalog +~~~~~~~~~~~~~ +Despite the SOVD assumes to work with offline diagnostic services and faults catalogs (like ODX, etc.), we assume the fault_lib and DFM to share common fault catalogs. +Otherwise during the startup phase, all the fault_lib clients would need to register thousands of faults, which then would lead to heavy IPC traffic in the system. +Considering, that the presence of most of the faults in the car, doesn't change over the lifetime, it makes less sense to dynamically inform DFM about their existence by each startup. + +From another hand, there will be still a subset of the faults which cannot be known during the integration of the system, or can appear and disappear depending +on the current conditions in the car (change in the features configuration, OTA, new apps downloaded to the car , etc.). For that reason the fault_lib and the +DFM shall still provide mechanism which allow the FL client to register new faults and start to reporting resuls. + +.. note:: + TBD: + Do we need a mechanism to remove from DFM a fault in case it is not tested any more ? What the SOVD standard is expecting ? + +.. note:: + TBD: + How the fault catalog shall be looks like (generated code ? 
, json file (probably)), and be shared between DFM and FL + + +Use cases +--------- + +Following usecases are valid for the S-CORE application using the Fault Library: + +- registering new fault in the system + - depending on car configuration variant, enabled features etc. the number of faults detected and reported by the app can change + - depending on the current status and state of the car electronic system the APP can report different faults +- configuring debouncing for the fault + - different test can require the results to be filtered over time or debounced, to prevent setting the faults by glitches or false positives +- configuring enabling conditions for the fault + - each test can require different system conditions to be fulfilled before the test can be performed (e.g. the communication test can be done only if the power supply is in expected range) +- reporting results of diagnostic tests (fail / pass) +- reporting status of enabling conditions (if done in the app) + - the application can report only status on the enabling condition and does not report any faults +- react to the SOVD Fault Handling actions (e.g. 
delete faults can cause the test to restart) +- react to change in the enabling conditions (some tests could be impossible to be process when enabling conditions are not fulfilled) +- provide interface to the user which allow to provide additional environmental data to be stored with the fault + +Following usecases applies for the Fault Library (FL) and Diagnostic Fault Manager (DFM): + +- validate the consistency of the fault catalog shared between DFM and FL +- DFM maintain global fault catalog based on the information from each FL +- FL reports state changes in the faults to DFM over IPC +- FL reports enabling condition state change to the DFM over IPC +- DFM reports over IPC to FL enabling condition state change reported by another FL +- DFM requests restart of the test for the faults reported by FL +- DFM reports cleaning of the faults in the DFM by the SOVD client +- DFM receives and maintain current status of the environment conditions to be stored together with faults + + +Fault Catalog Init +~~~~~~~~~~~~~~~~~~~ + +This sequence shall ran at each start of the system to assure the FL and DFM are using consistent definitions of the faults. + +.. image:: puml/fault_catalog.svg + :alt: Fault monitor + :width: 800px + :align: center + + +New fault in the system +~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: puml/new_fault.svg + :alt: Fault monitor + :width: 1200px + :align: center + + +New enabling condition in the system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: puml/new_enable_condition.svg + :alt: Fault enabling condition + :width: 800px + :align: center + + +Enabling conditions change +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. image:: puml/enable_condition_ntf.svg + :alt: Fault enabling condition + :width: 1200px + :align: center + +Local Enabling Condition +~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
image:: puml/local_enable_condition_ntf.svg + :alt: Fault enabling condition + :width: 1200px + :align: center + + +Diagnostic Fault Manager +------------------------ + + +Based on the above use cases, the Diagnostic Fault Manager shall: + +- collect and manage the fault's enabling conditions + - let all fault library instances to subscribe on the changes to the fault's enable conditions set + - notify all subscribes in case the set of the active enabling conditions changes + - let registering new enable conditions + - receive status of enable conditions and notify all fault_lib instances using those conditions +- collect and manage states of the faults + - handle registering of new faults in the system + - preventing duplication of the faults + - storing fault statuses reported by the apps + - storing information which fault reporter awaits which enabling condition notification +- notify fault lib instances about the fault's events triggered by the SOVD diagnostic server (e.g. delete fault, disable fault, trigger etc) + + +MVP +--- + +Scope +~~~~~ + +The MVP shall provide following functionality and features: + +- 2 test apps which can report faults to the Diagnostic Fault Manager over IPC +- in the first step no enabling conditions handling +- the fault catalog will be stored in the single json file and read by both FaultLibs and DiagnosticFaultManager +- the IPC from communication gateway shall be reused + +Design +~~~~~~ + +FaultCatalog +^^^^^^^^^^^^ + +The FaultCatalog module will read, validate the catalog configuration json file and create collection of available Faults with their properties. +Later on it will calculate the hashsum over the catalog and verify if the Diagnostic Fault Manager usees the same catalog. +If not, the Diagnostic Fault Manager will copy the FaultCatalog from the FL and update local copy (TBD: shoul DFM simply share FaultCatalog with FL ?) 
+ + +Fault +^^^^^ + +The struct containing unique ID bound to the full fault property description in the Fault Catalog. The Fault-Lib will transfer to the DFM only this ID +to inform about fault status. All other information needed by SOVD server will be read by the DFM from Fault Catalog. + +Diagnostic Entity +^^^^^^^^^^^^^^^^^ +Keeps the information abut the SOVD entity to which the reported fault belongs. This is open topic. Unclear how the SOVD entities shall be managed and linked to the faults. + + + + + + + + + +Requirements +------------ + +.. stkh_req:: Example Functional Requirement + :id: stkh_req__docgen_enabled__example + :status: valid + :safety: QM + :security: YES + :reqtype: Functional + :rationale: Ensure documentation builds are possible for all modules + + +Project Layout +-------------- + +The module template includes the following top-level structure: + +- `src/`: Main C++/Rust sources +- `tests/`: Unit and integration tests +- `examples/`: Usage examples +- `docs/`: Documentation using `docs-as-code` +- `.github/workflows/`: CI/CD pipelines + +Quick Start +----------- + + +To build the module: + +.. code-block:: bash + + bazel build //src/... + +To run tests: + +.. code-block:: bash + + bazel test //tests/... + +Configuration +------------- + +The `project_config.bzl` file defines metadata used by Bazel macros. + +Example: + +.. code-block:: python + + PROJECT_CONFIG = { + "asil_level": "QM", + "source_code": ["cpp", "rust"] + } + +This enables conditional behavior (e.g., choosing `clang-tidy` for C++ or `clippy` for Rust). 
diff --git a/docs/puml/Registering new fault in the system.svg b/docs/puml/Registering new fault in the system.svg new file mode 100644 index 0000000..d669a61 --- /dev/null +++ b/docs/puml/Registering new fault in the system.svg @@ -0,0 +1 @@ +AppFaultMonitorFaultLibDiagFaultMgrAppAppFaultMonitorFaultMonitorFaultLibFaultLibDiagFaultMgrDiagFaultMgrStart upipc::subscribeEnablingConditions()ipc::ntfEnablingConditions(list[])get_fault(sovd_path,id,debouncing,enabling_conditions[])register_fault(sovd_path,enabling_conditions[])alt[Fault already registered]Fault already registeredor other conditions not okError()NoneOk(fault_id)Some<Fault(fault_id)>FaultMonitor::new(Fault)register()store(fault_monitor)App Reporting Faultreport_status(pass/fail)report(fault_id, status)ipc::report_fault_status(id, status) \ No newline at end of file diff --git a/docs/puml/enable_condition_ntf.puml b/docs/puml/enable_condition_ntf.puml new file mode 100644 index 0000000..74a1099 --- /dev/null +++ b/docs/puml/enable_condition_ntf.puml @@ -0,0 +1,58 @@ +@startuml +title Enabling Condition : cross-app status notifications +skinparam ParticipantPadding 20 + +box "App1 Process" +participant App1 +participant "EnablingCondition[entity]" as EC +participant FaultLib1 +participant "IpcWorker1" as IW1 +end box + +box "DFM Process" +participant DiagFaultMgr +participant EnablingConditionRegistry +end box + +box "App2 Process" +participant "IpcWorker2" as IW2 +participant FaultLib2 +participant FaultMonitor +participant App2 +end box + +== Setup == + +App2 -> FaultLib2 : create_fault_monitor(["entity"], callback) +create FaultMonitor +FaultLib2 -> FaultMonitor : new(condition_ids, callback) +FaultMonitor -> FaultLib2 : register_monitor() +FaultLib2 -> FaultLib2 : EnablingConditionManager.register_monitor() +note right of FaultLib2 + The FaultLib keeps locally the track which fault + monitors are subscribing on which enabling conditions. + Monitors are auto-unregistered when dropped. 
+end note + +== Status Reporting == + +App1 -> EC : report_status(Active) +EC -> FaultLib1 : EnablingConditionManager.report_status() +FaultLib1 -> FaultLib1 : update local state + notify local monitors +FaultLib1 ->> IW1 : mpsc::try_send(\nEnablingConditionStatusChange\n(entity, Active)) +IW1 ->> DiagFaultMgr : iceoryx2 publish\non "dfm/event" +DiagFaultMgr -> EnablingConditionRegistry : update_status(entity, Active) +DiagFaultMgr ->> IW2 : iceoryx2 publish on\n"dfm/enabling_condition/notification" +note right of IW2 + IpcWorker polls the iceoryx2 + subscriber in its main loop + (recv_timeout 50ms cycle). +end note +IW2 -> FaultLib2 : EnablingConditionManager\n.handle_remote_notification() +FaultMonitor <- FaultLib2 : callback.on_condition_change(entity, Active) +App2 <- FaultMonitor : on_condition_change(entity, Active) +note right of FaultMonitor + Applications are notified when + enabling condition status changes +end note +@enduml diff --git a/docs/puml/enable_condition_ntf.svg b/docs/puml/enable_condition_ntf.svg new file mode 100644 index 0000000..42d0378 --- /dev/null +++ b/docs/puml/enable_condition_ntf.svg @@ -0,0 +1,70 @@ +Enabling Condition : cross-app status notificationsApp1 ProcessDFM ProcessApp2 ProcessApp1App1EnablingCondition[entity]EnablingCondition[entity]FaultLib1FaultLib1IpcWorker1IpcWorker1DiagFaultMgrDiagFaultMgrEnablingConditionRegistryEnablingConditionRegistryIpcWorker2IpcWorker2FaultLib2FaultLib2FaultMonitorApp2App2Setupcreate_fault_monitor(["entity"], callback)new(condition_ids, callback)FaultMonitorregister_monitor()EnablingConditionManager.register_monitor()The FaultLib keeps locally the track which faultmonitors are subscribing on which enabling conditions.Monitors are auto-unregistered when dropped.Status Reportingreport_status(Active)EnablingConditionManager.report_status()update local state + notify local monitorsmpsc::try_send(EnablingConditionStatusChange(entity, Active))iceoryx2 publishon "dfm/event"update_status(entity, 
Active)iceoryx2 publish on"dfm/enabling_condition/notification"IpcWorker polls the iceoryx2subscriber in its main loop(recv_timeout 50ms cycle).EnablingConditionManager.handle_remote_notification()callback.on_condition_change(entity, Active)on_condition_change(entity, Active)Applications are notified whenenabling condition status changes \ No newline at end of file diff --git a/docs/puml/fault_catalog.puml b/docs/puml/fault_catalog.puml new file mode 100644 index 0000000..889217a --- /dev/null +++ b/docs/puml/fault_catalog.puml @@ -0,0 +1,45 @@ +@startuml +title Fault Catalog Initialization + +skinparam ParticipantPadding 20 + +box "App Process" +participant App order 1 +create FaultLib order 2 +App -> FaultLib : FaultApi::try_new(catalog) +participant "IpcWorker\n(fault_client_worker)" as IpcWorker order 3 +end box + +box "DFM Process" +participant DiagnosticFaultMgr order 4 +end box + +FaultLib -> FaultLib : read_and_verify_catalog_config_file() +FaultLib -> FaultLib : calculate_catalog_hashsum() + +create IpcWorker +FaultLib -> IpcWorker : spawn thread + create\niceoryx2 publisher ("dfm/event") +\nEC notification subscriber +FaultLib -> FaultLib : create iceoryx2 subscriber\n("dfm/event/hash/response") + +== Hash Verification (iceoryx2 zero-copy shared memory) == + +FaultLib ->> IpcWorker : mpsc::try_send(\nDiagnosticEvent::Hash(catalog_id, hash)) +IpcWorker ->> DiagnosticFaultMgr : iceoryx2 publish on "dfm/event" + +DiagnosticFaultMgr -> DiagnosticFaultMgr : check_hash_sum(path, hash) +DiagnosticFaultMgr ->> FaultLib : iceoryx2 publish bool\non "dfm/event/hash/response" +note right of FaultLib + FaultLib polls subscriber directly + (not via IpcWorker) every 50ms, + timeout 500ms. 
+end note + +alt [["Response received within 500ms"]] + FaultLib -> FaultLib : FaultCatalogOk() + App <-- FaultLib : Ok(FaultApi) +else [["Timeout (DFM not responding)"]] + App <-- FaultLib : Err(CatalogVerification) +end + + +@enduml diff --git a/docs/puml/fault_catalog.svg b/docs/puml/fault_catalog.svg new file mode 100644 index 0000000..7f69187 --- /dev/null +++ b/docs/puml/fault_catalog.svg @@ -0,0 +1,57 @@ +Fault Catalog InitializationApp ProcessDFM ProcessAppAppFaultLibIpcWorker(fault_client_worker)DiagnosticFaultMgrDiagnosticFaultMgrFaultApi::try_new(catalog)FaultLibread_and_verify_catalog_config_file()calculate_catalog_hashsum()spawn thread + createiceoryx2 publisher ("dfm/event") +EC notification subscriberIpcWorker(fault_client_worker)create iceoryx2 subscriber("dfm/event/hash/response")Hash Verification (iceoryx2 zero-copy shared memory)mpsc::try_send(DiagnosticEvent::Hash(catalog_id, hash))iceoryx2 publish on "dfm/event"check_hash_sum(path, hash)iceoryx2 publish boolon "dfm/event/hash/response"FaultLib polls subscriber directly(not via IpcWorker) every 50ms,timeout 500ms.altreceived within 500ms"]FaultCatalogOk()Ok(FaultApi)(DFM not responding)"]Err(CatalogVerification) \ No newline at end of file diff --git a/docs/puml/generate_svg.sh b/docs/puml/generate_svg.sh new file mode 100755 index 0000000..b2d1c76 --- /dev/null +++ b/docs/puml/generate_svg.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Regenerate SVG diagrams from PlantUML sources. +# Requires: plantuml (https://plantuml.com/download) +# +# Usage: ./docs/puml/generate_svg.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if ! command -v plantuml &>/dev/null; then + echo "error: plantuml not found. Install via: apt install plantuml" >&2 + exit 1 +fi + +for puml in "$SCRIPT_DIR"/*.puml; do + echo "Generating SVG for $(basename "$puml")" + plantuml -tsvg "$puml" +done + +echo "Done. 
SVGs written to $SCRIPT_DIR/" diff --git a/docs/puml/local_enable_condition_ntf.puml b/docs/puml/local_enable_condition_ntf.puml new file mode 100644 index 0000000..0ea0cbd --- /dev/null +++ b/docs/puml/local_enable_condition_ntf.puml @@ -0,0 +1,54 @@ +@startuml +title Enabling Condition : local status notifications within same app +skinparam ParticipantPadding 20 + +box "App Process" +participant App +participant "EnablingCondition[entity]" as EC +participant FaultMonitor +participant FaultLib +participant "IpcWorker\n(fault_client_worker)" as IpcWorker +end box + +box "DFM Process" +participant DiagFaultMgr +end box + +ref over EC, FaultLib: [[new_enable_condition.svg New Enable Condition]] + +== Setup == + +App -> FaultLib : create_fault_monitor(["entity"], callback) +create FaultMonitor +FaultLib -> FaultMonitor : new(condition_ids, callback) +FaultMonitor -> FaultLib : register_monitor() +FaultLib -> FaultLib : EnablingConditionManager.register_monitor() +note right of FaultLib + The FaultLib keeps locally the track which fault + monitors are subscribing on which enabling conditions. + Monitors are auto-unregistered when dropped. +end note + +== Status Reporting (local fast path) == + +App -> EC : report_status(Active) +EC -> FaultLib : EnablingConditionManager.report_status() +FaultLib -> FaultLib : update local state +FaultMonitor <- FaultLib : callback.on_condition_change(entity, Active) +App <- FaultMonitor : on_condition_change(entity, Active) +note right of FaultLib + Local monitors are notified immediately + (same process, no IPC round-trip). + Status deduplication: no-op if status unchanged. 
+end note + +== Async DFM notification == + +FaultLib ->> IpcWorker : mpsc::try_send(\nEnablingConditionStatusChange\n(entity, Active)) +IpcWorker ->> DiagFaultMgr : iceoryx2 publish\non "dfm/event" +note right of DiagFaultMgr + DFM is notified asynchronously so it can + broadcast to other FaultLib instances +end note + +@enduml diff --git a/docs/puml/local_enable_condition_ntf.svg b/docs/puml/local_enable_condition_ntf.svg new file mode 100644 index 0000000..f803243 --- /dev/null +++ b/docs/puml/local_enable_condition_ntf.svg @@ -0,0 +1,66 @@ +Enabling Condition : local status notifications within same appApp ProcessDFM ProcessAppAppEnablingCondition[entity]EnablingCondition[entity]FaultMonitorFaultLibFaultLibIpcWorker(fault_client_worker)IpcWorker(fault_client_worker)DiagFaultMgrDiagFaultMgrrefSetupcreate_fault_monitor(["entity"], callback)new(condition_ids, callback)FaultMonitorregister_monitor()EnablingConditionManager.register_monitor()The FaultLib keeps locally the track which faultmonitors are subscribing on which enabling conditions.Monitors are auto-unregistered when dropped.Status Reporting (local fast path)report_status(Active)EnablingConditionManager.report_status()update local statecallback.on_condition_change(entity, Active)on_condition_change(entity, Active)Local monitors are notified immediately(same process, no IPC round-trip).Status deduplication: no-op if status unchanged.Async DFM notificationmpsc::try_send(EnablingConditionStatusChange(entity, Active))iceoryx2 publishon "dfm/event"DFM is notified asynchronously so it canbroadcast to other FaultLib instances \ No newline at end of file diff --git a/docs/puml/new_enable_condition.puml b/docs/puml/new_enable_condition.puml new file mode 100644 index 0000000..f2eeea7 --- /dev/null +++ b/docs/puml/new_enable_condition.puml @@ -0,0 +1,43 @@ +@startuml +title Registering new enabling condition by the provider +skinparam ParticipantPadding 20 + +box "App Process" +participant App +participant 
EnablingCondition +participant FaultLib +participant "IpcWorker\n(fault_client_worker)" as IpcWorker +end box +box "DFM Process" +participant DiagFaultMgr +participant EnablingConditionRegistry +end box + +== Registration == + +App -> FaultLib : FaultApi::get_enabling_condition(sovd_entity) +FaultLib -> FaultLib : EnablingConditionManager.check_if_exist(sovd_entity) + +alt ["EnablingCondition already registered or entity too long"] + App <-- FaultLib : Err(AlreadyRegistered | EntityTooLong) +else + FaultLib -> FaultLib : store condition (status = Inactive) + create EnablingCondition + EnablingCondition <- FaultLib : new(id, manager) + FaultLib ->> IpcWorker : mpsc::try_send(\nEnablingConditionRegister\n(sovd_entity)) + IpcWorker ->> DiagFaultMgr : iceoryx2 publish\non "dfm/event" + DiagFaultMgr -> EnablingConditionRegistry : register(sovd_entity) + App <-- FaultLib : Ok(EnablingCondition) +end + +== Reporting Condition Change == + +App -> EnablingCondition : report_status(Active) +EnablingCondition -> FaultLib : EnablingConditionManager.report_status() +FaultLib -> FaultLib : update local state + notify local monitors +App <-- EnablingCondition : Ok(()) +FaultLib ->> IpcWorker : mpsc::try_send(\nEnablingConditionStatusChange\n(entity, Active)) +IpcWorker ->> DiagFaultMgr : iceoryx2 publish\non "dfm/event" +DiagFaultMgr -> EnablingConditionRegistry : update_status(entity, Active) + +@enduml diff --git a/docs/puml/new_enable_condition.svg b/docs/puml/new_enable_condition.svg new file mode 100644 index 0000000..c9cf834 --- /dev/null +++ b/docs/puml/new_enable_condition.svg @@ -0,0 +1,55 @@ +Registering new enabling condition by the providerApp ProcessDFM ProcessAppAppEnablingConditionFaultLibFaultLibIpcWorker(fault_client_worker)IpcWorker(fault_client_worker)DiagFaultMgrDiagFaultMgrEnablingConditionRegistryEnablingConditionRegistryRegistrationFaultApi::get_enabling_condition(sovd_entity)EnablingConditionManager.check_if_exist(sovd_entity)altEnablingCondition already 
registered or entity too longErr(AlreadyRegistered | EntityTooLong)store condition (status = Inactive)new(id, manager)EnablingConditionmpsc::try_send(EnablingConditionRegister(sovd_entity))iceoryx2 publishon "dfm/event"register(sovd_entity)Ok(EnablingCondition)Reporting Condition Changereport_status(Active)EnablingConditionManager.report_status()update local state + notify local monitorsOk(())mpsc::try_send(EnablingConditionStatusChange(entity, Active))iceoryx2 publishon "dfm/event"update_status(entity, Active) \ No newline at end of file diff --git a/docs/puml/new_fault.puml b/docs/puml/new_fault.puml new file mode 100644 index 0000000..b30809d --- /dev/null +++ b/docs/puml/new_fault.puml @@ -0,0 +1,68 @@ +@startuml +title Reporting fault by the app +skinparam ParticipantPadding 20 + +box "App Process" +participant App +participant Reporter +participant FaultApi +participant "IpcWorker\n(fault_client_worker)" as IpcWorker +end box +box "DFM Process" +participant DiagFaultMgr +end box + +ref over FaultApi, DiagFaultMgr: [[fault_catalog.svg Fault Catalog Init]] + +== Start up == + +App -> FaultApi : FaultApi::try_new(catalog, sink) +note right of FaultApi + This diagram does not consider the case + when the fault doesn't exist in catalog +end note + +alt [["Fault ID not in catalog"]] + + App <-- FaultApi : Err(ReporterError::FaultIdNotFound) +else + + create Reporter + App -> Reporter : Reporter::new(&fault_id, config) + Reporter -> Reporter : lookup descriptor in catalog + App <-- Reporter : Ok(Reporter) + App -> App : store/use Reporter +end +== App Reporting Fault == + +App -> Reporter : publish(path, record) +Reporter -> Reporter : check_debouncing() + +alt ["Debouncing not passed or enabling conditions not fulfilled"] + App <-- Reporter +else + Reporter -> FaultApi : sink.publish(path, record) + FaultApi -> FaultApi : create DiagnosticEvent::Fault(path, record) + FaultApi ->> IpcWorker : mpsc::try_send(event)\n(bounded channel, capacity=1024) + note right of 
FaultApi + Non-blocking enqueue. + Returns QueueFull if channel is full. + end note + App <-- Reporter + + == Async IPC (iceoryx2 zero-copy shared memory) == + + IpcWorker -> IpcWorker : recv_timeout(50ms) + IpcWorker ->> DiagFaultMgr : iceoryx2 publish on "dfm/event" + note right of IpcWorker + Fire-and-forget: no response from DFM. + On transient failure (TransportDown, + Timeout, QueueFull): exponential backoff + retry (100ms base, 5s cap, max 10 retries, + LRU eviction cache of 512 events). + Permanent errors are dropped immediately. + end note + DiagFaultMgr -> DiagFaultMgr : FaultRecordProcessor\n.process_record(path, fault) +end + +@enduml diff --git a/docs/puml/new_fault.svg b/docs/puml/new_fault.svg new file mode 100644 index 0000000..8ad492f --- /dev/null +++ b/docs/puml/new_fault.svg @@ -0,0 +1,80 @@ +Reporting fault by the appApp ProcessDFM ProcessAppAppReporterFaultApiFaultApiIpcWorker(fault_client_worker)IpcWorker(fault_client_worker)DiagFaultMgrDiagFaultMgrrefStart upFaultApi::try_new(catalog, sink)This diagram does not consider the casewhen the fault doesn't exist in catalogaltID not in catalog"]Err(ReporterError::FaultIdNotFound)Reporter::new(&fault_id, config)Reporterlookup descriptor in catalogOk(Reporter)store/use ReporterApp Reporting Faultpublish(path, record)check_debouncing()altDebouncing not passed or enabling conditions not fulfilledsink.publish(path, record)create DiagnosticEvent::Fault(path, record)mpsc::try_send(event)(bounded channel, capacity=1024)Non-blocking enqueue.Returns QueueFull if channel is full.Async IPC (iceoryx2 zero-copy shared memory)recv_timeout(50ms)iceoryx2 publish on "dfm/event"Fire-and-forget: no response from DFM.On transient failure (TransportDown,Timeout, QueueFull): exponential backoffretry (100ms base, 5s cap, max 10 retries,LRU eviction cache of 512 events).Permanent errors are dropped immediately.FaultRecordProcessor.process_record(path, fault) \ No newline at end of file diff --git 
a/docs/puml/query_clear.puml b/docs/puml/query_clear.puml new file mode 100644 index 0000000..87b05dc --- /dev/null +++ b/docs/puml/query_clear.puml @@ -0,0 +1,83 @@ +@startuml +title DFM Query/Clear IPC Protocol +skinparam ParticipantPadding 20 + +box "External Process\n(e.g. OpenSOVD adapter)" +participant "Diagnostic\nClient" as Client +participant Iceoryx2DfmQuery +end box +box "DFM Process" +participant "DfmQueryServer\n(polled in DFM loop)" as Server +participant SovdFaultManager +participant SovdFaultStorage +end box + +== Setup == + +note over Server + DfmQueryServer created inside + fault_lib_receiver_thread when + enable_query_server = true. + Service: "dfm/query" (request-response). +end note + +Client -> Iceoryx2DfmQuery : new()\nconnects to "dfm/query" service + +== Query: GetAllFaults == + +Client -> Iceoryx2DfmQuery : get_all_faults(entity_path) +Iceoryx2DfmQuery -> Iceoryx2DfmQuery : path -> LongString (128B)\n(rejects if too long) +Iceoryx2DfmQuery ->> Server : iceoryx2 send\nDfmQueryRequest::GetAllFaults(path) +Iceoryx2DfmQuery -> Iceoryx2DfmQuery : poll response\n(1s timeout, 10ms interval) + +Server -> Server : poll() receives request +Server -> SovdFaultManager : get_all_faults(path) +SovdFaultManager -> SovdFaultStorage : read fault states +SovdFaultManager --> Server : Vec +Server -> Server : convert to IpcSovdFault\n(lossy: String->ShortString,\nHashMap omitted) +Server ->> Iceoryx2DfmQuery : DfmQueryResponse::FaultList\n(max 64 faults per response) + +Iceoryx2DfmQuery -> Iceoryx2DfmQuery : convert IpcSovdFault -> SovdFault +Client <-- Iceoryx2DfmQuery : Ok(Vec) + +== Query: GetFault == + +Client -> Iceoryx2DfmQuery : get_fault(entity_path, fault_code) +Iceoryx2DfmQuery ->> Server : DfmQueryRequest::GetFault(path, code) + +Server -> SovdFaultManager : get_fault(path, code) + +alt Fault found + SovdFaultManager --> Server : Ok((SovdFault, env_data)) + Server ->> Iceoryx2DfmQuery : DfmQueryResponse::SingleFault\n(IpcSovdFault, IpcEnvData) + 
Client <-- Iceoryx2DfmQuery : Ok((SovdFault, env_data)) +else Fault not found + SovdFaultManager --> Server : Err(NotFound) + Server ->> Iceoryx2DfmQuery : DfmQueryResponse::Error\n(DfmQueryError::NotFound) + Client <-- Iceoryx2DfmQuery : Err(Error::NotFound) +end + +== Clear: DeleteAllFaults == + +Client -> Iceoryx2DfmQuery : delete_all_faults(entity_path) +Iceoryx2DfmQuery ->> Server : DfmQueryRequest::DeleteAllFaults(path) + +Server -> SovdFaultManager : delete_all_faults(path) +SovdFaultManager -> SovdFaultStorage : clear all fault states +SovdFaultManager --> Server : Ok(()) +Server ->> Iceoryx2DfmQuery : DfmQueryResponse::Ok +Client <-- Iceoryx2DfmQuery : Ok(()) + +== Error: Storage failure == + +Client -> Iceoryx2DfmQuery : delete_fault(path, code) +Iceoryx2DfmQuery ->> Server : DfmQueryRequest::DeleteFault(path, code) +Server -> SovdFaultManager : delete_fault(path, code) +SovdFaultManager -> SovdFaultStorage : delete fault state +SovdFaultStorage --> SovdFaultManager : Err(StorageError) +SovdFaultManager --> Server : Err(Storage(msg)) +Server -> Server : truncate msg to ShortString (64B) +Server ->> Iceoryx2DfmQuery : DfmQueryResponse::Error\n(DfmQueryError::StorageError) +Client <-- Iceoryx2DfmQuery : Err(Error::Storage(msg)) + +@enduml