From 6efb3c178e11b6ce15b797766c1737a4d495b7a4 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 12 Mar 2026 19:39:00 -0700 Subject: [PATCH 1/2] bump version to v0.7.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 78fad9a..cda1a01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "predicate-authorityd" -version = "0.6.7" +version = "0.7.0" edition = "2021" description = "Rust-based sidecar daemon for Predicate Authority" license = "MIT" From 6ee70838eabd235d1e511c5a5eb80e9dcd240b78 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 12 Mar 2026 19:54:59 -0700 Subject: [PATCH 2/2] ssrf_whitelist fix --- docs/sidecar-user-manual.md | 39 ++++++++++++++-- policies/README.md | 53 +++++++++++++++++++++ src/main.rs | 56 +++++++++++++++------- src/policy_loader.rs | 93 +++++++++++++++++++++++++++++++++++++ src/ssrf.rs | 22 +++++++++ 5 files changed, 243 insertions(+), 20 deletions(-) diff --git a/docs/sidecar-user-manual.md b/docs/sidecar-user-manual.md index f09dd3b..706e123 100644 --- a/docs/sidecar-user-manual.md +++ b/docs/sidecar-user-manual.md @@ -1020,8 +1020,9 @@ SSRF protection is **enabled by default**. 
Blocked requests return: #### Whitelisting Local Services -To allow specific local endpoints (e.g., local LLM instances, databases), use the `--ssrf-allow` flag with host:port pairs: +To allow specific local endpoints (e.g., local LLM instances, databases), you have four options: +**Option 1: CLI flag** (highest precedence) ```bash # Allow local Ollama (WSL2) and Elasticsearch ./predicate-authorityd \ @@ -1030,19 +1031,49 @@ To allow specific local endpoints (e.g., local LLM instances, databases), use th run ``` -Or via environment variable (comma-separated): +**Option 2: Environment variable** ```bash export PREDICATE_SSRF_ALLOW="172.30.192.1:11434,127.0.0.1:9200" ./predicate-authorityd --policy-file policy.json run ``` -Or in the configuration file: +**Option 3: TOML configuration file** ```toml [ssrf] allowed_endpoints = ["172.30.192.1:11434", "127.0.0.1:9200"] ``` -**Important:** The whitelist is host:port specific to limit the exemption surface. Use exact matches only. +**Option 4: Policy file** (policy-driven, recommended for tenant-scoped deployments) + +Add an `ssrf_whitelist` field to your policy JSON/YAML file: + +```json +{ + "ssrf_whitelist": ["172.30.192.1:11434", "127.0.0.1:9200"], + "rules": [ + ... + ] +} +``` + +Or in YAML: +```yaml +ssrf_whitelist: + - "172.30.192.1:11434" # Local Ollama on WSL2 + - "127.0.0.1:9200" # Local Elasticsearch + +rules: + - name: allow-llm-calls + effect: allow + ... +``` + +**Precedence and merging:** +- CLI / environment variable entries take precedence over the TOML `allowed_endpoints` list: when both are set, the TOML list is ignored rather than merged +- Policy-file `ssrf_whitelist` entries are then merged (exact-match deduplicated) into whichever list is in effect +- If no whitelist is configured anywhere, full SSRF enforcement applies + +**Important:** The whitelist uses exact `host:port` matching to limit the exemption surface. Only the specified port is allowed.
#### Disabling SSRF Protection diff --git a/policies/README.md b/policies/README.md index b07e182..e64ec39 100644 --- a/policies/README.md +++ b/policies/README.md @@ -289,6 +289,59 @@ Patterns use glob-style matching: | `https://*` | Any HTTPS URL | | `/home/*/projects/**` | Any file under any user's projects dir | +#### Glob `**` Directory Matching Footgun + +**Common mistake (WRONG):** Using `**` to match a directory itself. This pattern matches files INSIDE `model-eval`, not the directory: + +```json +{ + "resources": ["model-eval/**"] +} +``` + +The pattern `model-eval/**` matches `model-eval/file.txt` and `model-eval/sub/file.txt`, but it does **NOT** match the directory `model-eval` itself. + +**To match both the directory and its contents (CORRECT), list both patterns:** + +```json +{ + "resources": ["model-eval", "model-eval/**"] +} +``` + +Or use multiple patterns: +- `model-eval` - matches the directory itself +- `model-eval/*` - matches direct children +- `model-eval/**` - matches all descendants recursively + +### SSRF Whitelist (Policy-Driven) + +You can include an optional `ssrf_whitelist` field in your policy file to allow specific local endpoints to bypass SSRF protection. This is useful for local LLMs (Ollama), databases, or other services running on private IPs. + +```json +{ + "ssrf_whitelist": ["172.30.192.1:11434", "127.0.0.1:9200"], + "rules": [...] +} +``` + +**Key points:** +- Whitelist uses exact `host:port` matching for security +- If CLI `--ssrf-allow` is also provided, entries are merged +- Defaults to empty (full SSRF enforcement) if omitted + +**YAML example:** +```yaml +ssrf_whitelist: + - "172.30.192.1:11434" # Local Ollama on WSL2 + - "127.0.0.1:9200" # Local Elasticsearch + +rules: + - name: allow-llm-calls + effect: allow + # ...
+``` + --- ## Creating Custom Policies diff --git a/src/main.rs b/src/main.rs index 1e3446c..1cdfd6b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -484,29 +484,16 @@ async fn main() -> anyhow::Result<()> { // Initialize policy engine let policy_engine = PolicyEngine::new(); - // Configure SSRF protection + // Collect SSRF configuration from CLI and config file let ssrf_disabled = cli.ssrf_disabled || file_config.ssrf.disabled; - let ssrf_allowed_endpoints: Vec = if !cli.ssrf_allow.is_empty() { + let mut ssrf_allowed_endpoints: Vec = if !cli.ssrf_allow.is_empty() { cli.ssrf_allow.clone() } else { file_config.ssrf.allowed_endpoints.clone() }; - if ssrf_disabled { - policy_engine.set_ssrf_protection(None); - warn!("SSRF protection disabled - all endpoints allowed"); - } else if !ssrf_allowed_endpoints.is_empty() { - use predicate_authorityd::ssrf::SsrfProtection; - let ssrf = SsrfProtection::new().with_allowed_endpoints(ssrf_allowed_endpoints.clone()); - policy_engine.set_ssrf_protection(Some(ssrf)); - info!( - "SSRF protection enabled with {} allowed endpoints: {:?}", - ssrf_allowed_endpoints.len(), - ssrf_allowed_endpoints - ); - } - // Load policy file if specified (supports JSON and YAML formats) + // This must happen before SSRF setup to extract ssrf_whitelist from policy if let Some(ref policy_path) = policy_file { let format = policy_loader::detect_format(policy_path); info!( @@ -526,6 +513,28 @@ async fn main() -> anyhow::Result<()> { info!("Loaded {} policy rules", count); } + // Merge ssrf_whitelist from policy file (if CLI/config didn't provide any) + if !result.ssrf_whitelist.is_empty() { + if ssrf_allowed_endpoints.is_empty() { + ssrf_allowed_endpoints = result.ssrf_whitelist; + info!( + "SSRF whitelist loaded from policy file: {:?}", + ssrf_allowed_endpoints + ); + } else { + // CLI/config takes precedence, but we can merge + for endpoint in result.ssrf_whitelist { + if !ssrf_allowed_endpoints.contains(&endpoint) { + 
ssrf_allowed_endpoints.push(endpoint); + } + } + info!( + "SSRF whitelist merged with policy file entries: {:?}", + ssrf_allowed_endpoints + ); + } + } + // Detect audit mode from policy file name let path_lower = policy_path.to_lowercase(); if path_lower.contains("audit") @@ -542,6 +551,21 @@ async fn main() -> anyhow::Result<()> { } } + // Configure SSRF protection (after policy loading to include policy-based whitelist) + if ssrf_disabled { + policy_engine.set_ssrf_protection(None); + warn!("SSRF protection disabled - all endpoints allowed"); + } else if !ssrf_allowed_endpoints.is_empty() { + use predicate_authorityd::ssrf::SsrfProtection; + let ssrf = SsrfProtection::new().with_whitelist(ssrf_allowed_endpoints.clone()); + policy_engine.set_ssrf_protection(Some(ssrf)); + info!( + "SSRF protection enabled with {} allowed endpoints: {:?}", + ssrf_allowed_endpoints.len(), + ssrf_allowed_endpoints + ); + } + // Enable audit mode if explicitly requested via CLI if cli.audit_mode { policy_engine.set_audit_mode(true); diff --git a/src/policy_loader.rs b/src/policy_loader.rs index 9ae4143..c545ed9 100644 --- a/src/policy_loader.rs +++ b/src/policy_loader.rs @@ -60,6 +60,9 @@ pub struct PolicyLoadResult { pub skipped_rules: usize, /// Whether the policy was cryptographically signed pub is_signed: bool, + /// SSRF whitelist from policy file (optional, host:port format) + /// Example: ["172.30.192.1:11434", "127.0.0.1:9200"] + pub ssrf_whitelist: Vec, } /// Detect the format of a policy file based on its extension. 
@@ -113,11 +116,23 @@ pub fn load_policy_from_string( let skipped_rules = total_rules - parsed_rules.len(); + // Extract optional ssrf_whitelist array (host:port format) + let ssrf_whitelist: Vec = json_value + .get("ssrf_whitelist") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default(); + Ok(PolicyLoadResult { rules: parsed_rules, format, skipped_rules, is_signed: false, + ssrf_whitelist, }) } @@ -425,5 +440,83 @@ mod tests { assert_eq!(result.rules.len(), 0); assert_eq!(result.skipped_rules, 0); + assert!(result.ssrf_whitelist.is_empty()); + } + + // --- SSRF whitelist tests (Issue #27 policy-driven approach) --- + + #[test] + fn test_ssrf_whitelist_from_json() { + let policy_with_whitelist = r#"{ + "ssrf_whitelist": ["172.30.192.1:11434", "127.0.0.1:9200"], + "rules": [ + { + "name": "allow-all", + "effect": "allow", + "principals": ["*"], + "actions": ["*"], + "resources": ["*"] + } + ] + }"#; + + let result = load_policy_from_string(policy_with_whitelist, PolicyFormat::Json).unwrap(); + + assert_eq!(result.rules.len(), 1); + assert_eq!(result.ssrf_whitelist.len(), 2); + assert!(result + .ssrf_whitelist + .contains(&"172.30.192.1:11434".to_string())); + assert!(result + .ssrf_whitelist + .contains(&"127.0.0.1:9200".to_string())); + } + + #[test] + fn test_ssrf_whitelist_from_yaml() { + let yaml_with_whitelist = r#" +ssrf_whitelist: + - "172.30.192.1:11434" + - "localhost:8787" +rules: + - name: allow-all + effect: allow + principals: + - "*" + actions: + - "*" + resources: + - "*" +"#; + + let result = load_policy_from_string(yaml_with_whitelist, PolicyFormat::Yaml).unwrap(); + + assert_eq!(result.rules.len(), 1); + assert_eq!(result.ssrf_whitelist.len(), 2); + assert!(result + .ssrf_whitelist + .contains(&"172.30.192.1:11434".to_string())); + assert!(result + .ssrf_whitelist + .contains(&"localhost:8787".to_string())); + } + + #[test] + fn 
test_ssrf_whitelist_missing_defaults_to_empty() { + // Policy without ssrf_whitelist field should have empty whitelist + let result = load_policy_from_string(SAMPLE_JSON_POLICY, PolicyFormat::Json).unwrap(); + assert!(result.ssrf_whitelist.is_empty()); + } + + #[test] + fn test_ssrf_whitelist_empty_array() { + let policy_with_empty_whitelist = r#"{ + "ssrf_whitelist": [], + "rules": [] + }"#; + + let result = + load_policy_from_string(policy_with_empty_whitelist, PolicyFormat::Json).unwrap(); + assert!(result.ssrf_whitelist.is_empty()); } } diff --git a/src/ssrf.rs b/src/ssrf.rs index 04606d2..126eefe 100644 --- a/src/ssrf.rs +++ b/src/ssrf.rs @@ -78,6 +78,12 @@ impl SsrfProtection { self } + /// Alias for `with_allowed_endpoints` - adds whitelist entries that bypass SSRF checks + /// This is the method name used when loading from policy files + pub fn with_whitelist(self, whitelist: Vec) -> Self { + self.with_allowed_endpoints(whitelist) + } + /// Add a single allowed endpoint pub fn add_allowed_endpoint(&mut self, endpoint: &str) { self.allowed_endpoints.push(endpoint.to_lowercase()); @@ -593,4 +599,20 @@ mod tests { assert!(ssrf.block_cloud_metadata); assert!(ssrf.block_internal_dns); } + + #[test] + fn test_with_whitelist_alias() { + // Test that with_whitelist() is an alias for with_allowed_endpoints() + let ssrf = SsrfProtection::new().with_whitelist(vec!["172.30.192.1:11434".to_string()]); + + // Private IP would normally be blocked + assert!(SsrfProtection::new() + .check_resource("http://172.30.192.1:11434/api/generate") + .is_some()); + + // But with_whitelist should allow it + assert!(ssrf + .check_resource("http://172.30.192.1:11434/api/generate") + .is_none()); + } }