diff --git a/README.md b/README.md index c8bbb67..faaf283 100644 --- a/README.md +++ b/README.md @@ -75,29 +75,9 @@ aigate reset --force # Remove everything ## How It Works -``` -Your files: .env secrets/ .ssh/ *.pem - | - aigate init - | - OS group: ai-agents - OS user: ai-runner - ACLs: deny read for ai-agents - | - aigate run -- claude - | - +--namespace--+ - | mount: hide | - | net: restrict| - | pid: isolate | - +-------------+ - | - AI agent runs with: - - Cannot read .env, secrets/ - - Cannot run curl, wget - - Cannot reach unauthorized hosts - - Cannot see host processes -``` +![Linux Process Isolation](docs/diagrams/linux-process.png) + +See [docs/user/README.md](docs/user/README.md) for detailed architecture diagrams covering file isolation, network isolation (Linux & macOS), and process isolation. ## Configuration diff --git a/actions/init.go b/actions/init.go index 272d8da..75757d6 100644 --- a/actions/init.go +++ b/actions/init.go @@ -1,6 +1,7 @@ package actions import ( + "errors" "fmt" "github.com/AxeForging/aigate/helpers" @@ -31,16 +32,24 @@ func (a *InitAction) Execute(c *cli.Context) error { helpers.Log.Info().Str("platform", a.platform.Name()).Msg("Initializing aigate sandbox") - // Create group + // Create group — skip if already exists helpers.Log.Info().Str("group", group).Msg("Creating sandbox group") if err := a.platform.CreateGroup(group); err != nil { - return fmt.Errorf("failed to create group: %w", err) + if errors.Is(err, helpers.ErrAlreadyInit) { + helpers.Log.Info().Str("group", group).Msg("Sandbox group already exists, skipping") + } else { + return fmt.Errorf("failed to create group: %w", err) + } } - // Create user + // Create user — skip if already exists helpers.Log.Info().Str("user", user).Str("group", group).Msg("Creating sandbox user") if err := a.platform.CreateUser(user, group); err != nil { - return fmt.Errorf("failed to create user: %w", err) + if errors.Is(err, helpers.ErrAlreadyInit) { + helpers.Log.Info().Str("user", user).Msg("Sandbox user already exists, skipping") + } else { + return fmt.Errorf("failed to create user: %w", err) + } } // Write default config diff --git a/actions/run.go b/actions/run.go index d985a67..b2bd042 100644 --- a/actions/run.go +++ b/actions/run.go @@ -3,6 +3,7 @@ package actions import ( "fmt" "os" + "strings" "github.com/AxeForging/aigate/domain" "github.com/AxeForging/aigate/helpers" @@ -60,5 +61,22 @@ func (a *RunAction) Execute(c *cli.Context) error { Int("allow_net", len(merged.AllowNet)). Msg("Running sandboxed command") + printSandboxBanner(merged) + return a.runner.Run(profile, cmd, cmdArgs) } + +// printSandboxBanner prints active restrictions to stderr so AI agents +// (and users) can see exactly what is enforced inside the sandbox. +func printSandboxBanner(cfg *domain.Config) { + fmt.Fprintln(os.Stderr, "[aigate] sandbox active") + if len(cfg.DenyRead) > 0 { + fmt.Fprintf(os.Stderr, "[aigate] deny_read: %s\n", strings.Join(cfg.DenyRead, ", ")) + } + if len(cfg.DenyExec) > 0 { + fmt.Fprintf(os.Stderr, "[aigate] deny_exec: %s\n", strings.Join(cfg.DenyExec, ", ")) + } + if len(cfg.AllowNet) > 0 { + fmt.Fprintf(os.Stderr, "[aigate] allow_net: %s (all other outbound connections will be blocked)\n", strings.Join(cfg.AllowNet, ", ")) + } +} diff --git a/docs/diagrams/file-isolation.png b/docs/diagrams/file-isolation.png new file mode 100644 index 0000000..0c38890 Binary files /dev/null and b/docs/diagrams/file-isolation.png differ diff --git a/docs/diagrams/file-isolation.puml b/docs/diagrams/file-isolation.puml new file mode 100644 index 0000000..a3f4948 --- /dev/null +++ b/docs/diagrams/file-isolation.puml @@ -0,0 +1,67 @@ +@startuml file-isolation +!theme plain +skinparam backgroundColor #FEFEFE +skinparam defaultFontName Inter +skinparam shadowing false +skinparam roundcorner 8 +skinparam ArrowColor #444444 +skinparam RectangleBorderColor #888888 +skinparam PackageBorderColor #666666 +skinparam NoteBackgroundColor #FFFDE7 +skinparam NoteBorderColor #FBC02D + +title **File Isolation -- Linux & macOS**\ndual-layer: ACLs (persistent) + namespace overrides (runtime) + +rectangle "Layer 1: Persistent ACLs\n(applied at deny/allow time)" as acls #C8E6C9 { + + rectangle "**Linux -- POSIX ACLs**\nsetfacl -m g:ai-agents:--- " as linuxacl #A5D6A7 + + rectangle "**macOS -- Extended ACLs**\nchmod +a ""group:ai-agents deny read"" " as macosacl #A5D6A7 +} + +rectangle "Layer 2: Runtime Overrides\n(applied inside sandbox)" as runtime #FFECB3 { + + rectangle "**Directories**\nmount -t tmpfs -o ro,size=0 tmpfs " as tmpfs #FFE082 + rectangle "**Files**\nmount --bind /dev/null " as devnull #FFE082 +} + +rectangle "**Protected Paths**" as paths #FFCDD2 { + rectangle ".env, .env.*, *.pem, *.key" as p1 #FFFFFF + rectangle ".ssh/, .aws/, .gcloud/" as p2 #FFFFFF + rectangle "secrets/, credentials/" as p3 #FFFFFF +} + +paths -up-> acls : aigate deny read +paths -up-> runtime : aigate run + +note right of linuxacl + Recursive with inheritance: + setfacl -R -m g:ai-agents:--- dir/ + setfacl -R -m d:g:ai-agents:--- dir/ +end note + +note right of macosacl + Denies: read, readattr, + readextattr, readsecurity + Dirs add: list, search, + file_inherit, directory_inherit +end note + +note right of tmpfs + Only on **Linux** (mount namespaces). + macOS uses Seatbelt file-read* deny + rules instead: + (deny file-read* (subpath "/secrets")) + (deny file-read* (literal ".env")) +end note + +note bottom of paths + Both layers work together: + ACLs block at the OS level + (survives across sessions). + Mount overrides add defense-in-depth + at runtime (even root inside the + namespace sees empty files/dirs). +end note + +@enduml diff --git a/docs/diagrams/linux-network.png b/docs/diagrams/linux-network.png new file mode 100644 index 0000000..20725f3 Binary files /dev/null and b/docs/diagrams/linux-network.png differ diff --git a/docs/diagrams/linux-network.puml b/docs/diagrams/linux-network.puml new file mode 100644 index 0000000..5d71c11 --- /dev/null +++ b/docs/diagrams/linux-network.puml @@ -0,0 +1,77 @@ +@startuml linux-network +!theme plain +skinparam backgroundColor #FEFEFE +skinparam defaultFontName Inter +skinparam shadowing false +skinparam roundcorner 8 +skinparam ArrowColor #444444 +skinparam RectangleBorderColor #888888 +skinparam PackageBorderColor #666666 +skinparam NoteBackgroundColor #FFFDE7 +skinparam NoteBorderColor #FBC02D + +title **Linux Network Isolation**\nslirp4netns + iptables (no root required) + +cloud "Internet" as internet #E0E0E0 + +rectangle "User Namespace\n(outer unshare --user)" as userns #FFF3E0 { + + rectangle "**slirp4netns**\nuser-mode networking\n(host network + user ns caps)" as slirp #B3E5FC + + rectangle "Network Namespace\n(inner unshare --net)" as netns #FFCDD2 { + + rectangle "**tap0** (10.0.2.100)" as tap0 #EF9A9A + + rectangle "**iptables OUTPUT chain**" as iptables #EF9A9A { + rectangle "ACCEPT lo (loopback)" as r1 #FFFFFF + rectangle "ACCEPT UDP/TCP :53 (DNS)" as r2 #FFFFFF + rectangle "ACCEPT upstream DNS servers" as r3 #FFFFFF + rectangle "ACCEPT resolved AllowNet IPs" as r4 #C8E6C9 + rectangle "REJECT everything else" as r5 #FFCDD2 + } + + rectangle "**Sandboxed Process**\nclaude / cursor / aider" as proc #CE93D8 + } +} + +proc -down-> iptables : outbound traffic +iptables -down-> tap0 : allowed +tap0 <-left-> slirp : tap attachment\nvia setns() +slirp <-up-> internet : forwarded + +note right of slirp + Runs **inside** user namespace + (has CAP_SYS_ADMIN) but in the + **host** network namespace. + + Creates tap0 in sandbox net ns, + forwards packets to real network. + + Built-in DNS forwarder: 10.0.2.3 +end note + +note right of r4 + Hosts resolved **inside** namespace + via getent ahostsv4 (same DNS the + sandboxed process will use). + Each host retries 3x to handle + DNS startup delay. +end note + +note left of tap0 + **resolv.conf** bind-mounted + to point at 10.0.2.3 + (slirp4netns DNS forwarder) +end note + +note right of proc + **Dispatch logic:** + AllowNet + slirp4netns found + -> runWithNetFilter() + AllowNet + no slirp4netns + -> warn, run unrestricted + No AllowNet + -> run unrestricted +end note + +@enduml diff --git a/docs/diagrams/linux-process.png b/docs/diagrams/linux-process.png new file mode 100644 index 0000000..41d9979 Binary files /dev/null and b/docs/diagrams/linux-process.png differ diff --git a/docs/diagrams/linux-process.puml b/docs/diagrams/linux-process.puml new file mode 100644 index 0000000..71649e2 --- /dev/null +++ b/docs/diagrams/linux-process.puml @@ -0,0 +1,71 @@ +@startuml linux-process +!theme plain +skinparam backgroundColor #FEFEFE +skinparam defaultFontName Inter +skinparam shadowing false +skinparam roundcorner 8 +skinparam ArrowColor #444444 +skinparam RectangleBorderColor #888888 +skinparam PackageBorderColor #666666 +skinparam NoteBackgroundColor #FFFDE7 +skinparam NoteBorderColor #FBC02D + +title **Linux Process & Filesystem Isolation**\nnamespaces + mount overrides (no root required) + +actor "User" as user + +rectangle "**aigate run -- **" as aigate #E3F2FD + +rectangle "User Namespace\n(unshare --user --map-root-user)" as userns #FFF3E0 { + + rectangle "Mount Namespace\n(unshare --mount)" as mntns #FFECB3 { + + rectangle "**Mount Overrides**\n(deny_read enforcement)" as mounts #FFE082 + + rectangle "**/proc remount**\n(mount -t proc proc /proc)" as procmnt #FFE082 + + rectangle "**resolv.conf**\nbind-mount to 10.0.2.3\n(only with AllowNet)" as resolv #FFE082 + } + + rectangle "PID Namespace\n(unshare --pid --fork)" as pidns #E1BEE7 { + rectangle "**Sandboxed Process**\n " as proc #CE93D8 + } +} + +user --> aigate +aigate --> userns + +note right of mounts + **deny_read paths:** + Directories -> tmpfs (ro, size=0) + Files -> bind /dev/null + + Examples: + mount -t tmpfs tmpfs ~/.ssh/ + mount --bind /dev/null .env +end note + +note right of proc + Process sees itself as PID 1. + Cannot see or signal any host + processes. /proc remounted to + match the new PID namespace. +end note + +note left of userns + Maps calling user to UID 0 + inside the namespace. Gives + CAP_SYS_ADMIN for mount/net + operations without real root. +end note + +note bottom of aigate + **deny_exec enforcement:** + Checked BEFORE entering the + sandbox. If the command (or + subcommand like "kubectl delete") + is in the deny list, aigate + refuses to run it. +end note + +@enduml diff --git a/docs/diagrams/macos-network.png b/docs/diagrams/macos-network.png new file mode 100644 index 0000000..6bac170 Binary files /dev/null and b/docs/diagrams/macos-network.png differ diff --git a/docs/diagrams/macos-network.puml b/docs/diagrams/macos-network.puml new file mode 100644 index 0000000..dc19866 --- /dev/null +++ b/docs/diagrams/macos-network.puml @@ -0,0 +1,55 @@ +@startuml macos-network +!theme plain +skinparam backgroundColor #FEFEFE +skinparam defaultFontName Inter +skinparam shadowing false +skinparam roundcorner 8 +skinparam ArrowColor #444444 +skinparam RectangleBorderColor #888888 +skinparam PackageBorderColor #666666 +skinparam NoteBackgroundColor #FFFDE7 +skinparam NoteBorderColor #FBC02D + +title **macOS Network Isolation**\nsandbox-exec Seatbelt profiles (kernel-enforced) + +cloud "Internet" as internet #E0E0E0 + +rectangle "sandbox-exec (Sandbox.kext)" as sandbox #E3F2FD { + + rectangle "**Seatbelt Profile (.sb)**" as profile #BBDEFB { + rectangle "(deny network-outbound)" as deny #FFCDD2 + rectangle "(allow network-outbound (local ip))" as lo #FFFFFF + rectangle "(allow network-outbound\n (remote ip ""api.anthropic.com""))" as allow1 #C8E6C9 + rectangle "(allow network-outbound\n (remote ip ""api.github.com""))" as allow2 #C8E6C9 + } + + rectangle "**Sandboxed Process**\nclaude / cursor / aider" as proc #CE93D8 +} + +proc -up-> profile : outbound traffic\nchecked by kernel +allow1 -up-> internet : allowed +allow2 -up-> internet : allowed +deny -right[hidden]-> lo + +note right of proc + macOS **Sandbox.kext** enforces + the Seatbelt profile at the + kernel level. + + No user-mode networking needed. + The kernel intercepts syscalls + and denies disallowed connections. +end note + +note right of profile + Profile generated dynamically + by aigate from the allow_net + config. Written to a temp .sb + file and passed to sandbox-exec. + + AllowNet hostnames are passed + directly (Seatbelt handles + DNS resolution natively). +end note + +@enduml diff --git a/docs/user/README.md b/docs/user/README.md index 750f44f..45496e1 100644 --- a/docs/user/README.md +++ b/docs/user/README.md @@ -8,7 +8,29 @@ aigate creates an OS-level sandbox for AI coding agents. When you use Claude Cod - **Execute** dangerous commands (curl, wget, ssh) - **Access** unauthorized network endpoints -Unlike application-level restrictions that can be bypassed, aigate uses kernel-enforced isolation (Linux ACLs + namespaces, macOS ACLs + sandbox-exec). The AI tool physically cannot access what you deny. +Unlike application-level restrictions that can be bypassed, aigate uses kernel-enforced isolation (Linux namespaces + iptables, macOS sandbox-exec). The AI tool physically cannot access what you deny. + +## Prerequisites + +| | Linux | macOS | +|---|---|---| +| **Required** | `setfacl` (usually pre-installed) | None (uses built-in sandbox-exec) | +| **For network filtering** | `slirp4netns` | None (uses built-in Seatbelt) | + +Install `slirp4netns` on Linux if you use `allow_net`: + +```sh +# Fedora / RHEL +sudo dnf install slirp4netns + +# Ubuntu / Debian +sudo apt install slirp4netns + +# Arch +sudo pacman -S slirp4netns +``` + +If `slirp4netns` is not installed, aigate logs a warning and runs without network filtering. ## Install @@ -55,7 +77,7 @@ aigate run -- aider ### init -Creates the sandbox group (`ai-agents`), user (`ai-runner`), and default config. +Creates the sandbox group (`ai-agents`), user (`ai-runner`), and default config. Safe to re-run (skips existing group/user). ```sh sudo aigate init # Default setup @@ -175,16 +197,51 @@ Project config merges with global (extends, does not replace). ## How It Works -### Linux -- **File isolation**: POSIX ACLs via `setfacl` deny the `ai-agents` group read access -- **Process isolation**: Mount namespaces overmount sensitive directories with empty tmpfs -- **Network isolation**: Network namespaces restrict egress to allowed domains -- **PID isolation**: PID namespaces hide host processes -- **Resource limits**: cgroups v2 enforce memory, CPU, and PID limits +Architecture diagrams are in [`docs/diagrams/`](../diagrams/). + +### File isolation + +Two layers working together for defense-in-depth: + +1. **Persistent ACLs** (applied when you run `aigate deny read`): + - **Linux**: POSIX ACLs via `setfacl` deny the `ai-agents` group read access + - **macOS**: Extended ACLs via `chmod +a` with explicit deny entries +2. **Runtime overrides** (applied when you run `aigate run`): + - **Linux**: Mount namespaces overmount directories with empty tmpfs, files with `/dev/null` + - **macOS**: Seatbelt `file-read*` deny rules in the sandbox profile + +![File Isolation](../diagrams/file-isolation.png) + +### Network isolation -### macOS -- **File isolation**: macOS ACLs via `chmod +a` with explicit deny entries -- **Process sandboxing**: `sandbox-exec` Seatbelt profiles restrict file and network access +Restricts outbound connections to domains listed in `allow_net`: + +- **Linux**: User namespace + network namespace + `slirp4netns` for user-mode networking + `iptables` OUTPUT rules. Hostnames are resolved inside the namespace so iptables IPs match what the sandboxed process sees. Requires `slirp4netns` (falls back to unrestricted if not installed). No root needed. +- **macOS**: `sandbox-exec` Seatbelt profiles with `(deny network-outbound)` and per-host `(allow network-outbound (remote ip ...))` rules. Kernel-enforced via Sandbox.kext. + +**Linux**: + +![Linux Network Isolation](../diagrams/linux-network.png) + +**macOS**: + +![macOS Network Isolation](../diagrams/macos-network.png) + +### Process isolation (Linux) + +- **User namespace**: Maps calling user to UID 0 inside the namespace, giving capabilities for mount/net operations without real root +- **PID namespace**: Sandboxed process sees itself as PID 1, cannot see or signal host processes. `/proc` is remounted to match +- **Mount namespace**: Enables filesystem overrides without affecting the host + +![Linux Process Isolation](../diagrams/linux-process.png) + +### Command blocking + +`deny_exec` rules are checked **before** entering the sandbox. If the command (or a subcommand like `kubectl delete`) is in the deny list, aigate refuses to launch it. This is an application-level check, not a kernel feature. + +### Resource limits + +cgroups v2 enforce memory, CPU, and PID limits (Linux only). ## Troubleshooting @@ -197,6 +254,12 @@ If you see "Failed to apply ACLs", the AI agent group may not exist yet. Run `su ### "aigate not initialized" Run `sudo aigate init` to create the sandbox group, user, and default config. +### "slirp4netns not found" warning +Install `slirp4netns` for network filtering on Linux (see [Prerequisites](#prerequisites)). Without it, `allow_net` rules are ignored and the sandboxed process has unrestricted network access. + +### Allowed hosts still blocked +If hosts in `allow_net` are being rejected, DNS inside the sandbox may not have been ready in time. Check that `slirp4netns` is installed and working. Run with `AIGATE_LOG_LEVEL=debug` for detailed output. + ## Exit Codes | Code | Meaning | diff --git a/services/platform.go b/services/platform.go index 4718199..b3b8b9c 100644 --- a/services/platform.go +++ b/services/platform.go @@ -2,6 +2,7 @@ package services import ( "fmt" + "os" "os/exec" "path/filepath" @@ -38,9 +39,9 @@ func (e *RealExecutor) Run(name string, args ...string) ([]byte, error) { func (e *RealExecutor) RunPassthrough(name string, args ...string) error { cmd := exec.Command(name, args...) - cmd.Stdin = nil - cmd.Stdout = nil - cmd.Stderr = nil + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr return cmd.Run() } diff --git a/services/platform_linux.go b/services/platform_linux.go index 4ea801b..033865f 100644 --- a/services/platform_linux.go +++ b/services/platform_linux.go @@ -3,8 +3,12 @@ package services import ( + "bufio" + "encoding/base64" "fmt" + "net" "os" + "os/exec" "strings" "github.com/AxeForging/aigate/domain" @@ -170,47 +174,302 @@ func (p *LinuxPlatform) ListACLs(workDir string) ([]string, error) { } func (p *LinuxPlatform) RunSandboxed(profile domain.SandboxProfile, cmd string, args []string) error { - // Build unshare command for namespace isolation + if len(profile.Config.AllowNet) > 0 { + if hasSlirp4netns() { + return p.runWithNetFilter(profile, cmd, args) + } + helpers.Log.Warn().Msg("slirp4netns not found; network filtering unavailable, running without network restrictions") + } + return p.runUnshare(profile, cmd, args) +} + +// runUnshare runs a command in a user/mount/pid namespace without network filtering. +func (p *LinuxPlatform) runUnshare(profile domain.SandboxProfile, cmd string, args []string) error { unshareArgs := []string{ "--mount", // Mount namespace "--pid", // PID namespace "--fork", // Required for PID namespace "--map-root-user", // User namespace mapping + "--", + } + + shellCmd := buildPolicyFile(profile) + buildMountOverrides(profile) + shellEscape(cmd, args) + fullArgs := append(unshareArgs, "sh", "-c", shellCmd) + return p.exec.RunPassthrough("unshare", fullArgs...) +} + +// hasSlirp4netns checks whether slirp4netns is available on the system. +func hasSlirp4netns() bool { + _, err := exec.LookPath("slirp4netns") + return err == nil +} + +// resolveAllowedIPs resolves a list of hostnames/IPs to deduplicated IPv4 addresses. +func resolveAllowedIPs(hosts []string) []string { + seen := make(map[string]bool) + var result []string + + for _, host := range hosts { + if ip := net.ParseIP(host); ip != nil { + // It's already an IP address — keep only IPv4 + if ip.To4() != nil && !seen[host] { + seen[host] = true + result = append(result, host) + } + continue + } + // Resolve hostname + addrs, err := net.LookupHost(host) + if err != nil { + helpers.Log.Warn().Str("host", host).Err(err).Msg("failed to resolve host, skipping") + continue + } + for _, addr := range addrs { + if ip := net.ParseIP(addr); ip != nil && ip.To4() != nil && !seen[addr] { + seen[addr] = true + result = append(result, addr) + } + } + } + return result +} + +// getSystemDNS reads upstream DNS servers from resolv.conf files. +func getSystemDNS() []string { + // Try systemd-resolved upstream file first, then fall back to /etc/resolv.conf + for _, path := range []string{"/run/systemd/resolve/resolv.conf", "/etc/resolv.conf"} { + servers := parseDNSFromFile(path) + if len(servers) > 0 { + return servers + } + } + return []string{"8.8.8.8", "1.1.1.1"} +} + +func parseDNSFromFile(path string) []string { + f, err := os.Open(path) + if err != nil { + return nil + } + defer f.Close() + + var servers []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if !strings.HasPrefix(line, "nameserver") { + continue + } + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + addr := fields[1] + // Skip localhost/stub resolvers + if strings.HasPrefix(addr, "127.") { + continue + } + servers = append(servers, addr) + } + return servers +} + +// runWithNetFilter runs a command in a network-filtered namespace using slirp4netns. +// +// Architecture (two-layer unshare): +// +// Outer: unshare --user --map-root-user (user namespace only, keeps host network) +// ├── Inner: unshare --net --mount --pid --fork (sandbox, new net ns) +// │ └── sh -c (wait tap0 → iptables → exec cmd) +// └── slirp4netns --configure tap0 (user ns caps, host network) +// +// slirp4netns must run INSIDE the user namespace to have CAP_SYS_ADMIN for +// setns(CLONE_NEWNET). Launching it from the host fails with EPERM because an +// unprivileged process lacks CAP_SYS_ADMIN in its own (init) user namespace. +func (p *LinuxPlatform) runWithNetFilter(profile domain.SandboxProfile, cmd string, args []string) error { + dnsServers := getSystemDNS() + helpers.Log.Info(). + Strs("allow_net", profile.Config.AllowNet). + Strs("dns_servers", dnsServers). + Msg("starting network-filtered sandbox") + + innerScript := buildNetFilterScript(profile.Config.AllowNet, dnsServers, profile, cmd, args) + outerScript := buildOrchestrationScript(innerScript) + + return p.exec.RunPassthrough("unshare", "--user", "--map-root-user", "--", "sh", "-c", outerScript) +} + +// buildOrchestrationScript wraps the inner sandbox script with the two-process +// orchestration that runs inside the user namespace. +// +// It backgrounds the sandbox (in a new net namespace) while preserving stdin +// via fd 3, then launches slirp4netns in the foreground (user ns + host network) +// to provide connectivity. +func buildOrchestrationScript(innerScript string) string { + encoded := base64.StdEncoding.EncodeToString([]byte(innerScript)) + + var sb strings.Builder + + // Save stdin so the backgrounded sandbox can still read the terminal. + // POSIX non-interactive shells redirect background jobs' stdin from /dev/null. + sb.WriteString("exec 3<&0\n") + + // Write the inner script to a temp file (avoids all quoting issues). + sb.WriteString("_AIGATE_INNER=$(mktemp /tmp/.aigate-inner-XXXXXX)\n") + sb.WriteString(fmt.Sprintf("printf '%%s' '%s' | base64 -d > \"$_AIGATE_INNER\"\n", encoded)) + + // Start the sandbox in a new net/mount/pid namespace (background, stdin from fd 3). + sb.WriteString("unshare --net --mount --pid --fork -- sh \"$_AIGATE_INNER\" <&3 &\n") + sb.WriteString("_SANDBOX_PID=$!\n") + sb.WriteString("exec 3<&-\n") + + // Wait until the sandbox has entered its new network namespace. + sb.WriteString("_SELF_NS=$(readlink /proc/self/ns/net)\n") + sb.WriteString("while [ -e \"/proc/$_SANDBOX_PID\" ] && [ \"$(readlink /proc/$_SANDBOX_PID/ns/net 2>/dev/null)\" = \"$_SELF_NS\" ]; do sleep 0.01; done\n") + + // Launch slirp4netns: runs in user ns (has CAP_SYS_ADMIN) + host network. + // Suppress stdout (verbose protocol debug), keep stderr for real errors. + sb.WriteString("slirp4netns --configure $_SANDBOX_PID tap0 >/dev/null &\n") + sb.WriteString("_SLIRP_PID=$!\n") + + // Wait for the sandbox to exit, then clean up. + sb.WriteString("wait $_SANDBOX_PID 2>/dev/null\n") + sb.WriteString("_EXIT=$?\n") + sb.WriteString("kill $_SLIRP_PID 2>/dev/null; wait $_SLIRP_PID 2>/dev/null\n") + sb.WriteString("rm -f \"$_AIGATE_INNER\"\n") + sb.WriteString("exit $_EXIT\n") + + return sb.String() +} + +// buildNetFilterScript builds the shell script that runs inside the network namespace. +// allowNetHosts are the original hostnames/IPs from the config — resolution happens +// inside the namespace so the iptables rules match what the sandboxed process will see. +func buildNetFilterScript(allowNetHosts, dnsServers []string, profile domain.SandboxProfile, cmd string, args []string) string { + var sb strings.Builder + + // Remount /proc so it reflects the new PID namespace. + // Without this, /proc/self is stale and glibc's NSS/dlopen fails with + // "fatal library error, lookup self". + sb.WriteString("mount -t proc proc /proc\n") + + // Wait for tap0 interface to come up (slirp4netns creates it) + sb.WriteString("for i in $(seq 1 100); do ip addr show tap0 2>/dev/null | grep -q inet && break; sleep 0.05; done\n") + + // Set up DNS: point resolv.conf at slirp4netns DNS forwarder (10.0.2.3) + sb.WriteString("echo 'nameserver 10.0.2.3' > /tmp/.aigate-resolv\n") + sb.WriteString("mount --bind /tmp/.aigate-resolv /etc/resolv.conf 2>/dev/null || ") + sb.WriteString("mount --bind /tmp/.aigate-resolv $(readlink -f /etc/resolv.conf) 2>/dev/null || true\n") + + // iptables rules: allow loopback + DNS before anything else + // (DNS must work for the host resolution below) + sb.WriteString("iptables -A OUTPUT -o lo -j ACCEPT\n") + sb.WriteString("iptables -A OUTPUT -p udp --dport 53 -j ACCEPT\n") + sb.WriteString("iptables -A OUTPUT -p tcp --dport 53 -j ACCEPT\n") + + // Allow traffic to upstream DNS servers (needed for slirp4netns forwarding) + for _, dns := range dnsServers { + sb.WriteString(fmt.Sprintf("iptables -A OUTPUT -d %s -j ACCEPT\n", dns)) + } + + // Wait for DNS to actually work by testing a REAL remote query. + // Using localhost previously was wrong — it resolves from /etc/hosts, + // not DNS, so it passed before slirp4netns DNS (10.0.2.3) was ready. + if len(allowNetHosts) > 0 { + sb.WriteString(fmt.Sprintf("for i in $(seq 1 50); do getent ahostsv4 %q >/dev/null 2>&1 && break; sleep 0.1; done\n", allowNetHosts[0])) + } + + // Resolve each AllowNet entry INSIDE the namespace and add iptables rules. + // This ensures the IPs match what the sandboxed process will get from DNS, + // avoiding mismatches from CDN anycast / DNS load balancing. + // Each host retries up to 3 times to handle transient DNS hiccups. + for _, host := range allowNetHosts { + sb.WriteString(fmt.Sprintf("for _attempt in 1 2 3; do _ips=$(getent ahostsv4 %q 2>/dev/null | awk '{print $1}' | sort -u); [ -n \"$_ips\" ] && break; sleep 0.5; done; for _ip in $_ips; do iptables -A OUTPUT -d \"$_ip\" -j ACCEPT; done\n", host)) } - // Add network isolation if allow_net is configured + sb.WriteString("iptables -A OUTPUT -j REJECT --reject-with icmp-admin-prohibited\n") + + // Write policy file + mount overrides (deny_read markers point here) + sb.WriteString(buildPolicyFile(profile)) + sb.WriteString(buildMountOverrides(profile)) + + // Execute the target command + sb.WriteString("exec ") + sb.WriteString(shellEscape(cmd, args)) + sb.WriteString("\n") + + return sb.String() +} + +// buildPolicyFile generates shell commands to write /tmp/.aigate-policy inside the +// sandbox, summarizing all active restrictions. Deny markers and AI agents can +// read this file to understand the full sandbox policy. +func buildPolicyFile(profile domain.SandboxProfile) string { + var sb strings.Builder + sb.WriteString("{\n") + sb.WriteString("printf '[aigate] sandbox policy\\n\\n'\n") + if len(profile.Config.DenyRead) > 0 { + sb.WriteString(fmt.Sprintf("printf 'deny_read: %s\\n'\n", strings.Join(profile.Config.DenyRead, ", "))) + sb.WriteString("printf 'These files/directories appear empty or contain a deny marker inside the sandbox.\\n\\n'\n") + } + if len(profile.Config.DenyExec) > 0 { + sb.WriteString(fmt.Sprintf("printf 'deny_exec: %s\\n'\n", strings.Join(profile.Config.DenyExec, ", "))) + sb.WriteString("printf 'These commands are blocked before the sandbox starts.\\n\\n'\n") + } if len(profile.Config.AllowNet) > 0 { - unshareArgs = append(unshareArgs, "--net") + sb.WriteString(fmt.Sprintf("printf 'allow_net: %s\\n'\n", strings.Join(profile.Config.AllowNet, ", "))) + sb.WriteString("printf 'Only these hosts are reachable. All other outbound connections are rejected.\\n\\n'\n") } + sb.WriteString("} > /tmp/.aigate-policy\n") + return sb.String() +} - unshareArgs = append(unshareArgs, "--") +// buildMountOverrides generates shell commands to overmount denied paths. +// Files are replaced with a marker containing an explicit deny message so AI +// agents understand why the content is unavailable. Directories get a tmpfs +// with a .aigate-denied marker file. Both point to /tmp/.aigate-policy for +// the full restriction list. +func buildMountOverrides(profile domain.SandboxProfile) string { + const denyMsg = "[aigate] access denied: this file is protected by sandbox policy. See /tmp/.aigate-policy for all active restrictions." + const dirMsg = "[aigate] access denied: this directory is protected by sandbox policy. Run 'cat /tmp/.aigate-policy' to see all active restrictions." - // Build the inner command that runs inside the namespace - // First overmount denied directories with empty tmpfs var mountCmds []string + hasFileDeny := false + for _, pattern := range profile.Config.DenyRead { paths, _ := resolvePatterns([]string{pattern}, profile.WorkDir) for _, path := range paths { if info, err := os.Stat(path); err == nil { if info.IsDir() { - mountCmds = append(mountCmds, fmt.Sprintf("mount -t tmpfs -o ro,size=0 tmpfs %s", path)) + mountCmds = append(mountCmds, fmt.Sprintf( + "mount -t tmpfs -o size=4k tmpfs %s && printf '%s\\n' > %s/.aigate-denied && mount -o remount,ro %s", + path, dirMsg, path, path)) } else { - mountCmds = append(mountCmds, fmt.Sprintf("mount --bind /dev/null %s", path)) + hasFileDeny = true + mountCmds = append(mountCmds, fmt.Sprintf("mount --bind /tmp/.aigate-denied %s", path)) } } } } - // Build shell command: mount overrides then exec the target command - var shellCmd string + var sb strings.Builder + if hasFileDeny { + sb.WriteString(fmt.Sprintf("printf '%s\\n' > /tmp/.aigate-denied && ", denyMsg)) + } if len(mountCmds) > 0 { - shellCmd = strings.Join(mountCmds, " && ") + " && " + sb.WriteString(strings.Join(mountCmds, " && ")) + sb.WriteString(" && ") } - shellCmd += cmd + return sb.String() +} + +// shellEscape builds a shell command string from a command and its arguments. +func shellEscape(cmd string, args []string) string { + var sb strings.Builder + sb.WriteString(cmd) for _, a := range args { - shellCmd += " " + a + sb.WriteString(" ") + sb.WriteString(a) } - - fullArgs := append(unshareArgs, "sh", "-c", shellCmd) - return p.exec.RunPassthrough("unshare", fullArgs...) + return sb.String() } diff --git a/services/platform_linux_test.go b/services/platform_linux_test.go index f13bb3d..67a2d61 100644 --- a/services/platform_linux_test.go +++ b/services/platform_linux_test.go @@ -3,9 +3,13 @@ package services import ( + "encoding/base64" "fmt" "os" + "strings" "testing" + + "github.com/AxeForging/aigate/domain" ) type mockExecutor struct { @@ -276,3 +280,294 @@ func writeTestFile(t *testing.T, path, content string) { t.Fatalf("writeFile(%s) error = %v", path, err) } } + +func TestResolveAllowedIPs(t *testing.T) { + t.Run("raw IPv4 passes through", func(t *testing.T) { + ips := resolveAllowedIPs([]string{"1.2.3.4", "5.6.7.8"}) + if len(ips) != 2 { + t.Fatalf("expected 2 IPs, got %d: %v", len(ips), ips) + } + if ips[0] != "1.2.3.4" || ips[1] != "5.6.7.8" { + t.Errorf("unexpected IPs: %v", ips) + } + }) + + t.Run("deduplicates IPs", func(t *testing.T) { + ips := resolveAllowedIPs([]string{"1.2.3.4", "1.2.3.4"}) + if len(ips) != 1 { + t.Fatalf("expected 1 IP after dedup, got %d: %v", len(ips), ips) + } + }) + + t.Run("filters out IPv6", func(t *testing.T) { + ips := resolveAllowedIPs([]string{"::1"}) + if len(ips) != 0 { + t.Errorf("expected 0 IPs for IPv6, got %d: %v", len(ips), ips) + } + }) + + t.Run("unresolvable host skipped", func(t *testing.T) { + ips := resolveAllowedIPs([]string{"this-domain-does-not-exist.invalid"}) + if len(ips) != 0 { + t.Errorf("expected 0 IPs for unresolvable host, got %d: %v", len(ips), ips) + } + }) + + t.Run("resolves real hostname", func(t *testing.T) { + ips := resolveAllowedIPs([]string{"localhost"}) + // localhost should resolve to 127.0.0.1 on any system + found := false + for _, ip := range ips { + if ip == "127.0.0.1" { + found = true + } + } + if !found { + t.Errorf("expected 127.0.0.1 from localhost, got %v", ips) + } + }) +} + +func TestBuildNetFilterScript(t *testing.T) { + profile := domain.SandboxProfile{ + Config: domain.Config{ + DenyRead: []string{"/nonexistent/path/for/test"}, + }, + WorkDir: "/tmp", + } + + t.Run("resolves hosts inside namespace via getent", func(t *testing.T) { + script := buildNetFilterScript( + []string{"api.anthropic.com", "1.2.3.4"}, + []string{"8.8.8.8"}, + profile, "echo", []string{"hello"}, + ) + // Hostnames should be resolved inside the namespace via getent ahostsv4 + if !strings.Contains(script, "getent ahostsv4 \"api.anthropic.com\"") { + t.Error("script should resolve api.anthropic.com inside namespace") + } + // Raw IPs are also passed through getent (getent handles IPs fine) + if !strings.Contains(script, "getent ahostsv4 \"1.2.3.4\"") { + t.Error("script should include raw IP 1.2.3.4 via getent") + } + if !strings.Contains(script, "iptables -A OUTPUT -j REJECT") { + t.Error("script should contain final REJECT rule") + } + }) + + t.Run("contains DNS rules", func(t *testing.T) { + script := buildNetFilterScript( + []string{"example.com"}, + []string{"8.8.8.8", "1.1.1.1"}, + profile, "echo", []string{"hello"}, + ) + if !strings.Contains(script, "iptables -A OUTPUT -p udp --dport 53 -j ACCEPT") { + t.Error("script should allow UDP DNS") + } + if !strings.Contains(script, "iptables -A OUTPUT -p tcp --dport 53 -j ACCEPT") { + t.Error("script should allow TCP DNS") + } + if !strings.Contains(script, "iptables -A OUTPUT -d 8.8.8.8 -j ACCEPT") { + t.Error("script should allow DNS server 8.8.8.8") + } + if !strings.Contains(script, "iptables -A OUTPUT -d 1.1.1.1 -j ACCEPT") { + t.Error("script should allow DNS server 1.1.1.1") + } + }) + + t.Run("contains resolv.conf fix", func(t *testing.T) { + script := buildNetFilterScript(nil, nil, profile, "echo", nil) + if !strings.Contains(script, "nameserver 10.0.2.3") { + t.Error("script should set resolv.conf to slirp4netns DNS") + } + }) + + t.Run("contains wait for tap0", func(t *testing.T) { + script := buildNetFilterScript(nil, nil, profile, "echo", nil) + if !strings.Contains(script, "ip addr show tap0") { + t.Error("script should wait for tap0 interface") + } + }) + + t.Run("waits for real DNS before resolving hosts", func(t *testing.T) { + script := buildNetFilterScript([]string{"example.com", "other.com"}, nil, profile, "echo", nil) + // DNS readiness check should use the FIRST AllowNet host, not localhost + dnsWaitIdx := strings.Index(script, "getent ahostsv4 \"example.com\" >/dev/null") + if dnsWaitIdx == -1 { + t.Fatal("script should check DNS readiness with first AllowNet host") + } + if strings.Contains(script, "getent ahostsv4 localhost") { + t.Error("should NOT use localhost for DNS readiness (resolves from /etc/hosts, not DNS)") + } + }) + + t.Run("retries host resolution on failure", func(t *testing.T) { + script := buildNetFilterScript([]string{"example.com"}, nil, profile, "echo", nil) + if !strings.Contains(script, "_attempt in 1 2 3") { + t.Error("should retry getent resolution") + } + }) + + t.Run("contains target command", func(t *testing.T) { + script := buildNetFilterScript(nil, nil, profile, "mycommand", []string{"--flag", "value"}) + if !strings.Contains(script, "exec mycommand --flag value") { + t.Errorf("script should contain exec of target command, got:\n%s", script) + } + }) +} + +func TestRunSandboxedDispatch(t *testing.T) { + t.Run("empty AllowNet uses runUnshare", func(t *testing.T) { + mock := newMockExecutor() + p := &LinuxPlatform{exec: mock} + profile := domain.SandboxProfile{ + Config: domain.Config{AllowNet: nil}, + WorkDir: "/tmp", + } + _ = p.RunSandboxed(profile, "echo", []string{"hello"}) + if mock.callCount() == 0 { + t.Fatal("expected executor to be called") + } + last := mock.lastCall() + if last.Name != "unshare" { + t.Errorf("expected unshare call, got %q", last.Name) + } + // Verify no --net flag (runUnshare doesn't add it) + for _, arg := range last.Args { + if arg == "--net" { + t.Error("runUnshare should not pass --net flag") + } + } + }) + + t.Run("AllowNet set without slirp4netns warns and falls back to runUnshare", func(t *testing.T) { + // This test works because slirp4netns may or may not be installed. + // If it IS installed, it will try runWithNetFilter which won't use the mock executor. + // We test the warning path by checking that when the mock executor is called, + // --net is not passed (meaning runUnshare was used). + // In CI without slirp4netns, this tests the fallback path. + if hasSlirp4netns() { + t.Skip("slirp4netns is installed; this test covers the fallback path only") + } + mock := newMockExecutor() + p := &LinuxPlatform{exec: mock} + profile := domain.SandboxProfile{ + Config: domain.Config{AllowNet: []string{"example.com"}}, + WorkDir: "/tmp", + } + _ = p.RunSandboxed(profile, "echo", []string{"hello"}) + if mock.callCount() == 0 { + t.Fatal("expected executor to be called via runUnshare fallback") + } + last := mock.lastCall() + if last.Name != "unshare" { + t.Errorf("expected unshare call, got %q", last.Name) + } + for _, arg := range last.Args { + if arg == "--net" { + t.Error("fallback runUnshare should not pass --net flag") + } + } + }) +} + +func TestBuildOrchestrationScript(t *testing.T) { + inner := "echo hello world\n" + + t.Run("embeds inner script via base64", func(t *testing.T) { + script := buildOrchestrationScript(inner) + encoded := base64.StdEncoding.EncodeToString([]byte(inner)) + if !strings.Contains(script, encoded) { + t.Error("orchestration script should contain base64-encoded inner script") + } + }) + + t.Run("preserves stdin via fd 3", func(t *testing.T) { + script := buildOrchestrationScript(inner) + if !strings.Contains(script, "exec 3<&0") { + t.Error("should save stdin to fd 3") + } + if !strings.Contains(script, "<&3") { + t.Error("should redirect fd 3 to sandbox stdin") + } + }) + + t.Run("uses two-layer unshare", func(t *testing.T) { + script := buildOrchestrationScript(inner) + // Inner unshare should create net namespace (outer only creates user ns) + if !strings.Contains(script, "unshare --net --mount --pid --fork") { + t.Error("inner unshare should create net/mount/pid namespaces") + } + }) + + t.Run("runs slirp4netns inside user namespace", func(t *testing.T) { + script := buildOrchestrationScript(inner) + if !strings.Contains(script, "slirp4netns --configure $_SANDBOX_PID tap0") { + t.Error("should launch slirp4netns with sandbox PID") + } + }) + + t.Run("waits for namespace and cleans up", func(t *testing.T) { + script := buildOrchestrationScript(inner) + if !strings.Contains(script, "readlink /proc/$_SANDBOX_PID/ns/net") { + t.Error("should wait for net namespace to differ from host") + } + if !strings.Contains(script, "wait $_SANDBOX_PID") { + t.Error("should wait for sandbox to finish") + } + if !strings.Contains(script, "kill $_SLIRP_PID") { + t.Error("should kill slirp4netns on cleanup") + } + }) +} + +func TestGetSystemDNS(t *testing.T) { + servers := getSystemDNS() + if len(servers) == 0 { + t.Fatal("getSystemDNS() should return at least one server") + } + // Should not contain localhost addresses + for _, s := range servers { + if strings.HasPrefix(s, "127.") { + t.Errorf("getSystemDNS() should not return localhost address %q", s) + } + } +} + +func TestParseDNSFromFile(t *testing.T) { + tmpFile := t.TempDir() + "/resolv.conf" + + t.Run("parses nameserver lines", func(t *testing.T) { + writeTestFile(t, tmpFile, "nameserver 8.8.8.8\nnameserver 1.1.1.1\n") + servers := parseDNSFromFile(tmpFile) + if len(servers) != 2 { + t.Fatalf("expected 2 servers, got %d: %v", len(servers), servers) + } + if servers[0] != "8.8.8.8" || servers[1] != "1.1.1.1" { + t.Errorf("unexpected servers: %v", servers) + } + }) + + t.Run("skips 127.x addresses", func(t *testing.T) { + writeTestFile(t, tmpFile, "nameserver 127.0.0.53\nnameserver 8.8.4.4\n") + servers := parseDNSFromFile(tmpFile) + if len(servers) != 1 || servers[0] != "8.8.4.4" { + t.Errorf("expected [8.8.4.4], got %v", servers) + } + }) + + t.Run("returns nil for missing file", func(t *testing.T) { + servers := parseDNSFromFile("/nonexistent/resolv.conf") + if servers != nil { + t.Errorf("expected nil for missing file, got %v", servers) + } + }) + + t.Run("skips comments and blank lines", func(t *testing.T) { + writeTestFile(t, tmpFile, "# comment\n\nnameserver 9.9.9.9\nsearch example.com\n") + servers := parseDNSFromFile(tmpFile) + if len(servers) != 1 || servers[0] != "9.9.9.9" { + t.Errorf("expected [9.9.9.9], got %v", servers) + } + }) +}