From 6ad4d8e282a0eb597a53e92b055f967b6c5bc6bb Mon Sep 17 00:00:00 2001 From: Derek Etherton Date: Tue, 9 Dec 2025 20:49:57 -0800 Subject: [PATCH 1/4] add escalated privileges for jobs using coding-agent image --- src/pkg/k8s.go | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/pkg/k8s.go b/src/pkg/k8s.go index a2df606..25f954a 100644 --- a/src/pkg/k8s.go +++ b/src/pkg/k8s.go @@ -150,9 +150,44 @@ func executable() *int32 { return &value } +func isCodingAgentJob(job opslevel.RunnerJob) bool { + return strings.Contains(job.Image, "coding-agent") +} + func (s *JobRunner) getPodObject(identifier string, labels map[string]string, job opslevel.RunnerJob) *corev1.Pod { // TODO: Allow configuration of Labels // TODO: Allow configuration of Pod Command + + // hard-coded check to centralize privilege escalations to the runner codebase (i.e. deliberately not extensible) + isCodingAgent := isCodingAgentJob(job) + + podSecurityContext := s.podConfig.SecurityContext + if isCodingAgent { + // Coding agent jobs need root user and group for Docker-in-Docker + runAsUser := int64(0) + fsGroup := int64(0) + podSecurityContext = corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + FSGroup: &fsGroup, + } + } + + var containerSecurityContext *corev1.SecurityContext + if isCodingAgent { + // Coding agent jobs need privileged mode for Docker-in-Docker + privileged := true + allowPrivilegeEscalation := true + // Add all capabilities explicitly to ensure unshare operations work + allCapabilities := corev1.Capabilities{ + Add: []corev1.Capability{"ALL"}, + } + containerSecurityContext = &corev1.SecurityContext{ + Privileged: &privileged, + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &allCapabilities, + } + } + return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: identifier, @@ -163,9 +198,10 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo Spec: corev1.PodSpec{ 
TerminationGracePeriodSeconds: &s.podConfig.TerminationGracePeriodSeconds, RestartPolicy: corev1.RestartPolicyNever, - SecurityContext: &s.podConfig.SecurityContext, + SecurityContext: &podSecurityContext, ServiceAccountName: s.podConfig.ServiceAccountName, NodeSelector: s.podConfig.NodeSelector, + HostNetwork: isCodingAgent, // Coding agent jobs need host network for Docker-in-Docker InitContainers: []corev1.Container{ { Name: "helper", @@ -195,8 +231,9 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo "-c", fmt.Sprintf("sleep %d", s.podConfig.Lifetime), }, - Resources: s.podConfig.Resources, - Env: s.getPodEnv(job.Variables), + Resources: s.podConfig.Resources, + Env: s.getPodEnv(job.Variables), + SecurityContext: containerSecurityContext, VolumeMounts: []corev1.VolumeMount{ { Name: "scripts", From 5f6d11ab218de43ff5a8a5f1d296ddc315ad5bdf Mon Sep 17 00:00:00 2001 From: Derek Etherton Date: Wed, 10 Dec 2025 07:03:53 -0800 Subject: [PATCH 2/4] prune back coding agent capabilities to just privileged + root user --- src/pkg/k8s.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/pkg/k8s.go b/src/pkg/k8s.go index 25f954a..8e6ccb7 100644 --- a/src/pkg/k8s.go +++ b/src/pkg/k8s.go @@ -158,12 +158,12 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo // TODO: Allow configuration of Labels // TODO: Allow configuration of Pod Command - // hard-coded check to centralize privilege escalations to the runner codebase (i.e. deliberately not extensible) + // hard-coded check to centralize privilege escalations to the runner codebase (i.e. 
deliberately not part of job templates) isCodingAgent := isCodingAgentJob(job) podSecurityContext := s.podConfig.SecurityContext if isCodingAgent { - // Coding agent jobs need root user and group for Docker-in-Docker + // Coding agent jobs need root user for Docker daemon runAsUser := int64(0) fsGroup := int64(0) podSecurityContext = corev1.PodSecurityContext{ @@ -174,17 +174,10 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo var containerSecurityContext *corev1.SecurityContext if isCodingAgent { - // Coding agent jobs need privileged mode for Docker-in-Docker + // Coding agent jobs need privileged mode for creating containers within container privileged := true - allowPrivilegeEscalation := true - // Add all capabilities explicitly to ensure unshare operations work - allCapabilities := corev1.Capabilities{ - Add: []corev1.Capability{"ALL"}, - } containerSecurityContext = &corev1.SecurityContext{ - Privileged: &privileged, - AllowPrivilegeEscalation: &allowPrivilegeEscalation, - Capabilities: &allCapabilities, + Privileged: &privileged, } } @@ -201,7 +194,6 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo SecurityContext: &podSecurityContext, ServiceAccountName: s.podConfig.ServiceAccountName, NodeSelector: s.podConfig.NodeSelector, - HostNetwork: isCodingAgent, // Coding agent jobs need host network for Docker-in-Docker InitContainers: []corev1.Container{ { Name: "helper", From 521aeb6853ecb5b973ba2cc5d4857783a0669cd8 Mon Sep 17 00:00:00 2001 From: Derek Etherton Date: Wed, 10 Dec 2025 07:57:31 -0800 Subject: [PATCH 3/4] add basic securityContext unit test --- src/pkg/k8s_test.go | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/pkg/k8s_test.go b/src/pkg/k8s_test.go index a067b23..6e82815 100644 --- a/src/pkg/k8s_test.go +++ b/src/pkg/k8s_test.go @@ -3,7 +3,10 @@ package pkg import ( "testing" + "github.com/opslevel/opslevel-go/v2024" 
"github.com/rocktavious/autopilot/v2023" + "github.com/rs/zerolog" + corev1 "k8s.io/api/core/v1" ) func TestCreateLabelSelector(t *testing.T) { @@ -18,3 +21,51 @@ func TestCreateLabelSelector(t *testing.T) { autopilot.Ok(t, err) autopilot.Equals(t, labels, labelSelector.MatchLabels) } + +func TestGetPodObject_CodingAgentPrivileged(t *testing.T) { + // Arrange + runner := &JobRunner{ + logger: zerolog.Nop(), + podConfig: &K8SPodConfig{ + Namespace: "test", + SecurityContext: corev1.PodSecurityContext{}, + TerminationGracePeriodSeconds: 30, + }, + } + job := opslevel.RunnerJob{ + Image: "jobs-coding-agent:latest", + } + labels := map[string]string{"app": "test"} + + // Act + pod := runner.getPodObject("test-pod", labels, job) + + // Assert + autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext != nil, "SecurityContext should be set for coding agent") + autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext.Privileged != nil, "Privileged should be set for coding agent") + autopilot.Equals(t, true, *pod.Spec.Containers[0].SecurityContext.Privileged) + autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.RunAsUser) + autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.FSGroup) +} + +func TestGetPodObject_RegularJobNotPrivileged(t *testing.T) { + // Arrange + runner := &JobRunner{ + logger: zerolog.Nop(), + podConfig: &K8SPodConfig{ + Namespace: "test", + SecurityContext: corev1.PodSecurityContext{}, + TerminationGracePeriodSeconds: 30, + }, + } + job := opslevel.RunnerJob{ + Image: "alpine:latest", + } + labels := map[string]string{"app": "test"} + + // Act + pod := runner.getPodObject("test-pod", labels, job) + + // Assert + autopilot.Equals(t, (*corev1.SecurityContext)(nil), pod.Spec.Containers[0].SecurityContext) +} From 0bce1024295f73b62666b53e7f98ffa0b9deb985 Mon Sep 17 00:00:00 2001 From: Derek Etherton Date: Wed, 10 Dec 2025 09:21:09 -0800 Subject: [PATCH 4/4] add new --job-agent-mode flag to control privileging of pods --- 
.changes/unreleased/Feature-20251210-091850.yaml | 3 +++ src/cmd/root.go | 2 ++ src/pkg/k8s.go | 15 ++++----------- src/pkg/k8s_config.go | 2 ++ src/pkg/k8s_test.go | 9 +++++---- 5 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 .changes/unreleased/Feature-20251210-091850.yaml diff --git a/.changes/unreleased/Feature-20251210-091850.yaml b/.changes/unreleased/Feature-20251210-091850.yaml new file mode 100644 index 0000000..374b66c --- /dev/null +++ b/.changes/unreleased/Feature-20251210-091850.yaml @@ -0,0 +1,3 @@ +kind: Feature +body: Add a new --job-agent-mode option, for creating privileged pods capable of container-in-container management +time: 2025-12-10T09:18:50.207809-08:00 diff --git a/src/cmd/root.go b/src/cmd/root.go index 2f84b56..3ad5e69 100644 --- a/src/cmd/root.go +++ b/src/cmd/root.go @@ -50,6 +50,7 @@ func init() { rootCmd.PersistentFlags().String("job-pod-workdir", "/jobs", "The job pod working directory.") rootCmd.PersistentFlags().Int("job-pod-log-max-interval", 30, "The max amount of time between when pod logs are shipped to OpsLevel. Works in tandem with 'job-pod-log-max-size'") rootCmd.PersistentFlags().Int("job-pod-log-max-size", 1000000, "The max amount in bytes to buffer before pod logs are shipped to OpsLevel. Works in tandem with 'job-pod-log-max-interval'") + rootCmd.PersistentFlags().Bool("job-agent-mode", false, "Enable agent mode with privileged security context for Container-in-Container support. WARNING: This grants elevated privileges and should only be enabled for trusted workloads.") rootCmd.PersistentFlags().String("runner-pod-name", "", "overrides environment variable 'RUNNER_POD_NAME'") rootCmd.PersistentFlags().String("runner-pod-namespace", "default", "The kubernetes namespace the runner pod is deployed in. 
Overrides environment variable 'RUNNER_POD_NAMESPACE'") @@ -71,6 +72,7 @@ func init() { viper.BindEnv("job-pod-workdir", "OPSLEVEL_JOB_POD_WORKDIR") viper.BindEnv("job-pod-log-max-interval", "OPSLEVEL_JOB_POD_LOG_MAX_INTERVAL") viper.BindEnv("job-pod-log-max-size", "OPSLEVEL_JOB_POD_LOG_MAX_SIZE") + viper.BindEnv("job-agent-mode", "OPSLEVEL_JOB_AGENT_MODE") viper.BindEnv("runner-pod-name", "RUNNER_POD_NAME") viper.BindEnv("runner-pod-namespace", "RUNNER_POD_NAMESPACE") diff --git a/src/pkg/k8s.go b/src/pkg/k8s.go index 8e6ccb7..7841a3b 100644 --- a/src/pkg/k8s.go +++ b/src/pkg/k8s.go @@ -150,20 +150,13 @@ func executable() *int32 { return &value } -func isCodingAgentJob(job opslevel.RunnerJob) bool { - return strings.Contains(job.Image, "coding-agent") -} - func (s *JobRunner) getPodObject(identifier string, labels map[string]string, job opslevel.RunnerJob) *corev1.Pod { // TODO: Allow configuration of Labels // TODO: Allow configuration of Pod Command - // hard-coded check to centralize privilege escalations to the runner codebase (i.e. 
deliberately not part of job templates) - isCodingAgent := isCodingAgentJob(job) - podSecurityContext := s.podConfig.SecurityContext - if isCodingAgent { - // Coding agent jobs need root user for Docker daemon + if s.podConfig.AgentMode { + // Agent mode jobs need root user for Docker daemon runAsUser := int64(0) fsGroup := int64(0) podSecurityContext = corev1.PodSecurityContext{ @@ -173,8 +166,8 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo } var containerSecurityContext *corev1.SecurityContext - if isCodingAgent { - // Coding agent jobs need privileged mode for creating containers within container + if s.podConfig.AgentMode { + // Agent mode jobs need privileged mode for creating containers within a container privileged := true containerSecurityContext = &corev1.SecurityContext{ Privileged: &privileged, diff --git a/src/pkg/k8s_config.go b/src/pkg/k8s_config.go index 6e28e14..0940bbe 100644 --- a/src/pkg/k8s_config.go +++ b/src/pkg/k8s_config.go @@ -26,6 +26,7 @@ type K8SPodConfig struct { PullPolicy corev1.PullPolicy `yaml:"pullPolicy"` SecurityContext corev1.PodSecurityContext `yaml:"securityContext"` NodeSelector map[string]string `yaml:"nodeSelector"` + AgentMode bool `yaml:"agentMode"` } func ReadPodConfig(path string) (*K8SPodConfig, error) { @@ -46,6 +47,7 @@ func ReadPodConfig(path string) (*K8SPodConfig, error) { }, }, TerminationGracePeriodSeconds: 5, + AgentMode: viper.GetBool("job-agent-mode"), }, } // Early out with viper defaults if config file doesn't exist diff --git a/src/pkg/k8s_test.go b/src/pkg/k8s_test.go index 6e82815..b0a4662 100644 --- a/src/pkg/k8s_test.go +++ b/src/pkg/k8s_test.go @@ -22,7 +22,7 @@ func TestCreateLabelSelector(t *testing.T) { autopilot.Equals(t, labels, labelSelector.MatchLabels) } -func TestGetPodObject_CodingAgentPrivileged(t *testing.T) { +func TestGetPodObject_AgentModePrivileged(t *testing.T) { // Arrange runner := &JobRunner{ logger: zerolog.Nop(), @@ -30,10 +30,11 @@ func
TestGetPodObject_CodingAgentPrivileged(t *testing.T) { Namespace: "test", SecurityContext: corev1.PodSecurityContext{}, TerminationGracePeriodSeconds: 30, + AgentMode: true, }, } job := opslevel.RunnerJob{ - Image: "jobs-coding-agent:latest", + Image: "alpine:latest", } labels := map[string]string{"app": "test"} @@ -41,8 +42,8 @@ func TestGetPodObject_CodingAgentPrivileged(t *testing.T) { pod := runner.getPodObject("test-pod", labels, job) // Assert - autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext != nil, "SecurityContext should be set for coding agent") - autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext.Privileged != nil, "Privileged should be set for coding agent") + autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext != nil, "SecurityContext should be set for agent mode") + autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext.Privileged != nil, "Privileged should be set for agent mode") autopilot.Equals(t, true, *pod.Spec.Containers[0].SecurityContext.Privileged) autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.RunAsUser) autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.FSGroup)