diff --git a/.changes/unreleased/Feature-20251210-091850.yaml b/.changes/unreleased/Feature-20251210-091850.yaml
new file mode 100644
index 0000000..374b66c
--- /dev/null
+++ b/.changes/unreleased/Feature-20251210-091850.yaml
@@ -0,0 +1,3 @@
+kind: Feature
+body: Add a new --job-agent-mode option, for creating privileged pods capable of container-in-container management
+time: 2025-12-10T09:18:50.207809-08:00
diff --git a/src/cmd/root.go b/src/cmd/root.go
index 2f84b56..3ad5e69 100644
--- a/src/cmd/root.go
+++ b/src/cmd/root.go
@@ -50,6 +50,7 @@ func init() {
 	rootCmd.PersistentFlags().String("job-pod-workdir", "/jobs", "The job pod working directory.")
 	rootCmd.PersistentFlags().Int("job-pod-log-max-interval", 30, "The max amount of time between when pod logs are shipped to OpsLevel. Works in tandem with 'job-pod-log-max-size'")
 	rootCmd.PersistentFlags().Int("job-pod-log-max-size", 1000000, "The max amount in bytes to buffer before pod logs are shipped to OpsLevel. Works in tandem with 'job-pod-log-max-interval'")
+	rootCmd.PersistentFlags().Bool("job-agent-mode", false, "Enable agent mode with privileged security context for Container-in-Container support. WARNING: This grants elevated privileges and should only be enabled for trusted workloads.")
 	rootCmd.PersistentFlags().String("runner-pod-name", "", "overrides environment variable 'RUNNER_POD_NAME'")
 	rootCmd.PersistentFlags().String("runner-pod-namespace", "default", "The kubernetes namespace the runner pod is deployed in. Overrides environment variable 'RUNNER_POD_NAMESPACE'")
 
@@ -71,6 +72,7 @@ func init() {
 	viper.BindEnv("job-pod-workdir", "OPSLEVEL_JOB_POD_WORKDIR")
 	viper.BindEnv("job-pod-log-max-interval", "OPSLEVEL_JOB_POD_LOG_MAX_INTERVAL")
 	viper.BindEnv("job-pod-log-max-size", "OPSLEVEL_JOB_POD_LOG_MAX_SIZE")
+	viper.BindEnv("job-agent-mode", "OPSLEVEL_JOB_AGENT_MODE")
 	viper.BindEnv("runner-pod-name", "RUNNER_POD_NAME")
 	viper.BindEnv("runner-pod-namespace", "RUNNER_POD_NAMESPACE")
 
diff --git a/src/pkg/k8s.go b/src/pkg/k8s.go
index a2df606..7841a3b 100644
--- a/src/pkg/k8s.go
+++ b/src/pkg/k8s.go
@@ -153,6 +153,27 @@ func executable() *int32 {
 func (s *JobRunner) getPodObject(identifier string, labels map[string]string, job opslevel.RunnerJob) *corev1.Pod {
 	// TODO: Allow configuration of Labels
 	// TODO: Allow configuration of Pod Command
+
+	// Start from the configured pod security context; agent mode overrides only
+	// the fields it needs so other configured settings are not discarded.
+	podSecurityContext := s.podConfig.SecurityContext
+	var containerSecurityContext *corev1.SecurityContext
+	if s.podConfig.AgentMode {
+		// Agent mode jobs need root user for Docker daemon
+		runAsUser := int64(0)
+		fsGroup := int64(0)
+		podSecurityContext.RunAsUser = &runAsUser
+		podSecurityContext.FSGroup = &fsGroup
+		// A configured runAsNonRoot would contradict running as UID 0.
+		podSecurityContext.RunAsNonRoot = nil
+
+		// Agent mode jobs need privileged mode for creating containers within container
+		privileged := true
+		containerSecurityContext = &corev1.SecurityContext{
+			Privileged: &privileged,
+		}
+	}
+
 	return &corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: identifier,
@@ -163,7 +184,7 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo
 		Spec: corev1.PodSpec{
 			TerminationGracePeriodSeconds: &s.podConfig.TerminationGracePeriodSeconds,
 			RestartPolicy:                 corev1.RestartPolicyNever,
-			SecurityContext:               &s.podConfig.SecurityContext,
+			SecurityContext:               &podSecurityContext,
 			ServiceAccountName:            s.podConfig.ServiceAccountName,
 			NodeSelector:                  s.podConfig.NodeSelector,
 			InitContainers: []corev1.Container{
@@ -195,8 +216,9 @@ func (s *JobRunner) getPodObject(identifier string, labels map[string]string, jo
 						"-c",
 						fmt.Sprintf("sleep %d", s.podConfig.Lifetime),
 					},
-					Resources: s.podConfig.Resources,
-					Env:       s.getPodEnv(job.Variables),
+					Resources:       s.podConfig.Resources,
+					Env:             s.getPodEnv(job.Variables),
+					SecurityContext: containerSecurityContext,
 					VolumeMounts: []corev1.VolumeMount{
 						{
 							Name: "scripts",
diff --git a/src/pkg/k8s_config.go b/src/pkg/k8s_config.go
index 6e28e14..0940bbe 100644
--- a/src/pkg/k8s_config.go
+++ b/src/pkg/k8s_config.go
@@ -26,6 +26,7 @@ type K8SPodConfig struct {
 	PullPolicy                    corev1.PullPolicy           `yaml:"pullPolicy"`
 	SecurityContext               corev1.PodSecurityContext   `yaml:"securityContext"`
 	NodeSelector                  map[string]string           `yaml:"nodeSelector"`
+	AgentMode                     bool                        `yaml:"agentMode"`
 }
 
 func ReadPodConfig(path string) (*K8SPodConfig, error) {
@@ -46,6 +47,7 @@ func ReadPodConfig(path string) (*K8SPodConfig, error) {
 				},
 			},
 			TerminationGracePeriodSeconds: 5,
+			AgentMode:                     viper.GetBool("job-agent-mode"),
 		},
 	}
 	// Early out with viper defaults if config file doesn't exist
diff --git a/src/pkg/k8s_test.go b/src/pkg/k8s_test.go
index a067b23..b0a4662 100644
--- a/src/pkg/k8s_test.go
+++ b/src/pkg/k8s_test.go
@@ -3,7 +3,10 @@ package pkg
 import (
 	"testing"
 
+	"github.com/opslevel/opslevel-go/v2024"
 	"github.com/rocktavious/autopilot/v2023"
+	"github.com/rs/zerolog"
+	corev1 "k8s.io/api/core/v1"
 )
 
 func TestCreateLabelSelector(t *testing.T) {
@@ -18,3 +21,59 @@ func TestCreateLabelSelector(t *testing.T) {
 	autopilot.Ok(t, err)
 	autopilot.Equals(t, labels, labelSelector.MatchLabels)
 }
+
+// TestGetPodObject_AgentModePrivileged checks that agent mode produces a
+// privileged job container running as root, and that unrelated settings from
+// the configured pod security context are kept.
+func TestGetPodObject_AgentModePrivileged(t *testing.T) {
+	// Arrange
+	runner := &JobRunner{
+		logger: zerolog.Nop(),
+		podConfig: &K8SPodConfig{
+			Namespace:                     "test",
+			SecurityContext:               corev1.PodSecurityContext{SupplementalGroups: []int64{1000}},
+			TerminationGracePeriodSeconds: 30,
+			AgentMode:                     true,
+		},
+	}
+	job := opslevel.RunnerJob{
+		Image: "alpine:latest",
+	}
+	labels := map[string]string{"app": "test"}
+
+	// Act
+	pod := runner.getPodObject("test-pod", labels, job)
+
+	// Assert
+	autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext != nil, "SecurityContext should be set for agent mode")
+	autopilot.Assert(t, pod.Spec.Containers[0].SecurityContext.Privileged != nil, "Privileged should be set for agent mode")
+	autopilot.Equals(t, true, *pod.Spec.Containers[0].SecurityContext.Privileged)
+	autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.RunAsUser)
+	autopilot.Equals(t, int64(0), *pod.Spec.SecurityContext.FSGroup)
+	autopilot.Equals(t, []int64{1000}, pod.Spec.SecurityContext.SupplementalGroups)
+}
+
+// TestGetPodObject_RegularJobNotPrivileged checks that without agent mode the
+// job container gets no security context and the pod is not forced to root.
+func TestGetPodObject_RegularJobNotPrivileged(t *testing.T) {
+	// Arrange
+	runner := &JobRunner{
+		logger: zerolog.Nop(),
+		podConfig: &K8SPodConfig{
+			Namespace:                     "test",
+			SecurityContext:               corev1.PodSecurityContext{},
+			TerminationGracePeriodSeconds: 30,
+		},
+	}
+	job := opslevel.RunnerJob{
+		Image: "alpine:latest",
+	}
+	labels := map[string]string{"app": "test"}
+
+	// Act
+	pod := runner.getPodObject("test-pod", labels, job)
+
+	// Assert
+	autopilot.Equals(t, (*corev1.SecurityContext)(nil), pod.Spec.Containers[0].SecurityContext)
+	autopilot.Equals(t, (*int64)(nil), pod.Spec.SecurityContext.RunAsUser)
+}