From 5f7d0912375ea7cf2c236242c7ad88c17b0a5ef3 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 8 Dec 2025 16:25:18 -0800 Subject: [PATCH 1/6] initial attempt of getting keycloak to work --- adding-keycloak-plan.md | 539 ++++++++++++++++++ airflow/config/webserver_config.py | 166 +++++- airflow/helm/values.tmpl.yaml | 4 + .../terraform-unity-sps-airflow/main.tf | 22 +- .../templates/proxy_oidc.conf.tpl | 75 +++ .../terraform-unity-sps-airflow/variables.tf | 42 ++ 6 files changed, 837 insertions(+), 11 deletions(-) create mode 100644 adding-keycloak-plan.md create mode 100644 terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl diff --git a/adding-keycloak-plan.md b/adding-keycloak-plan.md new file mode 100644 index 00000000..7883dda4 --- /dev/null +++ b/adding-keycloak-plan.md @@ -0,0 +1,539 @@ +# Keycloak OIDC Authentication for Airflow Implementation Plan + +## Overview +Integrate Keycloak OIDC authentication with Airflow using the Apache HTTPD proxy layer with role-based access control (RBAC). 
+ +**Architecture:** User → Keycloak (OIDC) → Apache Proxy (mod_auth_openidc) → Internal NLB → Airflow (Remote User Auth + RBAC) + +## Prerequisites +- Keycloak instance URL, realm name, client ID, and client secret +- Apache HTTPD proxy with mod_auth_openidc module installed +- Venue proxy IAM role needs Secrets Manager read permissions (coordinate with CS team if needed) + +--- + +## Phase 1: Terraform Infrastructure Changes + +### 1.1 Add Keycloak Variables +**File:** `terraform-unity/modules/terraform-unity-sps-airflow/variables.tf` + +Add after line 84: +```hcl +variable "keycloak_provider_url" { + description = "Keycloak OIDC provider URL (e.g., https://keycloak.example.com/realms/unity)" + type = string + default = "" +} + +variable "keycloak_client_id" { + description = "Keycloak OIDC client ID" + type = string + default = "" +} + +variable "keycloak_client_secret" { + description = "Keycloak OIDC client secret" + type = string + sensitive = true + default = "" +} + +variable "enable_oidc_auth" { + description = "Enable Keycloak OIDC authentication" + type = bool + default = false +} + +variable "keycloak_role_mapping" { + description = "Mapping of Keycloak groups to Airflow roles" + type = map(list(string)) + default = { + "airflow-admins" = ["Admin"] + "airflow-ops" = ["Op"] + "airflow-users" = ["User"] + "airflow-viewers" = ["Viewer"] + } +} +``` + +### 1.2 Create Secrets Manager Resources +**File:** `terraform-unity/modules/terraform-unity-sps-airflow/main.tf` + +Add after line 766 (after existing SSM parameters): +```hcl +# Keycloak client secret in Secrets Manager +resource "aws_secretsmanager_secret" "keycloak_client_secret" { + count = var.enable_oidc_auth ? 
1 : 0 + name = format(local.resource_name_prefix, "keycloak-client-secret") + description = "Keycloak OIDC client secret for Airflow" + recovery_window_in_days = 7 + tags = merge(local.common_tags, { + Name = format(local.resource_name_prefix, "keycloak-client-secret") + Component = "airflow" + }) +} + +resource "aws_secretsmanager_secret_version" "keycloak_client_secret" { + count = var.enable_oidc_auth ? 1 : 0 + secret_id = aws_secretsmanager_secret.keycloak_client_secret[0].id + secret_string = var.keycloak_client_secret +} + +# SSM parameters for Keycloak config +resource "aws_ssm_parameter" "keycloak_config" { + for_each = var.enable_oidc_auth ? { + provider_url = var.keycloak_provider_url + client_id = var.keycloak_client_id + client_secret_arn = try(aws_secretsmanager_secret.keycloak_client_secret[0].arn, "") + } : {} + + name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "security", "keycloak", each.key]))) + type = "String" + value = each.value + tags = merge(local.common_tags, { Component = "airflow" }) +} +``` + +### 1.3 Update Proxy SSM Parameter +**File:** `terraform-unity/modules/terraform-unity-sps-airflow/main.tf` + +Replace lines 740-766 (aws_ssm_parameter.unity_proxy_airflow_ui): +```hcl +resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { + name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "management", "proxy", "configurations", "015-sps-airflow-ui"]))) + description = "Unity-proxy configuration for Airflow UI with optional OIDC" + type = "String" + value = var.enable_oidc_auth ? 
templatefile("${path.module}/templates/proxy_oidc.conf.tpl", { + project = var.project + venue = var.venue + airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname + keycloak_provider_url = var.keycloak_provider_url + keycloak_client_id = var.keycloak_client_id + }) : <<-EOT + + + ProxyPassReverse "/" + + + Redirect "/${var.project}/${var.venue}/sps/home" + + + ProxyPassMatch "http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" retry=5 disablereuse=On + ProxyPreserveHost On + FallbackResource /management/index.html + AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html + Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/sps/$1|q" + + +EOT + tags = merge(local.common_tags, { Component = "SSM" }) +} +``` + +### 1.4 Create Proxy Template File +**File:** `terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl` (NEW) + +```apache +# Apache mod_auth_openidc configuration for Keycloak +OIDCProviderMetadataURL ${keycloak_provider_url}/.well-known/openid-configuration +OIDCClientID ${keycloak_client_id} +OIDCClientSecret "REPLACE_WITH_SECRET_FROM_SECRETS_MANAGER" +OIDCRedirectURI https://REPLACE_WITH_PROXY_DOMAIN/${project}/${venue}/sps/redirect_uri +OIDCCryptoPassphrase "REPLACE_WITH_GENERATED_PASSPHRASE" + +# Session config +OIDCSessionInactivityTimeout 3600 +OIDCSessionMaxDuration 28800 + +# Claims +OIDCRemoteUserClaim preferred_username +OIDCScope "openid email profile groups" + +# Cookie settings +OIDCCookiePath /${project}/${venue}/sps/ +OIDCCookieSameSite On + + + AuthType openid-connect + Require valid-user + + # Forward OIDC claims to Airflow + RequestHeader set X-Remote-User "%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" + + ProxyPassReverse 
"/" + + + + Redirect "/${project}/${venue}/sps/home" + + + + AuthType openid-connect + Require valid-user + + RequestHeader set X-Remote-User "%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" + + ProxyPassMatch "http://${airflow_nlb_hostname}:5000/$1" retry=5 disablereuse=On + ProxyPreserveHost On + FallbackResource /management/index.html + AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html + Substitute "s|\"/([^\"]*)|\"/${project}/${venue}/sps/$1|q" + +``` + +### 1.5 Update tfvars File +**File:** `terraform-unity/tfvars/unity-dev-sps-airflow.tfvars` + +Add at end of file: +```hcl +# Keycloak OIDC Configuration +enable_oidc_auth = false # Set to true when ready to enable +keycloak_provider_url = "https://keycloak.example.com/realms/unity" # REPLACE +keycloak_client_id = "airflow-unity-dev" # REPLACE +keycloak_client_secret = "your-client-secret" # REPLACE - keep secret! 
+ +keycloak_role_mapping = { + "airflow-admins" = ["Admin"] + "airflow-ops" = ["Op"] + "airflow-users" = ["User"] + "airflow-viewers" = ["Viewer"] +} +``` + +--- + +## Phase 2: Airflow Configuration + +### 2.1 Replace Webserver Config +**File:** `airflow/config/webserver_config.py` + +Replace entire file with: +```python +# Keycloak OIDC Remote User Authentication +import os +import logging +from flask_appbuilder.security.manager import AUTH_REMOTE_USER + +log = logging.getLogger(__name__) + +AUTH_TYPE = AUTH_REMOTE_USER +AUTH_USER_REGISTRATION = True +AUTH_USER_REGISTRATION_ROLE = "Viewer" + +from airflow.www.security import AirflowSecurityManager + +class CustomSecurityManager(AirflowSecurityManager): + """Map Keycloak groups to Airflow roles""" + + def auth_user_remote_user(self, username): + from flask import request + + email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') + full_name = request.headers.get('X-Remote-User-Name', username) + groups = request.headers.get('X-Remote-User-Groups', '') + + first_name, last_name = username, '' + if ' ' in full_name: + first_name, last_name = full_name.split(' ', 1) + + keycloak_groups = [g.strip() for g in groups.split(',') if g.strip()] + log.info(f"Auth: {username}, groups: {keycloak_groups}") + + user = self.find_user(username=username) + if not user: + user = self.add_user( + username=username, + first_name=first_name, + last_name=last_name, + email=email, + role=self.find_role(self.auth_user_registration_role) + ) + + # Map groups to roles + role_mapping = { + 'airflow-admins': 'Admin', + 'airflow-ops': 'Op', + 'airflow-users': 'User', + 'airflow-viewers': 'Viewer', + } + + role_priority = ['Viewer', 'User', 'Op', 'Admin'] + highest_role = None + highest_priority = -1 + + for group in keycloak_groups: + if group in role_mapping: + role_name = role_mapping[group] + if role_name in role_priority: + priority = role_priority.index(role_name) + if priority > highest_priority: + highest_priority 
= priority + highest_role = role_name + + if highest_role: + role = self.find_role(highest_role) + if role: + user.roles = [role] + self.update_user(user) + log.info(f"Assigned role {highest_role} to {username}") + + return user + +SECURITY_MANAGER_CLASS = CustomSecurityManager +WTF_CSRF_ENABLED = True +PERMANENT_SESSION_LIFETIME = 28800 +AUTH_ROLE_PUBLIC = None + +log.info("Airflow configured for OIDC remote user authentication") +``` + +### 2.2 Update Helm Values +**File:** `airflow/helm/values.tmpl.yaml` + +Add after line 374 (in extraEnv section): +```yaml + - name: AIRFLOW__WEBSERVER__AUTH_TYPE + value: "AUTH_REMOTE_USER" + - name: AIRFLOW__WEBSERVER__RBAC + value: "True" +``` + +--- + +## Phase 3: Keycloak Configuration (External) + +### 3.1 Create Keycloak Client +In Keycloak admin console: +1. Create new OIDC client: `airflow-{project}-{venue}` +2. Access Type: `confidential` +3. Valid Redirect URIs: `https://{proxy-domain}/{project}/{venue}/sps/*` +4. Client Scopes: Add `groups` scope with Group Membership mapper +5. Save and copy the client secret + +### 3.2 Create Keycloak Groups +Create these groups: +- `airflow-admins` - Full admin access +- `airflow-ops` - Operational access +- `airflow-users` - User access +- `airflow-viewers` - Read-only access + +### 3.3 Assign Test Users +Add test users to groups for validation. 
+ +--- + +## Phase 4: Proxy Server Configuration + +### 4.1 Install mod_auth_openidc +On venue proxy server: +```bash +# Amazon Linux 2 +sudo yum install -y mod_auth_openidc + +# Verify module +httpd -M | grep auth_openidc +``` + +### 4.2 Create Secret Retrieval Script +**File:** `/etc/httpd/scripts/update-keycloak-secret.sh` (on proxy server) + +```bash +#!/bin/bash +# Retrieve Keycloak client secret and update Apache config + +PROJECT="unity" +VENUE="dev" + +# Get secret ARN from SSM +SECRET_ARN=$(aws ssm get-parameter \ + --name "/unity/${PROJECT}/${VENUE}/cs/security/keycloak/client_secret_arn" \ + --query 'Parameter.Value' --output text) + +# Get actual secret +CLIENT_SECRET=$(aws secretsmanager get-secret-value \ + --secret-id "$SECRET_ARN" \ + --query 'SecretString' --output text) + +# Get proxy config from SSM +aws ssm get-parameter \ + --name "/unity/${PROJECT}/${VENUE}/cs/management/proxy/configurations/015-sps-airflow-ui" \ + --query 'Parameter.Value' --output text > /tmp/airflow-oidc.conf + +# Replace placeholders +sed -i "s/REPLACE_WITH_SECRET_FROM_SECRETS_MANAGER/${CLIENT_SECRET}/" /tmp/airflow-oidc.conf + +# Generate crypto passphrase +CRYPTO_PASS=$(openssl rand -base64 32) +sed -i "s/REPLACE_WITH_GENERATED_PASSPHRASE/${CRYPTO_PASS}/" /tmp/airflow-oidc.conf + +# Replace proxy domain (adjust as needed) +sed -i "s/REPLACE_WITH_PROXY_DOMAIN/unity-dev-proxy.example.com/" /tmp/airflow-oidc.conf + +# Install config +sudo cp /tmp/airflow-oidc.conf /etc/httpd/conf.d/ +sudo systemctl reload httpd + +echo "Keycloak configuration updated" +``` + +### 4.3 Add IAM Permissions +The venue proxy IAM role needs this policy: +```json +{ + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Action": [ + "secretsmanager:GetSecretValue", + "secretsmanager:DescribeSecret" + ], + "Resource": "arn:aws:secretsmanager:*:*:secret:*-sps-keycloak-client-secret-*" + }] +} +``` + +--- + +## Phase 5: Deployment Steps + +### Step 1: Apply Infrastructure (OIDC 
Disabled) +```bash +cd terraform-unity/ +terraform plan -var-file="tfvars/unity-dev-sps-airflow.tfvars" +terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" +``` + +This creates Secrets Manager secret and SSM parameters but keeps OIDC disabled. + +### Step 2: Configure Proxy Server +1. Install mod_auth_openidc on venue proxy +2. Add IAM permissions for Secrets Manager access +3. Run secret retrieval script +4. Verify Apache config loads without errors + +### Step 3: Enable OIDC +Update `tfvars/unity-dev-sps-airflow.tfvars`: +```hcl +enable_oidc_auth = true +``` + +Apply: +```bash +terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" +``` + +This updates the proxy SSM parameter with OIDC config. Lambda auto-deploys it. + +### Step 4: Restart Airflow +```bash +kubectl rollout restart deployment/airflow-webserver -n sps +``` + +### Step 5: Test Authentication +1. Navigate to `https://{proxy-domain}/{project}/{venue}/sps/` +2. Should redirect to Keycloak login +3. Login with test admin user +4. Verify you're logged into Airflow as Admin + +--- + +## Phase 6: Validation + +### Security Tests +- [ ] Verify OIDC redirect works +- [ ] Verify session timeout (8 hours) +- [ ] Verify logout works +- [ ] Test each role (Admin, Op, User, Viewer) +- [ ] Verify role permissions enforce correctly + +### Role Mapping Tests +- [ ] Login as airflow-admins member → Admin role +- [ ] Login as airflow-ops member → Op role +- [ ] Login as airflow-users member → User role +- [ ] Login as airflow-viewers member → Viewer role + +### Negative Tests +- [ ] User with no groups → Viewer role (default) +- [ ] Invalid Keycloak credentials → Access denied +- [ ] Expired session → Redirect to login + +--- + +## Rollback Plan + +If issues occur: + +**Quick Disable:** +```bash +# Set enable_oidc_auth = false in tfvars +terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" +``` + +This reverts proxy to non-OIDC configuration (open access). 
+ +**Full Rollback:** +```bash +git checkout HEAD~1 airflow/config/webserver_config.py +terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" \ + -var="enable_oidc_auth=false" +kubectl rollout restart deployment/airflow-webserver -n sps +``` + +--- + +## Critical Files + +1. **terraform-unity/modules/terraform-unity-sps-airflow/main.tf** (lines 740-790) + - Add Secrets Manager and SSM resources + - Update proxy SSM parameter with template + +2. **terraform-unity/modules/terraform-unity-sps-airflow/variables.tf** (after line 84) + - Add Keycloak configuration variables + +3. **terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl** (NEW) + - Apache HTTPD OIDC configuration template + +4. **airflow/config/webserver_config.py** (replace entire file) + - Enable remote user auth and RBAC with role mapping + +5. **airflow/helm/values.tmpl.yaml** (lines 374+) + - Add environment variables for remote user auth + +6. **terraform-unity/tfvars/unity-dev-sps-airflow.tfvars** (append) + - Add Keycloak connection details + +--- + +## Security Considerations + +1. **Client secret** stored in Secrets Manager (encrypted) +2. **Internal NLB** prevents direct header spoofing +3. **Network isolation** - proxy is only entry point +4. **Defense in depth** - OIDC at proxy + RBAC in Airflow +5. 
**Least privilege** - Default role is Viewer (read-only) + +--- + +## Post-Implementation + +### Documentation Needed +- User guide: How to login with Keycloak +- Admin guide: How to manage groups and roles +- Troubleshooting: Common OIDC issues + +### Monitoring +- OIDC authentication success/failure rates +- Session timeout events +- Unauthorized access attempts +- Secrets Manager access logs + +### Future Enhancements +- API authentication with OIDC bearer tokens +- DAG-level permissions based on groups +- Audit logging integration diff --git a/airflow/config/webserver_config.py b/airflow/config/webserver_config.py index 1f3e63d7..e98e5fdb 100644 --- a/airflow/config/webserver_config.py +++ b/airflow/config/webserver_config.py @@ -1,3 +1,163 @@ -# Issue 404: DISABLE AIRRLOW AUTHENTICATION (https://github.com/unity-sds/unity-sps/issues/404) -# Disable airflow authentication, https://airflow.apache.org/docs/apache-airflow-providers-fab/stable/auth-manager/webserver-authentication.html -AUTH_ROLE_PUBLIC = "Admin" +# Keycloak OIDC Remote User Authentication for Airflow +# Authentication happens at Apache proxy layer via mod_auth_openidc +# Airflow trusts the remote user headers from the internal proxy + +import os +import logging +from flask_appbuilder.security.manager import AUTH_REMOTE_USER + +log = logging.getLogger(__name__) + +# Enable remote user authentication +# Airflow will trust REMOTE_USER header set by the Apache proxy +AUTH_TYPE = AUTH_REMOTE_USER + +# Auto-register users on first login +AUTH_USER_REGISTRATION = True +AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role for new users + +# Custom security manager for mapping Keycloak groups to Airflow roles +from airflow.www.security import AirflowSecurityManager + +class CustomSecurityManager(AirflowSecurityManager): + """ + Custom security manager to map Keycloak groups to Airflow roles. 
+ + This class intercepts remote user authentication and maps the user's + Keycloak groups (from X-Remote-User-Groups header) to Airflow roles. + """ + + def auth_user_remote_user(self, username): + """ + Authenticate user from REMOTE_USER header and map groups to roles. + + Args: + username: Username from REMOTE_USER header (set by mod_auth_openidc) + + Returns: + User object if authentication succeeds, None otherwise + """ + from flask import request + + # Get user info from OIDC headers set by Apache proxy + email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') + full_name = request.headers.get('X-Remote-User-Name', username) + groups_header = request.headers.get('X-Remote-User-Groups', '') + + # Parse full name + first_name, last_name = username, '' + if ' ' in full_name: + first_name, last_name = full_name.split(' ', 1) + + # Parse Keycloak groups from comma-separated header + keycloak_groups = [g.strip() for g in groups_header.split(',') if g.strip()] + + log.info(f"Remote user auth: username={username}, email={email}, groups={keycloak_groups}") + + # Find or create user + user = self.find_user(username=username) + + if not user: + log.info(f"Creating new user: {username}") + user = self.add_user( + username=username, + first_name=first_name, + last_name=last_name, + email=email, + role=self.find_role(self.auth_user_registration_role) + ) + else: + # Update existing user info + log.info(f"Updating existing user: {username}") + user.email = email + user.first_name = first_name + user.last_name = last_name + self.update_user(user) + + # Map Keycloak groups to Airflow roles + airflow_roles = self._map_groups_to_roles(keycloak_groups) + + if airflow_roles: + log.info(f"Assigning roles to {username}: {[r.name for r in airflow_roles]}") + user.roles = airflow_roles + self.update_user(user) + else: + # No matching groups - assign default Viewer role + log.warning(f"No matching Keycloak groups for {username}, assigning default Viewer role") + 
default_role = self.find_role('Viewer') + if default_role: + user.roles = [default_role] + self.update_user(user) + + return user + + def _map_groups_to_roles(self, keycloak_groups): + """ + Map Keycloak groups to Airflow roles. + + Role mapping (from Keycloak): + - airflow_admin → Admin (full access) + - airflow_op → Op (operational access) + - airflow_user → User (standard access) + - airflow_viewer → Viewer (read-only) + - airflow_public → Public (minimal access) + + Users with multiple groups get the highest priority role. + Priority: Admin > Op > User > Viewer > Public + + Args: + keycloak_groups: List of Keycloak group names from OIDC token + + Returns: + List containing single Airflow role object (highest priority) + """ + # Keycloak group to Airflow role mapping + group_role_mapping = { + 'airflow_admin': 'Admin', + 'airflow_op': 'Op', + 'airflow_user': 'User', + 'airflow_viewer': 'Viewer', + 'airflow_public': 'Public', + } + + # Role priority (higher index = higher priority) + role_priority = ['Public', 'Viewer', 'User', 'Op', 'Admin'] + + # Find highest priority role from user's groups + highest_role_name = None + highest_priority = -1 + + for group in keycloak_groups: + if group in group_role_mapping: + role_name = group_role_mapping[group] + if role_name in role_priority: + priority = role_priority.index(role_name) + if priority > highest_priority: + highest_priority = priority + highest_role_name = role_name + log.debug(f"Group '{group}' maps to role '{role_name}' (priority {priority})") + + # Return the highest priority role + if highest_role_name: + role = self.find_role(highest_role_name) + if role: + return [role] + else: + log.error(f"Role '{highest_role_name}' not found in Airflow database") + + return [] + +# Set the custom security manager +SECURITY_MANAGER_CLASS = CustomSecurityManager + +# Security settings +WTF_CSRF_ENABLED = True +WTF_CSRF_TIME_LIMIT = None # No time limit for CSRF tokens + +# Session configuration (matches OIDC session 
duration) +PERMANENT_SESSION_LIFETIME = 28800 # 8 hours + +# Disable public access (all users must authenticate) +AUTH_ROLE_PUBLIC = None + +log.info("Airflow webserver configured for Keycloak OIDC remote user authentication") diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 2d6abe31..d9a7f9f3 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -372,3 +372,7 @@ extraEnv: | value: "1024" - name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG value: "True" + - name: AIRFLOW__WEBSERVER__AUTH_TYPE + value: "AUTH_REMOTE_USER" + - name: AIRFLOW__WEBSERVER__RBAC + value: "True" diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index a15b24ef..9fc2a05b 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -520,14 +520,13 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_proxy_rule" { } #tfsec:ignore:AVD-AWS-0107 -resource "aws_vpc_security_group_ingress_rule" "airflow_jpl_ingress_rule" { - for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"]) +resource "aws_vpc_security_group_ingress_rule" "airflow_api_ingress_sg_proxy_rule" { security_group_id = aws_security_group.airflow_ingress_sg_internal.id - description = "SecurityGroup ingress rule for JPL-local addresses" + description = "SecurityGroup ingress rule for api-gateway (temporary)" ip_protocol = "tcp" from_port = local.load_balancer_port to_port = local.load_balancer_port - cidr_ipv4 = each.key + cidr_ipv4 = "0.0.0.0/0" } resource "kubernetes_service" "airflow_ingress_internal" { @@ -535,10 +534,10 @@ resource "kubernetes_service" "airflow_ingress_internal" { name = "airflow-ingress-internal" namespace = data.kubernetes_namespace.service_area.metadata[0].name annotations = { - "service.beta.kubernetes.io/aws-load-balancer-scheme" = "internet-facing" + 
"service.beta.kubernetes.io/aws-load-balancer-scheme" = "internal" "service.beta.kubernetes.io/aws-load-balancer-type" = "external" "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type" = "ip" - "service.beta.kubernetes.io/aws-load-balancer-subnets" = join(",", jsondecode(data.aws_ssm_parameter.subnet_ids.value)["public"]) + "service.beta.kubernetes.io/aws-load-balancer-subnets" = join(",", jsondecode(data.aws_ssm_parameter.subnet_ids.value)["private"]) "service.beta.kubernetes.io/aws-load-balancer-healthcheck-path" = "/health" "service.beta.kubernetes.io/aws-load-balancer-attributes" = "load_balancing.cross_zone.enabled=true" "service.beta.kubernetes.io/aws-load-balancer-security-groups" = aws_security_group.airflow_ingress_sg_internal.id @@ -740,9 +739,16 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "management", "proxy", "configurations", "015-sps-airflow-ui"]))) - description = "The unity-proxy configuration for the Airflow UI." + description = "The unity-proxy configuration for the Airflow UI with optional OIDC." type = "String" - value = <<-EOT + value = var.enable_oidc_auth ? 
templatefile("${path.module}/templates/proxy_oidc.conf.tpl", { + project = var.project + venue = var.venue + airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname + keycloak_provider_url = var.keycloak_provider_url + keycloak_client_id = var.keycloak_client_id + proxy_domain = var.proxy_domain + }) : <<-EOT ProxyPassReverse "/" diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl b/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl new file mode 100644 index 00000000..ac1819ad --- /dev/null +++ b/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl @@ -0,0 +1,75 @@ +# Apache mod_auth_openidc configuration for Keycloak OIDC Authentication +# This configuration is deployed when enable_oidc_auth is true + +# OIDC Provider Configuration +OIDCProviderMetadataURL ${keycloak_provider_url}/.well-known/openid-configuration +OIDCClientID ${keycloak_client_id} + +# Client secret - retrieved from AWS Parameter Store at runtime by proxy server +# The proxy server must retrieve the secret from: /sps/keycloak/client_secret +# and replace this placeholder before Apache loads the config +OIDCClientSecret "REPLACE_WITH_SECRET_FROM_PARAMETER_STORE" + +# Redirect URI - must match Keycloak client Valid Redirect URIs setting +%{ if proxy_domain != "" ~} +OIDCRedirectURI https://${proxy_domain}/${project}/${venue}/sps/redirect_uri +%{ else ~} +OIDCRedirectURI https://REPLACE_WITH_PROXY_DOMAIN/${project}/${venue}/sps/redirect_uri +%{ endif ~} + +# Crypto passphrase for encrypting session cookies +# Generate at runtime with: openssl rand -base64 32 +OIDCCryptoPassphrase "REPLACE_WITH_GENERATED_CRYPTO_PASSPHRASE" + +# Session configuration: 1 hour inactivity timeout, 8 hours maximum session (values in seconds; Apache does not allow trailing comments on directive lines) +OIDCSessionInactivityTimeout 3600 +OIDCSessionMaxDuration 28800 + +# User identification and claims +OIDCRemoteUserClaim preferred_username
+OIDCScope "openid email profile groups" + +# Cookie settings +OIDCCookiePath /${project}/${venue}/sps/ +OIDCCookieSameSite On + +# Airflow UI - Main location with authentication + + AuthType openid-connect + Require valid-user + + # Forward OIDC user claims to Airflow as HTTP headers (the doubled percent sign is Terraform's templatefile escape; Apache receives a single one) + # Airflow will use these headers for authentication and authorization + RequestHeader set X-Remote-User "%%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" + + ProxyPassReverse "/" + + +# Handle nested path redirects + + Redirect "/${project}/${venue}/sps/home" + + +# Main proxy pass configuration with authentication + + AuthType openid-connect + Require valid-user + + # Forward OIDC claims to Airflow backend (percent sign doubled to escape it for Terraform's templatefile) + RequestHeader set X-Remote-User "%%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" + + # Proxy to internal Airflow NLB + ProxyPassMatch "http://${airflow_nlb_hostname}:5000/$1" retry=5 disablereuse=On + ProxyPreserveHost On + FallbackResource /management/index.html + + # URL rewriting for embedded links + AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html + Substitute "s|\"/([^\"]*)|\"/${project}/${venue}/sps/$1|q" + diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf index a9351c40..5a671938 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf @@ -81,3 +81,45 @@ variable "karpenter_node_pools" { description = "Names of the Karpenter node pools" type = list(string) } + +variable "keycloak_provider_url" { + description = "Keycloak OIDC provider URL including realm (e.g.,
https://keycloak.example.com/realms/MAAP)" + type = string + default = "" +} + +variable "keycloak_client_id" { + description = "Keycloak OIDC client ID for Airflow authentication" + type = string + default = "" +} + +variable "keycloak_client_secret_ssm_param" { + description = "SSM parameter path containing Keycloak OIDC client secret" + type = string + default = "" +} + +variable "enable_oidc_auth" { + description = "Enable Keycloak OIDC authentication for Airflow" + type = bool + default = false +} + +variable "keycloak_role_mapping" { + description = "Mapping of Keycloak groups to Airflow roles" + type = map(list(string)) + default = { + "airflow_admin" = ["Admin"] + "airflow_op" = ["Op"] + "airflow_user" = ["User"] + "airflow_viewer" = ["Viewer"] + "airflow_public" = ["Public"] + } +} + +variable "proxy_domain" { + description = "Domain name of the Apache proxy server for OIDC redirect URI" + type = string + default = "" +} From db1fb645b86f8e46f0dda1dee7d37a8e1caa8c52 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Tue, 9 Dec 2025 14:16:25 -0800 Subject: [PATCH 2/6] attempt at keycloak builds but cant load loadbalancer endpoint --- airflow/config/webserver_config.py.tpl | 159 ++++++++++++++++++ .../terraform-unity-sps-airflow/main.tf | 30 ++-- .../templates/proxy_oidc.conf.tpl | 18 +- .../terraform-unity-sps-airflow/variables.tf | 8 +- 4 files changed, 189 insertions(+), 26 deletions(-) create mode 100644 airflow/config/webserver_config.py.tpl diff --git a/airflow/config/webserver_config.py.tpl b/airflow/config/webserver_config.py.tpl new file mode 100644 index 00000000..5619379a --- /dev/null +++ b/airflow/config/webserver_config.py.tpl @@ -0,0 +1,159 @@ +# Keycloak OIDC Remote User Authentication for Airflow +# Authentication happens at Apache proxy layer via mod_auth_openidc +# Airflow trusts the remote user headers from the internal proxy + +import os +import logging +from flask_appbuilder.security.manager import AUTH_REMOTE_USER + +log = 
logging.getLogger(__name__) + +# Enable remote user authentication +# Airflow will trust REMOTE_USER header set by the Apache proxy +AUTH_TYPE = AUTH_REMOTE_USER + +# Auto-register users on first login +AUTH_USER_REGISTRATION = True +AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role for new users + +# Custom security manager for mapping Keycloak groups to Airflow roles +from airflow.www.security import AirflowSecurityManager + +class CustomSecurityManager(AirflowSecurityManager): + """ + Custom security manager to map Keycloak groups to Airflow roles. + + This class intercepts remote user authentication and maps the user's + Keycloak groups (from X-Remote-User-Groups header) to Airflow roles. + """ + + def auth_user_remote_user(self, username): + """ + Authenticate user from REMOTE_USER header and map groups to roles. + + Args: + username: Username from REMOTE_USER header (set by mod_auth_openidc) + + Returns: + User object if authentication succeeds, None otherwise + """ + from flask import request + + # Get user info from OIDC headers set by Apache proxy + email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') + full_name = request.headers.get('X-Remote-User-Name', username) + groups_header = request.headers.get('X-Remote-User-Groups', '') + + # Parse full name + first_name, last_name = username, '' + if ' ' in full_name: + first_name, last_name = full_name.split(' ', 1) + + # Parse Keycloak groups from comma-separated header + keycloak_groups = [g.strip() for g in groups_header.split(',') if g.strip()] + + log.info(f"Remote user auth: username={username}, email={email}, groups={keycloak_groups}") + + # Find or create user + user = self.find_user(username=username) + + if not user: + log.info(f"Creating new user: {username}") + user = self.add_user( + username=username, + first_name=first_name, + last_name=last_name, + email=email, + role=self.find_role(self.auth_user_registration_role) + ) + else: + # Update existing user info + 
log.info(f"Updating existing user: {username}") + user.email = email + user.first_name = first_name + user.last_name = last_name + self.update_user(user) + + # Map Keycloak groups to Airflow roles + airflow_roles = self._map_groups_to_roles(keycloak_groups) + + if airflow_roles: + log.info(f"Assigning roles to {username}: {[r.name for r in airflow_roles]}") + user.roles = airflow_roles + self.update_user(user) + else: + # No matching groups - assign default Viewer role + log.warning(f"No matching Keycloak groups for {username}, assigning default Viewer role") + default_role = self.find_role('Viewer') + if default_role: + user.roles = [default_role] + self.update_user(user) + + return user + + def _map_groups_to_roles(self, keycloak_groups): + """ + Map Keycloak groups to Airflow roles. + + Role mapping (configured via Terraform): +%{ for group, roles in keycloak_role_mapping ~} + - ${group} → ${join(", ", roles)} +%{ endfor ~} + + Users with multiple groups get the highest priority role. 
+ Priority: Admin > Op > User > Viewer > Public + + Args: + keycloak_groups: List of Keycloak group names from OIDC token + + Returns: + List containing single Airflow role object (highest priority) + """ + # Keycloak group to Airflow role mapping (from Terraform configuration) + group_role_mapping = { +%{ for group, roles in keycloak_role_mapping ~} + '${group}': '${roles[0]}', +%{ endfor ~} + } + + # Role priority (higher index = higher priority) + role_priority = ['Public', 'Viewer', 'User', 'Op', 'Admin'] + + # Find highest priority role from user's groups + highest_role_name = None + highest_priority = -1 + + for group in keycloak_groups: + if group in group_role_mapping: + role_name = group_role_mapping[group] + if role_name in role_priority: + priority = role_priority.index(role_name) + if priority > highest_priority: + highest_priority = priority + highest_role_name = role_name + log.debug(f"Group '{group}' maps to role '{role_name}' (priority {priority})") + + # Return the highest priority role + if highest_role_name: + role = self.find_role(highest_role_name) + if role: + return [role] + else: + log.error(f"Role '{highest_role_name}' not found in Airflow database") + + return [] + +# Set the custom security manager +SECURITY_MANAGER_CLASS = CustomSecurityManager + +# Security settings +WTF_CSRF_ENABLED = True +WTF_CSRF_TIME_LIMIT = None # No time limit for CSRF tokens + +# Session configuration (matches OIDC session duration) +PERMANENT_SESSION_LIFETIME = 28800 # 8 hours + +# Disable public access (all users must authenticate) +AUTH_ROLE_PUBLIC = None + +log.info("Airflow webserver configured for Keycloak OIDC remote user authentication") diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index 9fc2a05b..fdc67fd3 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -413,8 +413,10 @@ 
resource "helm_release" "airflow" { unity_cluster_name = data.aws_eks_cluster.cluster.name karpenter_node_pools = join(",", var.karpenter_node_pools) cwl_dag_ecr_uri = "${data.aws_caller_identity.current.account_id}.dkr.ecr.us-west-2.amazonaws.com" - # Issue 404: DISABLE AIRRLOW AUTHENTICATION (https://github.com/unity-sds/unity-sps/issues/404) - webserver_config = indent(4, file("${path.module}/../../../airflow/config/webserver_config.py")) + # Keycloak OIDC authentication configuration + webserver_config = indent(4, templatefile("${path.module}/../../../airflow/config/webserver_config.py.tpl", { + keycloak_role_mapping = var.keycloak_role_mapping + })) }) ] set_sensitive { @@ -520,13 +522,14 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_proxy_rule" { } #tfsec:ignore:AVD-AWS-0107 -resource "aws_vpc_security_group_ingress_rule" "airflow_api_ingress_sg_proxy_rule" { +resource "aws_vpc_security_group_ingress_rule" "airflow_jpl_ingress_rule" { + for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"]) security_group_id = aws_security_group.airflow_ingress_sg_internal.id - description = "SecurityGroup ingress rule for api-gateway (temporary)" + description = "SecurityGroup ingress rule for JPL-local addresses" ip_protocol = "tcp" from_port = local.load_balancer_port to_port = local.load_balancer_port - cidr_ipv4 = "0.0.0.0/0" + cidr_ipv4 = each.key } resource "kubernetes_service" "airflow_ingress_internal" { @@ -534,10 +537,10 @@ resource "kubernetes_service" "airflow_ingress_internal" { name = "airflow-ingress-internal" namespace = data.kubernetes_namespace.service_area.metadata[0].name annotations = { - "service.beta.kubernetes.io/aws-load-balancer-scheme" = "internal" + "service.beta.kubernetes.io/aws-load-balancer-scheme" = "internet-facing" "service.beta.kubernetes.io/aws-load-balancer-type" = "external" "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type" = "ip" - 
"service.beta.kubernetes.io/aws-load-balancer-subnets" = join(",", jsondecode(data.aws_ssm_parameter.subnet_ids.value)["private"]) + "service.beta.kubernetes.io/aws-load-balancer-subnets" = join(",", jsondecode(data.aws_ssm_parameter.subnet_ids.value)["public"]) "service.beta.kubernetes.io/aws-load-balancer-healthcheck-path" = "/health" "service.beta.kubernetes.io/aws-load-balancer-attributes" = "load_balancing.cross_zone.enabled=true" "service.beta.kubernetes.io/aws-load-balancer-security-groups" = aws_security_group.airflow_ingress_sg_internal.id @@ -742,12 +745,13 @@ resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { description = "The unity-proxy configuration for the Airflow UI with optional OIDC." type = "String" value = var.enable_oidc_auth ? templatefile("${path.module}/templates/proxy_oidc.conf.tpl", { - project = var.project - venue = var.venue - airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname - keycloak_provider_url = var.keycloak_provider_url - keycloak_client_id = var.keycloak_client_id - proxy_domain = var.proxy_domain + project = var.project + venue = var.venue + airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname + keycloak_provider_url = var.keycloak_provider_url + keycloak_client_id = var.keycloak_client_id + keycloak_client_secret_ssm_param = var.keycloak_client_secret_ssm_param + proxy_domain = var.proxy_domain }) : <<-EOT diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl b/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl index ac1819ad..fd2d92ba 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl +++ b/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl @@ -6,7 +6,7 @@ OIDCProviderMetadataURL ${keycloak_provider_url}/.well-known/openid-configuratio 
OIDCClientID ${keycloak_client_id} # Client secret - retrieved from AWS Parameter Store at runtime by proxy server -# The proxy server must retrieve the secret from: /sps/keycloak/client_secret +# The proxy server must retrieve the secret from: ${keycloak_client_secret_ssm_param} # and replace this placeholder before Apache loads the config OIDCClientSecret "REPLACE_WITH_SECRET_FROM_PARAMETER_STORE" @@ -40,10 +40,10 @@ OIDCCookieSameSite On # Forward OIDC user claims to Airflow as HTTP headers # Airflow will use these headers for authentication and authorization - RequestHeader set X-Remote-User "%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" + RequestHeader set X-Remote-User "%%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" ProxyPassReverse "/" @@ -59,10 +59,10 @@ OIDCCookieSameSite On Require valid-user # Forward OIDC claims to Airflow backend - RequestHeader set X-Remote-User "%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" + RequestHeader set X-Remote-User "%%{REMOTE_USER}e" + RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" + RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" + RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" # Proxy to internal Airflow NLB ProxyPassMatch "http://${airflow_nlb_hostname}:5000/$1" retry=5 disablereuse=On diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf index 5a671938..48861332 100644 --- 
a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf @@ -85,25 +85,25 @@ variable "karpenter_node_pools" { variable "keycloak_provider_url" { description = "Keycloak OIDC provider URL including realm (e.g., https://keycloak.example.com/realms/MAAP)" type = string - default = "" + default = "https://dit.kc-test-maap.xyz/realms/MAAP" } variable "keycloak_client_id" { description = "Keycloak OIDC client ID for Airflow authentication" type = string - default = "" + default = "airflow" } variable "keycloak_client_secret_ssm_param" { description = "SSM parameter path containing Keycloak OIDC client secret" type = string - default = "" + default = "/sps/keycloak/client_secret" } variable "enable_oidc_auth" { description = "Enable Keycloak OIDC authentication for Airflow" type = bool - default = false + default = true } variable "keycloak_role_mapping" { From 08f0c7707a2d7a783fadfec8aa0d5c3bc9c44bc2 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 10 Dec 2025 11:30:40 -0800 Subject: [PATCH 3/6] added keycloak and not using common services proxy --- airflow/config/webserver_config.py | 314 +++++++++--------- airflow/config/webserver_config.py.tpl | 172 +++++----- airflow/helm/values.tmpl.yaml | 16 +- airflow/plugins/user_auth.py | 139 ++++++++ .../terraform-unity-sps-airflow/main.tf | 40 ++- 5 files changed, 423 insertions(+), 258 deletions(-) create mode 100644 airflow/plugins/user_auth.py diff --git a/airflow/config/webserver_config.py b/airflow/config/webserver_config.py index e98e5fdb..40f9b692 100644 --- a/airflow/config/webserver_config.py +++ b/airflow/config/webserver_config.py @@ -1,163 +1,165 @@ -# Keycloak OIDC Remote User Authentication for Airflow -# Authentication happens at Apache proxy layer via mod_auth_openidc -# Airflow trusts the remote user headers from the internal proxy - +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor 
license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Default configuration for the Airflow webserver""" +from __future__ import annotations import os import logging -from flask_appbuilder.security.manager import AUTH_REMOTE_USER +# Wrap imports that might not be available during Terraform file() reads +try: + import jwt + import requests + from base64 import b64decode + from cryptography.hazmat.primitives import serialization + from airflow.www.fab_security.manager import AUTH_OAUTH + from airflow.www.security import AirflowSecurityManager + from flask_appbuilder import expose + from flask_appbuilder.security.views import AuthOAuthView + IMPORTS_AVAILABLE = True +except ImportError as e: + logging.warning(f"Some imports not available during config read: {e}") + IMPORTS_AVAILABLE = False + # Define minimal fallbacks + AUTH_OAUTH = None +basedir = os.path.abspath(os.path.dirname(__file__)) log = logging.getLogger(__name__) - -# Enable remote user authentication -# Airflow will trust REMOTE_USER header set by the Apache proxy -AUTH_TYPE = AUTH_REMOTE_USER - -# Auto-register users on first login +APP_THEME = "simplex.css" +# Flask-WTF flag for CSRF +WTF_CSRF_ENABLED = True +# ---------------------------------------------------- +# AUTHENTICATION CONFIG +# ---------------------------------------------------- +# For 
details on how to set up each of the following authentication, see +# http://flask-appbuilder.readthedocs.io/en/latest/security.html#authentication-methods +# for details. +AUTH_TYPE = AUTH_OAUTH if IMPORTS_AVAILABLE else None +# Uncomment to setup Full admin role name +# AUTH_ROLE_ADMIN = 'Admin' +# Uncomment and set to desired role to enable access without authentication +# AUTH_ROLE_PUBLIC = 'Viewer' +# Will allow user self registration AUTH_USER_REGISTRATION = True -AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role for new users - -# Custom security manager for mapping Keycloak groups to Airflow roles -from airflow.www.security import AirflowSecurityManager - -class CustomSecurityManager(AirflowSecurityManager): - """ - Custom security manager to map Keycloak groups to Airflow roles. - - This class intercepts remote user authentication and maps the user's - Keycloak groups (from X-Remote-User-Groups header) to Airflow roles. - """ - - def auth_user_remote_user(self, username): - """ - Authenticate user from REMOTE_USER header and map groups to roles.
- - Args: - username: Username from REMOTE_USER header (set by mod_auth_openidc) - - Returns: - User object if authentication succeeds, None otherwise - """ - from flask import request - - # Get user info from OIDC headers set by Apache proxy - email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') - full_name = request.headers.get('X-Remote-User-Name', username) - groups_header = request.headers.get('X-Remote-User-Groups', '') - - # Parse full name - first_name, last_name = username, '' - if ' ' in full_name: - first_name, last_name = full_name.split(' ', 1) - - # Parse Keycloak groups from comma-separated header - keycloak_groups = [g.strip() for g in groups_header.split(',') if g.strip()] - - log.info(f"Remote user auth: username={username}, email={email}, groups={keycloak_groups}") - - # Find or create user - user = self.find_user(username=username) - - if not user: - log.info(f"Creating new user: {username}") - user = self.add_user( - username=username, - first_name=first_name, - last_name=last_name, - email=email, - role=self.find_role(self.auth_user_registration_role) - ) - else: - # Update existing user info - log.info(f"Updating existing user: {username}") - user.email = email - user.first_name = first_name - user.last_name = last_name - self.update_user(user) - - # Map Keycloak groups to Airflow roles - airflow_roles = self._map_groups_to_roles(keycloak_groups) - - if airflow_roles: - log.info(f"Assigning roles to {username}: {[r.name for r in airflow_roles]}") - user.roles = airflow_roles - self.update_user(user) - else: - # No matching groups - assign default Viewer role - log.warning(f"No matching Keycloak groups for {username}, assigning default Viewer role") - default_role = self.find_role('Viewer') - if default_role: - user.roles = [default_role] - self.update_user(user) - - return user - - def _map_groups_to_roles(self, keycloak_groups): - """ - Map Keycloak groups to Airflow roles. +# The recaptcha is automatically enabled when user self registration is active, and the keys are necessary +# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY +# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY +# Config for Flask-Mail necessary for user self registration +# MAIL_SERVER = 'smtp.gmail.com' +# MAIL_USE_TLS = True +# MAIL_USERNAME = 'yourappemail@gmail.com' +# MAIL_PASSWORD = 'passwordformail' +# MAIL_DEFAULT_SENDER = 'sender@gmail.com' +# The default user self registration role +AUTH_USER_REGISTRATION_ROLE = "Public" +AUTH_ROLES_SYNC_AT_LOGIN = True +AUTH_ROLES_MAPPING = {
- - Role mapping (from Keycloak): - - airflow_admin → Admin (full access) - - airflow_op → Op (operational access) - - airflow_user → User (standard access) - - airflow_viewer → Viewer (read-only) - - airflow_public → Public (minimal access) - - Users with multiple groups get the highest priority role. - Priority: Admin > Op > User > Viewer > Public - - Args: - keycloak_groups: List of Keycloak group names from OIDC token - - Returns: - List containing single Airflow role object (highest priority) - """ - # Keycloak group to Airflow role mapping - group_role_mapping = { - 'airflow_admin': 'Admin', - 'airflow_op': 'Op', - 'airflow_user': 'User', - 'airflow_viewer': 'Viewer', - 'airflow_public': 'Public', - } - - # Role priority (higher index = higher priority) - role_priority = ['Public', 'Viewer', 'User', 'Op', 'Admin'] - - # Find highest priority role from user's groups - highest_role_name = None - highest_priority = -1 - - for group in keycloak_groups: - if group in group_role_mapping: - role_name = group_role_mapping[group] - if role_name in role_priority: - priority = role_priority.index(role_name) - if priority > highest_priority: - highest_priority = priority - highest_role_name = role_name - log.debug(f"Group '{group}' maps to role '{role_name}' (priority {priority})") - - # Return the highest priority role - if highest_role_name: - role = self.find_role(highest_role_name) - if role: - return [role] +# The recaptcha it's automatically enabled for user self registration is active and the keys are necessary +# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY +# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY +# Config for Flask-Mail necessary for user self registration +# MAIL_SERVER = 'smtp.gmail.com' +# MAIL_USE_TLS = True +# MAIL_USERNAME = 'yourappemail@gmail.com' +# MAIL_PASSWORD = 'passwordformail' +# MAIL_DEFAULT_SENDER = 'sender@gmail.com' +# The default user self registration role +AUTH_USER_REGISTRATION_ROLE = "Public" +AUTH_ROLES_SYNC_AT_LOGIN = True +AUTH_ROLES_MAPPING = { + 
"airflow_admin": ["Admin"], + "airflow_op": ["Op"], + "airflow_user": ["User"], + "airflow_viewer": ["Viewer"], + "airflow_public": ["Public"], +} +PROVIDER_NAME = 'keycloak' +CLIENT_ID = 'airflow' +CLIENT_SECRET = 'TODO FILL IN' +OIDC_ISSUER = 'https://dit.kc-test-maap.xyz/realms/MAAP' +OIDC_BASE_URL = "{oidc_issuer}/protocol/openid-connect".format(oidc_issuer=OIDC_ISSUER) +OIDC_TOKEN_URL = "{oidc_base_url}/token".format(oidc_base_url=OIDC_BASE_URL) +OIDC_AUTH_URL = "{oidc_base_url}/auth".format(oidc_base_url=OIDC_BASE_URL) +# When using OAuth Auth, uncomment to setup provider(s) info +OAUTH_PROVIDERS = [{ + 'name':PROVIDER_NAME, + 'token_key':'access_token', + 'icon':'fa-circle-o', + 'remote_app': { + 'api_base_url':OIDC_BASE_URL, + 'access_token_url':OIDC_TOKEN_URL, + 'authorize_url':OIDC_AUTH_URL, + 'request_token_url': None, + 'client_id': CLIENT_ID, + 'client_secret': CLIENT_SECRET, + 'client_kwargs':{ + 'scope': 'email profile' + }, + } +}] + +def get_keycloak_public_key(): + """Fetch Keycloak public key with error handling""" + if not IMPORTS_AVAILABLE: + return None + try: + req = requests.get(OIDC_ISSUER, timeout=5) + req.raise_for_status() + key_der_base64 = req.json()["public_key"] + key_der = b64decode(key_der_base64.encode()) + return serialization.load_der_public_key(key_der) + except Exception as e: + log.error(f"Failed to fetch Keycloak public key: {e}") + return None + +if IMPORTS_AVAILABLE: + class CustomAuthRemoteUserView(AuthOAuthView): + @expose("/logout/") + def logout(self): + """Delete access token before logging out.""" + return super().logout() + + class CustomSecurityManager(AirflowSecurityManager): + authoauthview = CustomAuthRemoteUserView + + def oauth_user_info(self, provider, response): + if provider == PROVIDER_NAME: + public_key = get_keycloak_public_key() + if public_key is None: + log.error("Cannot authenticate: Keycloak public key unavailable") + return {} + + token = response["access_token"] + try: + me = jwt.decode(token, 
public_key, algorithms=['HS256', 'RS256'], audience=CLIENT_ID) + except jwt.InvalidTokenError as e: + log.error(f"Token validation failed: {e}") + return {} + # sample of resource_access + # { + # "resource_access": { "airflow": { "roles": ["airflow_admin"] }} + # } + try: + groups = me["resource_access"]["airflow"]["roles"] + except KeyError: + log.warning("No airflow roles found in token, using default") + groups = [] + if len(groups) < 1: + groups = ["airflow_public"] + else: + groups = [str for str in groups if "airflow" in str] + userinfo = { + "username": me.get("preferred_username"), + "email": me.get("email"), + "first_name": me.get("given_name"), + "last_name": me.get("family_name"), + "role_keys": groups, + } + log.info("user info: {0}".format(userinfo)) + return userinfo else: - log.error(f"Role '{highest_role_name}' not found in Airflow database") - - return [] - -# Set the custom security manager -SECURITY_MANAGER_CLASS = CustomSecurityManager - -# Security settings -WTF_CSRF_ENABLED = True -WTF_CSRF_TIME_LIMIT = None # No time limit for CSRF tokens - -# Session configuration (matches OIDC session duration) -PERMANENT_SESSION_LIFETIME = 28800 # 8 hours - -# Disable public access (all users must authenticate) -AUTH_ROLE_PUBLIC = None + return {} -log.info("Airflow webserver configured for Keycloak OIDC remote user authentication") + SECURITY_MANAGER_CLASS = CustomSecurityManager +else: + SECURITY_MANAGER_CLASS = None \ No newline at end of file diff --git a/airflow/config/webserver_config.py.tpl b/airflow/config/webserver_config.py.tpl index 5619379a..3ffcf3fa 100644 --- a/airflow/config/webserver_config.py.tpl +++ b/airflow/config/webserver_config.py.tpl @@ -1,95 +1,107 @@ -# Keycloak OIDC Remote User Authentication for Airflow -# Authentication happens at Apache proxy layer via mod_auth_openidc -# Airflow trusts the remote user headers from the internal proxy +# Keycloak Direct OIDC Authentication for Airflow +# Airflow authenticates directly with 
Keycloak (no proxy layer) import os import logging -from flask_appbuilder.security.manager import AUTH_REMOTE_USER +from airflow.www.security import AirflowSecurityManager +from flask_appbuilder.security.manager import AUTH_OAUTH log = logging.getLogger(__name__) -# Enable remote user authentication -# Airflow will trust REMOTE_USER header set by the Apache proxy -AUTH_TYPE = AUTH_REMOTE_USER +# Enable OAuth authentication +AUTH_TYPE = AUTH_OAUTH + +# Keycloak OIDC Configuration +OIDC_ISSUER = "${keycloak_provider_url}" +OIDC_CLIENT_ID = "${keycloak_client_id}" + +# Client secret must be provided via environment variable +# Set OIDC_CLIENT_SECRET in your deployment +OIDC_CLIENT_SECRET = os.getenv("OIDC_CLIENT_SECRET", "CHANGE_ME") + +# OAuth provider configuration +OAUTH_PROVIDERS = [ + { + "name": "keycloak", + "icon": "fa-key", + "token_key": "access_token", + "remote_app": { + "client_id": OIDC_CLIENT_ID, + "client_secret": OIDC_CLIENT_SECRET, + "api_base_url": OIDC_ISSUER, + "client_kwargs": { + "scope": "openid email profile groups" + }, + "access_token_url": f"{OIDC_ISSUER}/protocol/openid-connect/token", + "authorize_url": f"{OIDC_ISSUER}/protocol/openid-connect/auth", + "request_token_url": None, + "server_metadata_url": f"{OIDC_ISSUER}/.well-known/openid-configuration", + }, + } +] # Auto-register users on first login AUTH_USER_REGISTRATION = True AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role for new users -# Custom security manager for mapping Keycloak groups to Airflow roles -from airflow.www.security import AirflowSecurityManager - +# Role mapping configuration class CustomSecurityManager(AirflowSecurityManager): """ Custom security manager to map Keycloak groups to Airflow roles. - - This class intercepts remote user authentication and maps the user's - Keycloak groups (from X-Remote-User-Groups header) to Airflow roles.
""" - def auth_user_remote_user(self, username): + def oauth_user_info(self, provider, response): """ - Authenticate user from REMOTE_USER header and map groups to roles. + Get user info from OAuth provider and map groups to roles. Args: - username: Username from REMOTE_USER header (set by mod_auth_openidc) + provider: OAuth provider name + response: OAuth response containing tokens Returns: - User object if authentication succeeds, None otherwise + Dictionary with user information """ - from flask import request - - # Get user info from OIDC headers set by Apache proxy - email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') - full_name = request.headers.get('X-Remote-User-Name', username) - groups_header = request.headers.get('X-Remote-User-Groups', '') - - # Parse full name - first_name, last_name = username, '' - if ' ' in full_name: - first_name, last_name = full_name.split(' ', 1) - - # Parse Keycloak groups from comma-separated header - keycloak_groups = [g.strip() for g in groups_header.split(',') if g.strip()] - - log.info(f"Remote user auth: username={username}, email={email}, groups={keycloak_groups}") - - # Find or create user - user = self.find_user(username=username) - - if not user: - log.info(f"Creating new user: {username}") - user = self.add_user( - username=username, - first_name=first_name, - last_name=last_name, - email=email, - role=self.find_role(self.auth_user_registration_role) - ) - else: - # Update existing user info - log.info(f"Updating existing user: {username}") - user.email = email - user.first_name = first_name - user.last_name = last_name - self.update_user(user) - - # Map Keycloak groups to Airflow roles - airflow_roles = self._map_groups_to_roles(keycloak_groups) - - if airflow_roles: - log.info(f"Assigning roles to {username}: {[r.name for r in airflow_roles]}") - user.roles = airflow_roles - self.update_user(user) - else: - # No matching groups - assign default Viewer role - log.warning(f"No matching 
Keycloak groups for {username}, assigning default Viewer role") - default_role = self.find_role('Viewer') - if default_role: - user.roles = [default_role] - self.update_user(user) + if provider == "keycloak": + # Get user info from Keycloak + import requests + + access_token = response.get("access_token") + if not access_token: + log.error("No access token in OAuth response") + return {} + + # Decode the JWT to get user info and groups + import json + import base64 + + try: + # JWT structure: header.payload.signature + payload = access_token.split('.')[1] + # Add padding if needed + payload += '=' * (4 - len(payload) % 4) + decoded = json.loads(base64.urlsafe_b64decode(payload)) - return user + # Extract user information + user_info = { + "username": decoded.get("preferred_username", ""), + "email": decoded.get("email", ""), + "first_name": decoded.get("given_name", ""), + "last_name": decoded.get("family_name", ""), + "groups": decoded.get("groups", []), + } + + log.info(f"Keycloak user login: {user_info['username']}, groups: {user_info['groups']}") + + # Map groups to roles + user_info["role_keys"] = self._map_groups_to_roles(user_info["groups"]) + + return user_info + + except Exception as e: + log.error(f"Error decoding access token: {e}") + return {} + + return {} def _map_groups_to_roles(self, keycloak_groups): """ @@ -107,7 +119,7 @@ class CustomSecurityManager(AirflowSecurityManager): keycloak_groups: List of Keycloak group names from OIDC token Returns: - List containing single Airflow role object (highest priority) + List of Airflow role names """ # Keycloak group to Airflow role mapping (from Terraform configuration) group_role_mapping = { @@ -135,25 +147,19 @@ class CustomSecurityManager(AirflowSecurityManager): # Return the highest priority role if highest_role_name: - role = self.find_role(highest_role_name) - if role: - return [role] - else: - log.error(f"Role '{highest_role_name}' not found in Airflow database") - - return [] + return 
[highest_role_name] + else: + log.warning(f"No matching Keycloak groups, assigning default role") + return ["Viewer"] # Set the custom security manager SECURITY_MANAGER_CLASS = CustomSecurityManager # Security settings WTF_CSRF_ENABLED = True -WTF_CSRF_TIME_LIMIT = None # No time limit for CSRF tokens +WTF_CSRF_TIME_LIMIT = None -# Session configuration (matches OIDC session duration) +# Session configuration PERMANENT_SESSION_LIFETIME = 28800 # 8 hours -# Disable public access (all users must authenticate) -AUTH_ROLE_PUBLIC = None - -log.info("Airflow webserver configured for Keycloak OIDC remote user authentication") +log.info("Airflow webserver configured for direct Keycloak OIDC authentication") diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index d9a7f9f3..3a68762c 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -165,10 +165,18 @@ webserverSecretKeySecretName: ${webserver_secret_name} webserver: replicas: 3 - # Issue 404: DISABLE AIRRLOW AUTHENTICATION (https://github.com/unity-sds/unity-sps/issues/404) + # Keycloak OIDC Authentication Configuration webserverConfig: |- ${webserver_config} + # Environment variables for OIDC authentication + extraEnv: |- + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: airflow-oidc-secret + key: client-secret + startupProbe: timeoutSeconds: 20 failureThreshold: 60 # Number of tries before giving up (10 minutes with periodSeconds of 10) @@ -372,7 +380,5 @@ extraEnv: | value: "1024" - name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG value: "True" - - name: AIRFLOW__WEBSERVER__AUTH_TYPE - value: "AUTH_REMOTE_USER" - - name: AIRFLOW__WEBSERVER__RBAC - value: "True" + - name: AIRFLOW__WEBSERVER__BASE_URL + value: "TODO FILL IN" diff --git a/airflow/plugins/user_auth.py b/airflow/plugins/user_auth.py new file mode 100644 index 00000000..eceff668 --- /dev/null +++ b/airflow/plugins/user_auth.py @@ -0,0 +1,139 @@ +# Licensed to the Apache Software Foundation (ASF) 
under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" + User authentication backend + References + - https://flask-appbuilder.readthedocs.io/en/latest/_modules/flask_appbuilder/security/manager.html + - https://github.com/apache/airflow/blob/main/airflow/api/auth/backend/basic_auth.py +""" +from __future__ import annotations +import logging + +# Wrap imports that might not be available during Terraform file() reads +try: + from functools import wraps + from typing import Any, Callable, TypeVar, cast + from flask import Response, request + from flask_appbuilder.const import AUTH_OAUTH, AUTH_LDAP, AUTH_DB + from flask_login import login_user + from airflow.utils.airflow_flask_app import get_airflow_app + from airflow.www.fab_security.sqla.models import User + import jwt + import requests + from base64 import b64decode + from cryptography.hazmat.primitives import serialization + IMPORTS_AVAILABLE = True +except ImportError as e: + logging.warning(f"Some imports not available during config read: {e}") + IMPORTS_AVAILABLE = False + # Define minimal fallbacks + AUTH_OAUTH = None + AUTH_LDAP = None + AUTH_DB = None +CLIENT_AUTH: tuple[str, str] | Any | None = None +log = logging.getLogger(__name__) +CLIENT_ID = 'airflow' +OIDC_ISSUER = 'https://dit.kc-test-maap.xyz/realms/MAAP' + +def
get_keycloak_public_key(): + """Fetch Keycloak public key with error handling""" + try: + req = requests.get(OIDC_ISSUER, timeout=5) + req.raise_for_status() + key_der_base64 = req.json()["public_key"] + key_der = b64decode(key_der_base64.encode()) + return serialization.load_der_public_key(key_der) + except Exception as e: + log.error(f"Failed to fetch Keycloak public key: {e}") + return None + +def init_app(_): + """Initializes authentication backend""" + pass + +if IMPORTS_AVAILABLE: + T = TypeVar("T", bound=Callable) + + def auth_current_user() -> User | None: + """Authenticate and set current user if Authorization header exists""" + + ab_security_manager = get_airflow_app().appbuilder.sm + user = None + if ab_security_manager.auth_type == AUTH_OAUTH: + auth_header = request.headers.get('Authorization') + if not auth_header: + return None + + public_key = get_keycloak_public_key() + if public_key is None: + log.error("Cannot authenticate: Keycloak public key unavailable") + return None + + token = auth_header.replace('Bearer ', '') + try: + me = jwt.decode(token, public_key, algorithms=['HS256', 'RS256'], audience=CLIENT_ID) + except jwt.InvalidTokenError as e: + log.error(f"Token validation failed: {e}") + return None + + try: + groups = me["resource_access"]["airflow"]["roles"] + except KeyError: + log.warning("No airflow roles found in token, using default") + groups = [] + if len(groups) < 1: + groups = ["airflow_public"] + else: + groups = [str for str in groups if "airflow" in str] + userinfo = { + "username": me.get("preferred_username"), + "email": me.get("email"), + "first_name": me.get("given_name"), + "last_name": me.get("family_name"), + "role_keys": groups, + } + user = ab_security_manager.auth_user_oauth(userinfo) + else: + auth = request.authorization + if auth is None or not auth.username or not auth.password: + return None + if ab_security_manager.auth_type == AUTH_LDAP: + user = ab_security_manager.auth_user_ldap(auth.username, auth.password) 
+ if ab_security_manager.auth_type == AUTH_DB: + user = ab_security_manager.auth_user_db(auth.username, auth.password) + log.info("user: {0}".format(user)) + if user is not None: + login_user(user, remember=False) + return user + + def requires_authentication(function: T): + """Decorator for functions that require authentication""" + @wraps(function) + def decorated(*args, **kwargs): + if auth_current_user() is not None: + return function(*args, **kwargs) + else: + return Response("Unauthorized", 401, {"WWW-Authenticate": "Basic"}) + + return cast(T, decorated) +else: + # Fallback functions when imports are not available + def auth_current_user(): + return None + + def requires_authentication(function): + return function diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index fdc67fd3..3eedd550 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -49,6 +49,23 @@ resource "kubernetes_secret" "airflow_webserver" { } } +# Keycloak OIDC client secret for direct authentication +resource "kubernetes_secret" "airflow_oidc" { + count = var.enable_oidc_auth ? 1 : 0 + metadata { + name = "airflow-oidc-secret" + namespace = data.kubernetes_namespace.service_area.metadata[0].name + } + data = { + "client-secret" = data.aws_ssm_parameter.keycloak_client_secret[0].value + } +} + +data "aws_ssm_parameter" "keycloak_client_secret" { + count = var.enable_oidc_auth ? 
1 : 0 + name = var.keycloak_client_secret_ssm_param +} + # TODO evaluate if this role is still necessary resource "kubernetes_role" "airflow_pod_creator" { metadata { @@ -413,9 +430,11 @@ resource "helm_release" "airflow" { unity_cluster_name = data.aws_eks_cluster.cluster.name karpenter_node_pools = join(",", var.karpenter_node_pools) cwl_dag_ecr_uri = "${data.aws_caller_identity.current.account_id}.dkr.ecr.us-west-2.amazonaws.com" - # Keycloak OIDC authentication configuration + # Keycloak Direct OIDC authentication configuration webserver_config = indent(4, templatefile("${path.module}/../../../airflow/config/webserver_config.py.tpl", { keycloak_role_mapping = var.keycloak_role_mapping + keycloak_provider_url = var.keycloak_provider_url + keycloak_client_id = var.keycloak_client_id })) }) ] @@ -567,9 +586,10 @@ resource "kubernetes_service" "airflow_ingress_internal" { } } wait_for_load_balancer = true - lifecycle { # this is necessary or terraform will try to recreate this every run - ignore_changes = all - } + # Temporarily disabled to allow updating load balancer scheme to internet-facing + # lifecycle { # this is necessary or terraform will try to recreate this every run + # ignore_changes = all + # } depends_on = [helm_release.airflow] } @@ -742,17 +762,9 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "management", "proxy", "configurations", "015-sps-airflow-ui"]))) - description = "The unity-proxy configuration for the Airflow UI with optional OIDC." + description = "The unity-proxy configuration for the Airflow UI" type = "String" - value = var.enable_oidc_auth ? 
templatefile("${path.module}/templates/proxy_oidc.conf.tpl", { - project = var.project - venue = var.venue - airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname - keycloak_provider_url = var.keycloak_provider_url - keycloak_client_id = var.keycloak_client_id - keycloak_client_secret_ssm_param = var.keycloak_client_secret_ssm_param - proxy_domain = var.proxy_domain - }) : <<-EOT + value = <<-EOT ProxyPassReverse "/" From a9263efefafcf0e4d31bbd5f37139a475b69362b Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 10 Dec 2025 13:56:59 -0800 Subject: [PATCH 4/6] added more print statements: deployment working and being prompted to login with keycloak --- adding-keycloak-plan.md | 539 ------------------------- airflow/config/webserver_config.py.tpl | 81 +++- airflow/helm/values.tmpl.yaml | 13 +- 3 files changed, 65 insertions(+), 568 deletions(-) delete mode 100644 adding-keycloak-plan.md diff --git a/adding-keycloak-plan.md b/adding-keycloak-plan.md deleted file mode 100644 index 7883dda4..00000000 --- a/adding-keycloak-plan.md +++ /dev/null @@ -1,539 +0,0 @@ -# Keycloak OIDC Authentication for Airflow Implementation Plan - -## Overview -Integrate Keycloak OIDC authentication with Airflow using the Apache HTTPD proxy layer with role-based access control (RBAC). 
- -**Architecture:** User → Keycloak (OIDC) → Apache Proxy (mod_auth_openidc) → Internal NLB → Airflow (Remote User Auth + RBAC) - -## Prerequisites -- Keycloak instance URL, realm name, client ID, and client secret -- Apache HTTPD proxy with mod_auth_openidc module installed -- Venue proxy IAM role needs Secrets Manager read permissions (coordinate with CS team if needed) - ---- - -## Phase 1: Terraform Infrastructure Changes - -### 1.1 Add Keycloak Variables -**File:** `terraform-unity/modules/terraform-unity-sps-airflow/variables.tf` - -Add after line 84: -```hcl -variable "keycloak_provider_url" { - description = "Keycloak OIDC provider URL (e.g., https://keycloak.example.com/realms/unity)" - type = string - default = "" -} - -variable "keycloak_client_id" { - description = "Keycloak OIDC client ID" - type = string - default = "" -} - -variable "keycloak_client_secret" { - description = "Keycloak OIDC client secret" - type = string - sensitive = true - default = "" -} - -variable "enable_oidc_auth" { - description = "Enable Keycloak OIDC authentication" - type = bool - default = false -} - -variable "keycloak_role_mapping" { - description = "Mapping of Keycloak groups to Airflow roles" - type = map(list(string)) - default = { - "airflow-admins" = ["Admin"] - "airflow-ops" = ["Op"] - "airflow-users" = ["User"] - "airflow-viewers" = ["Viewer"] - } -} -``` - -### 1.2 Create Secrets Manager Resources -**File:** `terraform-unity/modules/terraform-unity-sps-airflow/main.tf` - -Add after line 766 (after existing SSM parameters): -```hcl -# Keycloak client secret in Secrets Manager -resource "aws_secretsmanager_secret" "keycloak_client_secret" { - count = var.enable_oidc_auth ? 
1 : 0 - name = format(local.resource_name_prefix, "keycloak-client-secret") - description = "Keycloak OIDC client secret for Airflow" - recovery_window_in_days = 7 - tags = merge(local.common_tags, { - Name = format(local.resource_name_prefix, "keycloak-client-secret") - Component = "airflow" - }) -} - -resource "aws_secretsmanager_secret_version" "keycloak_client_secret" { - count = var.enable_oidc_auth ? 1 : 0 - secret_id = aws_secretsmanager_secret.keycloak_client_secret[0].id - secret_string = var.keycloak_client_secret -} - -# SSM parameters for Keycloak config -resource "aws_ssm_parameter" "keycloak_config" { - for_each = var.enable_oidc_auth ? { - provider_url = var.keycloak_provider_url - client_id = var.keycloak_client_id - client_secret_arn = try(aws_secretsmanager_secret.keycloak_client_secret[0].arn, "") - } : {} - - name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "security", "keycloak", each.key]))) - type = "String" - value = each.value - tags = merge(local.common_tags, { Component = "airflow" }) -} -``` - -### 1.3 Update Proxy SSM Parameter -**File:** `terraform-unity/modules/terraform-unity-sps-airflow/main.tf` - -Replace lines 740-766 (aws_ssm_parameter.unity_proxy_airflow_ui): -```hcl -resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { - name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "management", "proxy", "configurations", "015-sps-airflow-ui"]))) - description = "Unity-proxy configuration for Airflow UI with optional OIDC" - type = "String" - value = var.enable_oidc_auth ? 
templatefile("${path.module}/templates/proxy_oidc.conf.tpl", { - project = var.project - venue = var.venue - airflow_nlb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname - keycloak_provider_url = var.keycloak_provider_url - keycloak_client_id = var.keycloak_client_id - }) : <<-EOT - - - ProxyPassReverse "/" - - - Redirect "/${var.project}/${var.venue}/sps/home" - - - ProxyPassMatch "http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" retry=5 disablereuse=On - ProxyPreserveHost On - FallbackResource /management/index.html - AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html - Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/sps/$1|q" - - -EOT - tags = merge(local.common_tags, { Component = "SSM" }) -} -``` - -### 1.4 Create Proxy Template File -**File:** `terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl` (NEW) - -```apache -# Apache mod_auth_openidc configuration for Keycloak -OIDCProviderMetadataURL ${keycloak_provider_url}/.well-known/openid-configuration -OIDCClientID ${keycloak_client_id} -OIDCClientSecret "REPLACE_WITH_SECRET_FROM_SECRETS_MANAGER" -OIDCRedirectURI https://REPLACE_WITH_PROXY_DOMAIN/${project}/${venue}/sps/redirect_uri -OIDCCryptoPassphrase "REPLACE_WITH_GENERATED_PASSPHRASE" - -# Session config -OIDCSessionInactivityTimeout 3600 -OIDCSessionMaxDuration 28800 - -# Claims -OIDCRemoteUserClaim preferred_username -OIDCScope "openid email profile groups" - -# Cookie settings -OIDCCookiePath /${project}/${venue}/sps/ -OIDCCookieSameSite On - - - AuthType openid-connect - Require valid-user - - # Forward OIDC claims to Airflow - RequestHeader set X-Remote-User "%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" - - ProxyPassReverse 
"/" - - - - Redirect "/${project}/${venue}/sps/home" - - - - AuthType openid-connect - Require valid-user - - RequestHeader set X-Remote-User "%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%{OIDC_CLAIM_name}e" - - ProxyPassMatch "http://${airflow_nlb_hostname}:5000/$1" retry=5 disablereuse=On - ProxyPreserveHost On - FallbackResource /management/index.html - AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html - Substitute "s|\"/([^\"]*)|\"/${project}/${venue}/sps/$1|q" - -``` - -### 1.5 Update tfvars File -**File:** `terraform-unity/tfvars/unity-dev-sps-airflow.tfvars` - -Add at end of file: -```hcl -# Keycloak OIDC Configuration -enable_oidc_auth = false # Set to true when ready to enable -keycloak_provider_url = "https://keycloak.example.com/realms/unity" # REPLACE -keycloak_client_id = "airflow-unity-dev" # REPLACE -keycloak_client_secret = "your-client-secret" # REPLACE - keep secret! 
- -keycloak_role_mapping = { - "airflow-admins" = ["Admin"] - "airflow-ops" = ["Op"] - "airflow-users" = ["User"] - "airflow-viewers" = ["Viewer"] -} -``` - ---- - -## Phase 2: Airflow Configuration - -### 2.1 Replace Webserver Config -**File:** `airflow/config/webserver_config.py` - -Replace entire file with: -```python -# Keycloak OIDC Remote User Authentication -import os -import logging -from flask_appbuilder.security.manager import AUTH_REMOTE_USER - -log = logging.getLogger(__name__) - -AUTH_TYPE = AUTH_REMOTE_USER -AUTH_USER_REGISTRATION = True -AUTH_USER_REGISTRATION_ROLE = "Viewer" - -from airflow.www.security import AirflowSecurityManager - -class CustomSecurityManager(AirflowSecurityManager): - """Map Keycloak groups to Airflow roles""" - - def auth_user_remote_user(self, username): - from flask import request - - email = request.headers.get('X-Remote-User-Email', f'{username}@example.com') - full_name = request.headers.get('X-Remote-User-Name', username) - groups = request.headers.get('X-Remote-User-Groups', '') - - first_name, last_name = username, '' - if ' ' in full_name: - first_name, last_name = full_name.split(' ', 1) - - keycloak_groups = [g.strip() for g in groups.split(',') if g.strip()] - log.info(f"Auth: {username}, groups: {keycloak_groups}") - - user = self.find_user(username=username) - if not user: - user = self.add_user( - username=username, - first_name=first_name, - last_name=last_name, - email=email, - role=self.find_role(self.auth_user_registration_role) - ) - - # Map groups to roles - role_mapping = { - 'airflow-admins': 'Admin', - 'airflow-ops': 'Op', - 'airflow-users': 'User', - 'airflow-viewers': 'Viewer', - } - - role_priority = ['Viewer', 'User', 'Op', 'Admin'] - highest_role = None - highest_priority = -1 - - for group in keycloak_groups: - if group in role_mapping: - role_name = role_mapping[group] - if role_name in role_priority: - priority = role_priority.index(role_name) - if priority > highest_priority: - highest_priority 
= priority - highest_role = role_name - - if highest_role: - role = self.find_role(highest_role) - if role: - user.roles = [role] - self.update_user(user) - log.info(f"Assigned role {highest_role} to {username}") - - return user - -SECURITY_MANAGER_CLASS = CustomSecurityManager -WTF_CSRF_ENABLED = True -PERMANENT_SESSION_LIFETIME = 28800 -AUTH_ROLE_PUBLIC = None - -log.info("Airflow configured for OIDC remote user authentication") -``` - -### 2.2 Update Helm Values -**File:** `airflow/helm/values.tmpl.yaml` - -Add after line 374 (in extraEnv section): -```yaml - - name: AIRFLOW__WEBSERVER__AUTH_TYPE - value: "AUTH_REMOTE_USER" - - name: AIRFLOW__WEBSERVER__RBAC - value: "True" -``` - ---- - -## Phase 3: Keycloak Configuration (External) - -### 3.1 Create Keycloak Client -In Keycloak admin console: -1. Create new OIDC client: `airflow-{project}-{venue}` -2. Access Type: `confidential` -3. Valid Redirect URIs: `https://{proxy-domain}/{project}/{venue}/sps/*` -4. Client Scopes: Add `groups` scope with Group Membership mapper -5. Save and copy the client secret - -### 3.2 Create Keycloak Groups -Create these groups: -- `airflow-admins` - Full admin access -- `airflow-ops` - Operational access -- `airflow-users` - User access -- `airflow-viewers` - Read-only access - -### 3.3 Assign Test Users -Add test users to groups for validation. 
- ---- - -## Phase 4: Proxy Server Configuration - -### 4.1 Install mod_auth_openidc -On venue proxy server: -```bash -# Amazon Linux 2 -sudo yum install -y mod_auth_openidc - -# Verify module -httpd -M | grep auth_openidc -``` - -### 4.2 Create Secret Retrieval Script -**File:** `/etc/httpd/scripts/update-keycloak-secret.sh` (on proxy server) - -```bash -#!/bin/bash -# Retrieve Keycloak client secret and update Apache config - -PROJECT="unity" -VENUE="dev" - -# Get secret ARN from SSM -SECRET_ARN=$(aws ssm get-parameter \ - --name "/unity/${PROJECT}/${VENUE}/cs/security/keycloak/client_secret_arn" \ - --query 'Parameter.Value' --output text) - -# Get actual secret -CLIENT_SECRET=$(aws secretsmanager get-secret-value \ - --secret-id "$SECRET_ARN" \ - --query 'SecretString' --output text) - -# Get proxy config from SSM -aws ssm get-parameter \ - --name "/unity/${PROJECT}/${VENUE}/cs/management/proxy/configurations/015-sps-airflow-ui" \ - --query 'Parameter.Value' --output text > /tmp/airflow-oidc.conf - -# Replace placeholders -sed -i "s/REPLACE_WITH_SECRET_FROM_SECRETS_MANAGER/${CLIENT_SECRET}/" /tmp/airflow-oidc.conf - -# Generate crypto passphrase -CRYPTO_PASS=$(openssl rand -base64 32) -sed -i "s/REPLACE_WITH_GENERATED_PASSPHRASE/${CRYPTO_PASS}/" /tmp/airflow-oidc.conf - -# Replace proxy domain (adjust as needed) -sed -i "s/REPLACE_WITH_PROXY_DOMAIN/unity-dev-proxy.example.com/" /tmp/airflow-oidc.conf - -# Install config -sudo cp /tmp/airflow-oidc.conf /etc/httpd/conf.d/ -sudo systemctl reload httpd - -echo "Keycloak configuration updated" -``` - -### 4.3 Add IAM Permissions -The venue proxy IAM role needs this policy: -```json -{ - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": [ - "secretsmanager:GetSecretValue", - "secretsmanager:DescribeSecret" - ], - "Resource": "arn:aws:secretsmanager:*:*:secret:*-sps-keycloak-client-secret-*" - }] -} -``` - ---- - -## Phase 5: Deployment Steps - -### Step 1: Apply Infrastructure (OIDC 
Disabled) -```bash -cd terraform-unity/ -terraform plan -var-file="tfvars/unity-dev-sps-airflow.tfvars" -terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" -``` - -This creates Secrets Manager secret and SSM parameters but keeps OIDC disabled. - -### Step 2: Configure Proxy Server -1. Install mod_auth_openidc on venue proxy -2. Add IAM permissions for Secrets Manager access -3. Run secret retrieval script -4. Verify Apache config loads without errors - -### Step 3: Enable OIDC -Update `tfvars/unity-dev-sps-airflow.tfvars`: -```hcl -enable_oidc_auth = true -``` - -Apply: -```bash -terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" -``` - -This updates the proxy SSM parameter with OIDC config. Lambda auto-deploys it. - -### Step 4: Restart Airflow -```bash -kubectl rollout restart deployment/airflow-webserver -n sps -``` - -### Step 5: Test Authentication -1. Navigate to `https://{proxy-domain}/{project}/{venue}/sps/` -2. Should redirect to Keycloak login -3. Login with test admin user -4. Verify you're logged into Airflow as Admin - ---- - -## Phase 6: Validation - -### Security Tests -- [ ] Verify OIDC redirect works -- [ ] Verify session timeout (8 hours) -- [ ] Verify logout works -- [ ] Test each role (Admin, Op, User, Viewer) -- [ ] Verify role permissions enforce correctly - -### Role Mapping Tests -- [ ] Login as airflow-admins member → Admin role -- [ ] Login as airflow-ops member → Op role -- [ ] Login as airflow-users member → User role -- [ ] Login as airflow-viewers member → Viewer role - -### Negative Tests -- [ ] User with no groups → Viewer role (default) -- [ ] Invalid Keycloak credentials → Access denied -- [ ] Expired session → Redirect to login - ---- - -## Rollback Plan - -If issues occur: - -**Quick Disable:** -```bash -# Set enable_oidc_auth = false in tfvars -terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" -``` - -This reverts proxy to non-OIDC configuration (open access). 
- -**Full Rollback:** -```bash -git checkout HEAD~1 airflow/config/webserver_config.py -terraform apply -var-file="tfvars/unity-dev-sps-airflow.tfvars" \ - -var="enable_oidc_auth=false" -kubectl rollout restart deployment/airflow-webserver -n sps -``` - ---- - -## Critical Files - -1. **terraform-unity/modules/terraform-unity-sps-airflow/main.tf** (lines 740-790) - - Add Secrets Manager and SSM resources - - Update proxy SSM parameter with template - -2. **terraform-unity/modules/terraform-unity-sps-airflow/variables.tf** (after line 84) - - Add Keycloak configuration variables - -3. **terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl** (NEW) - - Apache HTTPD OIDC configuration template - -4. **airflow/config/webserver_config.py** (replace entire file) - - Enable remote user auth and RBAC with role mapping - -5. **airflow/helm/values.tmpl.yaml** (lines 374+) - - Add environment variables for remote user auth - -6. **terraform-unity/tfvars/unity-dev-sps-airflow.tfvars** (append) - - Add Keycloak connection details - ---- - -## Security Considerations - -1. **Client secret** stored in Secrets Manager (encrypted) -2. **Internal NLB** prevents direct header spoofing -3. **Network isolation** - proxy is only entry point -4. **Defense in depth** - OIDC at proxy + RBAC in Airflow -5. 
**Least privilege** - Default role is Viewer (read-only) - ---- - -## Post-Implementation - -### Documentation Needed -- User guide: How to login with Keycloak -- Admin guide: How to manage groups and roles -- Troubleshooting: Common OIDC issues - -### Monitoring -- OIDC authentication success/failure rates -- Session timeout events -- Unauthorized access attempts -- Secrets Manager access logs - -### Future Enhancements -- API authentication with OIDC bearer tokens -- DAG-level permissions based on groups -- Audit logging integration diff --git a/airflow/config/webserver_config.py.tpl b/airflow/config/webserver_config.py.tpl index 3ffcf3fa..8c8f0e83 100644 --- a/airflow/config/webserver_config.py.tpl +++ b/airflow/config/webserver_config.py.tpl @@ -16,7 +16,6 @@ OIDC_ISSUER = "${keycloak_provider_url}" OIDC_CLIENT_ID = "${keycloak_client_id}" # Client secret must be provided via environment variable -# Set AIRFLOW__WEBSERVER__SECRET_KEY in your deployment OIDC_CLIENT_SECRET = os.getenv("OIDC_CLIENT_SECRET", "CHANGE_ME") # OAuth provider configuration @@ -62,43 +61,75 @@ class CustomSecurityManager(AirflowSecurityManager): Dictionary with user information """ if provider == "keycloak": - # Get user info from Keycloak - import requests + import json + import base64 + + # Log the OAuth response structure (without sensitive token values) + log.info(f"OAuth callback from provider: {provider}") + log.info(f"OAuth response keys: {list(response.keys())}") + # Get access token access_token = response.get("access_token") if not access_token: - log.error("No access token in OAuth response") + log.error(f"No access token in OAuth response. 
Response keys: {list(response.keys())}") + log.error(f"Full response (for debugging): {response}") return {} - # Decode the JWT to get user info and groups - import json - import base64 - try: + # Decode JWT to get user info and groups # JWT structure: header.payload.signature - payload = access_token.split('.')[1] + parts = access_token.split('.') + if len(parts) != 3: + log.error(f"Invalid JWT format. Expected 3 parts, got {len(parts)}") + return {} + + payload = parts[1] # Add padding if needed payload += '=' * (4 - len(payload) % 4) decoded = json.loads(base64.urlsafe_b64decode(payload)) - # Extract user information + # Log what we received from Keycloak (useful for debugging) + log.info(f"JWT payload keys: {list(decoded.keys())}") + log.info(f"Available claims: username={decoded.get('preferred_username')}, email={decoded.get('email')}") + log.info(f"Groups in token: {decoded.get('groups', [])}") + + # Extract user information (with fallbacks for different claim names) + username = decoded.get("preferred_username") or decoded.get("username") or decoded.get("sub") + email = decoded.get("email", f"{username}@example.com") + first_name = decoded.get("given_name") or decoded.get("first_name") or username + last_name = decoded.get("family_name") or decoded.get("last_name") or "" + + # Groups might be in different formats depending on Keycloak mapper config + groups = decoded.get("groups", []) + if isinstance(groups, str): + groups = [groups] + + # Some Keycloak configs put groups in realm_access or resource_access + if not groups and "realm_access" in decoded: + groups = decoded["realm_access"].get("roles", []) + if not groups and "resource_access" in decoded: + client_access = decoded["resource_access"].get(OIDC_CLIENT_ID, {}) + groups = client_access.get("roles", []) + user_info = { - "username": decoded.get("preferred_username", ""), - "email": decoded.get("email", ""), - "first_name": decoded.get("given_name", ""), - "last_name": decoded.get("family_name", ""), 
- "groups": decoded.get("groups", []), + "username": username, + "email": email, + "first_name": first_name, + "last_name": last_name, + "groups": groups, } - log.info(f"Keycloak user login: {user_info['username']}, groups: {user_info['groups']}") + log.info(f"Keycloak user login: username={user_info['username']}, email={user_info['email']}, groups={user_info['groups']}") # Map groups to roles user_info["role_keys"] = self._map_groups_to_roles(user_info["groups"]) + log.info(f"Mapped to Airflow roles: {user_info['role_keys']}") return user_info except Exception as e: - log.error(f"Error decoding access token: {e}") + log.error(f"Error decoding access token: {e}", exc_info=True) + log.error(f"Token (first 50 chars): {access_token[:50]}...") return {} return {} @@ -128,6 +159,9 @@ class CustomSecurityManager(AirflowSecurityManager): %{ endfor ~} } + log.debug(f"Group role mapping: {group_role_mapping}") + log.debug(f"User's Keycloak groups: {keycloak_groups}") + # Role priority (higher index = higher priority) role_priority = ['Public', 'Viewer', 'User', 'Op', 'Admin'] @@ -136,20 +170,23 @@ class CustomSecurityManager(AirflowSecurityManager): highest_priority = -1 for group in keycloak_groups: - if group in group_role_mapping: - role_name = group_role_mapping[group] + # Handle group paths (e.g., "/airflow/admin" or "airflow_admin") + group_name = group.split('/')[-1] # Get last part of path + + if group_name in group_role_mapping: + role_name = group_role_mapping[group_name] if role_name in role_priority: priority = role_priority.index(role_name) if priority > highest_priority: highest_priority = priority highest_role_name = role_name - log.debug(f"Group '{group}' maps to role '{role_name}' (priority {priority})") + log.info(f"Group '{group}' maps to role '{role_name}' (priority {priority})") # Return the highest priority role if highest_role_name: return [highest_role_name] else: - log.warning(f"No matching Keycloak groups, assigning default role") + 
log.warning(f"No matching Keycloak groups found in {keycloak_groups}, assigning default role Viewer") return ["Viewer"] # Set the custom security manager @@ -163,3 +200,5 @@ WTF_CSRF_TIME_LIMIT = None PERMANENT_SESSION_LIFETIME = 28800 # 8 hours log.info("Airflow webserver configured for direct Keycloak OIDC authentication") +log.info(f"Keycloak provider: {OIDC_ISSUER}") +log.info(f"Keycloak client: {OIDC_CLIENT_ID}") diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 3a68762c..382d6ad6 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -169,14 +169,6 @@ webserver: webserverConfig: |- ${webserver_config} - # Environment variables for OIDC authentication - extraEnv: |- - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: airflow-oidc-secret - key: client-secret - startupProbe: timeoutSeconds: 20 failureThreshold: 60 # Number of tries before giving up (10 minutes with periodSeconds of 10) @@ -382,3 +374,8 @@ extraEnv: | value: "True" - name: AIRFLOW__WEBSERVER__BASE_URL value: "TODO FILL IN" + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: airflow-oidc-secret + key: client-secret From 5617ecea1defd966969a015d21d7f9264e6c0a45 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 17 Dec 2025 11:25:14 -0800 Subject: [PATCH 5/6] dynamically setting base url --- airflow/helm/values.tmpl.yaml | 2 +- .../terraform-unity-sps-airflow/locals.tf | 2 ++ .../terraform-unity-sps-airflow/main.tf | 29 +++++++++++++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 382d6ad6..94f9075d 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -373,7 +373,7 @@ extraEnv: | - name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG value: "True" - name: AIRFLOW__WEBSERVER__BASE_URL - value: "TODO FILL IN" + value: "${airflow_base_url}" - name: OIDC_CLIENT_SECRET valueFrom: secretKeyRef: diff --git 
a/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf b/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf index 87343dd0..f3513533 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf @@ -26,4 +26,6 @@ locals { "dev" = "#58cc35" "sbg-dev" = "#58cc35" }[var.venue] + # BASE_URL uses placeholder initially, updated by null_resource after LB is created + airflow_base_url = "http://placeholder:${local.load_balancer_port}" } diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index 3eedd550..c22f610a 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -430,6 +430,7 @@ resource "helm_release" "airflow" { unity_cluster_name = data.aws_eks_cluster.cluster.name karpenter_node_pools = join(",", var.karpenter_node_pools) cwl_dag_ecr_uri = "${data.aws_caller_identity.current.account_id}.dkr.ecr.us-west-2.amazonaws.com" + airflow_base_url = local.airflow_base_url # Keycloak Direct OIDC authentication configuration webserver_config = indent(4, templatefile("${path.module}/../../../airflow/config/webserver_config.py.tpl", { keycloak_role_mapping = var.keycloak_role_mapping @@ -454,6 +455,30 @@ resource "helm_release" "airflow" { ] } +# Update Airflow BASE_URL after LoadBalancer is created +resource "null_resource" "update_airflow_base_url" { + triggers = { + lb_hostname = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname + } + + provisioner "local-exec" { + command = < Date: Mon, 22 Dec 2025 13:22:25 -0800 Subject: [PATCH 6/6] cleaned up code and unnecessary files --- airflow/config/webserver_config.py.tpl | 23 ++- airflow/plugins/user_auth.py | 139 ------------------ .../terraform-unity-sps-airflow/main.tf | 3 - .../templates/proxy_oidc.conf.tpl | 75 
---------- .../terraform-unity-sps-airflow/variables.tf | 6 - 5 files changed, 19 insertions(+), 227 deletions(-) delete mode 100644 airflow/plugins/user_auth.py delete mode 100644 terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl diff --git a/airflow/config/webserver_config.py.tpl b/airflow/config/webserver_config.py.tpl index 8c8f0e83..86d9b241 100644 --- a/airflow/config/webserver_config.py.tpl +++ b/airflow/config/webserver_config.py.tpl @@ -39,14 +39,18 @@ OAUTH_PROVIDERS = [ } ] -# Auto-register users on first login +# Auto-register users on first login (only if they have approved Keycloak groups) +# Users without approved groups will be rejected during authentication AUTH_USER_REGISTRATION = True -AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role for new users +AUTH_USER_REGISTRATION_ROLE = "Viewer" # Not used - role determined by Keycloak group mapping # Role mapping configuration class CustomSecurityManager(AirflowSecurityManager): """ Custom security manager to map Keycloak groups to Airflow roles. + + IMPORTANT: Users must have at least one approved Keycloak group to access Airflow. + Users without approved groups will be denied access during authentication. """ def oauth_user_info(self, provider, response): @@ -146,11 +150,16 @@ class CustomSecurityManager(AirflowSecurityManager): Users with multiple groups get the highest priority role. Priority: Admin > Op > User > Viewer > Public + IMPORTANT: Users without any approved Keycloak groups will be rejected. 
+ Args: keycloak_groups: List of Keycloak group names from OIDC token Returns: List of Airflow role names + + Raises: + Exception: If user has no approved Keycloak groups (access denied) """ # Keycloak group to Airflow role mapping (from Terraform configuration) group_role_mapping = { @@ -186,8 +195,14 @@ class CustomSecurityManager(AirflowSecurityManager): if highest_role_name: return [highest_role_name] else: - log.warning(f"No matching Keycloak groups found in {keycloak_groups}, assigning default role Viewer") - return ["Viewer"] + # Reject users who don't have any approved Keycloak groups + log.error(f"Access denied: User has no approved Keycloak groups. User groups: {keycloak_groups}") + log.error("User must be assigned to one of these Keycloak groups to access Airflow:") + log.error(f" Approved groups: {list(group_role_mapping.keys())}") + raise Exception( + "Access denied: You are not assigned to any approved Keycloak groups. " + "Please contact your administrator to request access." + ) # Set the custom security manager SECURITY_MANAGER_CLASS = CustomSecurityManager diff --git a/airflow/plugins/user_auth.py b/airflow/plugins/user_auth.py deleted file mode 100644 index eceff668..00000000 --- a/airflow/plugins/user_auth.py +++ /dev/null @@ -1,139 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -""" - User authentication backend - Referencies - - https://flask-appbuilder.readthedocs.io/en/latest/_modules/flask_appbuilder/security/manager.html - - https://github.com/apache/airflow/blob/main/airflow/api/auth/backend/basic_auth.py -""" -from __future__ import annotations -import logging - -# Wrap imports that might not be available during Terraform file() reads -try: - from functools import wraps - from typing import Any, Callable, TypeVar, cast - from flask import Response, request - from flask_appbuilder.const import AUTH_OAUTH, AUTH_LDAP, AUTH_DB - from flask_login import login_user - from airflow.utils.airflow_flask_app import get_airflow_app - from airflow.www.fab_security.sqla.models import User - import jwt - import requests - from base64 import b64decode - from cryptography.hazmat.primitives import serialization - IMPORTS_AVAILABLE = True -except ImportError as e: - logging.warning(f"Some imports not available during config read: {e}") - IMPORTS_AVAILABLE = False - # Define minimal fallbacks - AUTH_OAUTH = None - AUTH_LDAP = None - AUTH_DB = None -CLIENT_AUTH: tuple[str, str] | Any | None = None -log = logging.getLogger(__name__) -CLIENT_ID = 'airflow' -OIDC_ISSUER = 'https://dit.kc-test-maap.xyz/realms/MAAP' - -def get_keycloak_public_key(): - """Fetch Keycloak public key with error handling""" - try: - req = requests.get(OIDC_ISSUER, timeout=5) - req.raise_for_status() - key_der_base64 = req.json()["public_key"] - key_der = b64decode(key_der_base64.encode()) - return serialization.load_der_public_key(key_der) - except Exception as e: - log.error(f"Failed to fetch Keycloak public key: {e}") - return None - -def init_app(_): - """Initializes authentication backend""" - pass - -if IMPORTS_AVAILABLE: - T = TypeVar("T", bound=Callable) - - def auth_current_user() -> User | None: - """Authenticate and set current user if Authorization header exists""" 
- - ab_security_manager = get_airflow_app().appbuilder.sm - user = None - if ab_security_manager.auth_type == AUTH_OAUTH: - auth_header = request.headers.get('Authorization') - if not auth_header: - return None - - public_key = get_keycloak_public_key() - if public_key is None: - log.error("Cannot authenticate: Keycloak public key unavailable") - return None - - token = auth_header.replace('Bearer ', '') - try: - me = jwt.decode(token, public_key, algorithms=['HS256', 'RS256'], audience=CLIENT_ID) - except jwt.InvalidTokenError as e: - log.error(f"Token validation failed: {e}") - return None - - try: - groups = me["resource_access"]["airflow"]["roles"] - except KeyError: - log.warning("No airflow roles found in token, using default") - groups = [] - if len(groups) < 1: - groups = ["airflow_public"] - else: - groups = [str for str in groups if "airflow" in str] - userinfo = { - "username": me.get("preferred_username"), - "email": me.get("email"), - "first_name": me.get("given_name"), - "last_name": me.get("family_name"), - "role_keys": groups, - } - user = ab_security_manager.auth_user_oauth(userinfo) - else: - auth = request.authorization - if auth is None or not auth.username or not auth.password: - return None - if ab_security_manager.auth_type == AUTH_LDAP: - user = ab_security_manager.auth_user_ldap(auth.username, auth.password) - if ab_security_manager.auth_type == AUTH_DB: - user = ab_security_manager.auth_user_db(auth.username, auth.password) - log.info("user: {0}".format(user)) - if user is not None: - login_user(user, remember=False) - return user - - def requires_authentication(function: T): - """Decorator for functions that require authentication""" - @wraps(function) - def decorated(*args, **kwargs): - if auth_current_user() is not None: - return function(*args, **kwargs) - else: - return Response("Unauthorized", 401, {"WWW-Authenticate": "Basic"}) - - return cast(T, decorated) -else: - # Fallback functions when imports are not available - def 
auth_current_user(): - return None - - def requires_authentication(function): - return function diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index d99e2caf..c22f610a 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -430,7 +430,6 @@ resource "helm_release" "airflow" { unity_cluster_name = data.aws_eks_cluster.cluster.name karpenter_node_pools = join(",", var.karpenter_node_pools) cwl_dag_ecr_uri = "${data.aws_caller_identity.current.account_id}.dkr.ecr.us-west-2.amazonaws.com" -<<<<<<< HEAD airflow_base_url = local.airflow_base_url # Keycloak Direct OIDC authentication configuration webserver_config = indent(4, templatefile("${path.module}/../../../airflow/config/webserver_config.py.tpl", { @@ -438,8 +437,6 @@ resource "helm_release" "airflow" { keycloak_provider_url = var.keycloak_provider_url keycloak_client_id = var.keycloak_client_id })) -======= ->>>>>>> develop }) ] set_sensitive { diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl b/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl deleted file mode 100644 index fd2d92ba..00000000 --- a/terraform-unity/modules/terraform-unity-sps-airflow/templates/proxy_oidc.conf.tpl +++ /dev/null @@ -1,75 +0,0 @@ -# Apache mod_auth_openidc configuration for Keycloak OIDC Authentication -# This configuration is deployed when enable_oidc_auth is true - -# OIDC Provider Configuration -OIDCProviderMetadataURL ${keycloak_provider_url}/.well-known/openid-configuration -OIDCClientID ${keycloak_client_id} - -# Client secret - retrieved from AWS Parameter Store at runtime by proxy server -# The proxy server must retrieve the secret from: ${keycloak_client_secret_ssm_param} -# and replace this placeholder before Apache loads the config -OIDCClientSecret 
"REPLACE_WITH_SECRET_FROM_PARAMETER_STORE" - -# Redirect URI - must match Keycloak client Valid Redirect URIs setting -%{ if proxy_domain != "" ~} -OIDCRedirectURI https://${proxy_domain}/${project}/${venue}/sps/redirect_uri -%{ else ~} -OIDCRedirectURI https://REPLACE_WITH_PROXY_DOMAIN/${project}/${venue}/sps/redirect_uri -%{ endif ~} - -# Crypto passphrase for encrypting session cookies -# Generate at runtime with: openssl rand -base64 32 -OIDCCryptoPassphrase "REPLACE_WITH_GENERATED_CRYPTO_PASSPHRASE" - -# Session configuration -OIDCSessionInactivityTimeout 3600 # 1 hour of inactivity -OIDCSessionMaxDuration 28800 # 8 hours maximum session - -# User identification and claims -OIDCRemoteUserClaim preferred_username -OIDCScope "openid email profile groups" - -# Cookie settings -OIDCCookiePath /${project}/${venue}/sps/ -OIDCCookieSameSite On - -# Airflow UI - Main location with authentication - - AuthType openid-connect - Require valid-user - - # Forward OIDC user claims to Airflow as HTTP headers - # Airflow will use these headers for authentication and authorization - RequestHeader set X-Remote-User "%%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" - - ProxyPassReverse "/" - - -# Handle nested path redirects - - Redirect "/${project}/${venue}/sps/home" - - -# Main proxy pass configuration with authentication - - AuthType openid-connect - Require valid-user - - # Forward OIDC claims to Airflow backend - RequestHeader set X-Remote-User "%%{REMOTE_USER}e" - RequestHeader set X-Remote-User-Email "%%{OIDC_CLAIM_email}e" - RequestHeader set X-Remote-User-Groups "%%{OIDC_CLAIM_groups}e" - RequestHeader set X-Remote-User-Name "%%{OIDC_CLAIM_name}e" - - # Proxy to internal Airflow NLB - ProxyPassMatch "http://${airflow_nlb_hostname}:5000/$1" retry=5 disablereuse=On - ProxyPreserveHost On - FallbackResource 
/management/index.html - - # URL rewriting for embedded links - AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html - Substitute "s|\"/([^\"]*)|\"/${project}/${venue}/sps/$1|q" - diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf index 48861332..e0f5b7fe 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/variables.tf @@ -117,9 +117,3 @@ variable "keycloak_role_mapping" { "airflow_public" = ["Public"] } } - -variable "proxy_domain" { - description = "Domain name of the Apache proxy server for OIDC redirect URI" - type = string - default = "" -}