From c2e0f7b6356d6130721439f81a9321dd4411c186 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 14 Jul 2025 10:38:22 -0700 Subject: [PATCH 01/19] initial attempt of DAG --- airflow/dags/run_ogc_process.py | 211 ++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 airflow/dags/run_ogc_process.py diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py new file mode 100644 index 00000000..47e61af1 --- /dev/null +++ b/airflow/dags/run_ogc_process.py @@ -0,0 +1,211 @@ +""" +DAG with two separate tasks to submit a job to an OGC-compliant process API +and then monitor its status. +""" +import json +import logging +from datetime import datetime + +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.models.baseoperator import chain +from airflow.operators.python import PythonOperator +from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator +from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret +from airflow.utils.trigger_rule import TriggerRule +from kubernetes.client import models as k8s + +# --- Configuration Constants --- + +# The name of the Kubernetes secret that holds the PGT token. +K8S_SECRET_NAME = "pgt-token-secret" +K8S_SECRET_KEY = "pgt-token" +TOKEN_ENV_VAR = "PGT_TOKEN" + +# The base URL for the OGC Process API. +API_BASE_URL = "https://api.dit.maap-project.org/api/ogc" + +# The Kubernetes namespace where the pods will run. +POD_NAMESPACE = "airflow" # Change this to your Airflow namespace + +# A lightweight Docker image with curl and jq for making API requests. +DOCKER_IMAGE = "stedolan/jq@sha256:36519247696232f7a09d3a0e6653131093c7deda36f8a4e34a70b09f19e42e61" + +# Define the secret to be mounted as an environment variable. +secret_env_vars = [ + AirflowK8sSecret( + deploy_type="env", + deploy_target=TOKEN_ENV_VAR, + secret=K8S_SECRET_NAME, + key=K8S_SECRET_KEY, + ) +] + +# Default DAG configuration +dag_default_args = { + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime(2023, 1, 1), +} + +# --- DAG Definition --- + +dag = DAG( + dag_id="ogc_two_task_job_runner", + description="Submits and monitors an OGC job in two separate tasks.", + dag_display_name="OGC Two-Task Job Runner", + tags=["ogc", "api", "maap", "kubernetes"], + is_paused_upon_creation=False, + catchup=False, + schedule=None, + max_active_runs=10, + default_args=dag_default_args, + params={ + "process_id": Param( + "test-process", + type="string", + title="Process ID", + description="The identifier of the OGC process to execute.", + ), + "job_inputs": Param( + json.dumps( + { + "queue": "maap-dps-sandbox", + "inputs": {}, + } + ), + type="string", + title="Job Inputs (JSON string)", + description="A JSON string representing the inputs payload for the job.", + ), + }, +) + +# --- Task Definitions --- + +def setup(**context): + """A simple setup task to log parameters.""" + logging.info("Starting OGC job submission and monitoring DAG.") + logging.info(f"Parameters received: {context['params']}") + +setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) + +# This shell command submits the job and writes the jobID to a special file +# that Airflow uses for XComs. +submit_command = [ + "/bin/sh", + "-c", + f""" + set -e + echo "Submitting job for process: {{ params.process_id }}" + + SUBMIT_URL="{API_BASE_URL}/processes/{{ params.process_id }}/execution" + + # The payload is now passed as a templated argument + PAYLOAD='{{ params.job_inputs }}' + + # Make the request and extract jobID + response=$(curl -s -f -X POST "$SUBMIT_URL" \\ + -H "Authorization: Bearer ${TOKEN_ENV_VAR}" \\ + -H "Content-Type: application/json" \\ + -d "$PAYLOAD") + + echo "API Response: $response" + job_id=$(echo "$response" | jq -r .jobID) + + if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then + echo "Failed to get jobID from response." + exit 1 + fi + + echo "Job submitted successfully. Job ID: $job_id" + + # Write the job_id to the XCom return file for the next task + # The value MUST be JSON-parsable, so we quote it. + echo -n "\\"\\"{job_id}\\"\\"" > /airflow/xcom/return.json + """, +] + +submit_job_task = KubernetesPodOperator( + task_id="submit_job_task", + namespace=POD_NAMESPACE, + image=DOCKER_IMAGE, + name="ogc-submit-pod", + cmds=submit_command, + secrets=secret_env_vars, + in_cluster=True, + get_logs=True, + # This is crucial for enabling XCom push from the pod + do_xcom_push=True, + dag=dag, +) + +# This shell command polls for job status. The jobID is passed in as an argument. +monitor_command = [ + "/bin/sh", + "-c", + """ + set -e + job_id="$1" # The jobID is the first argument + if [ -z "$job_id" ]; then + echo "job_id argument not provided." + exit 1 + fi + + echo "Starting to monitor job ID: $job_id" + STATUS_URL="${API_BASE_URL}/jobs/$job_id" + + TIMEOUT=3600 + POLL_INTERVAL=30 + SECONDS=0 + + while [ $SECONDS -lt $TIMEOUT ]; do + echo "Checking status..." + response=$(curl -s -f -H "Authorization: Bearer ${PGT_TOKEN}" "$STATUS_URL") + status=$(echo "$response" | jq -r .status) + + echo "Current status is: $status" + + if [ "$status" = "successful" ]; then + echo "Job completed successfully!" + exit 0 + elif [ "$status" = "failed" ]; then + echo "Job failed!" + echo "Error details: $(echo "$response" | jq .)" + exit 1 + fi + + sleep $POLL_INTERVAL + SECONDS=$((SECONDS + POLL_INTERVAL)) + done + + echo "Job monitoring timed out after $TIMEOUT seconds." + exit 1 + """, +] + +monitor_job_task = KubernetesPodOperator( + task_id="monitor_job_task", + namespace=POD_NAMESPACE, + image=DOCKER_IMAGE, + name="ogc-monitor-pod", + cmds=monitor_command, + # The job_id is pulled from the previous task's XCom return value + # and passed as the first argument to the monitor_command script. + arguments=["{{ ti.xcom_pull(task_ids='submit_job_task') }}"], + secrets=secret_env_vars, + in_cluster=True, + get_logs=True, + dag=dag, +) + +def cleanup(**context): + """A placeholder cleanup task.""" + logging.info("Cleanup executed.") + +cleanup_task = PythonOperator( + task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE +) + +# Define the task execution chain +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) From c8848aef8662f596ef75a08f8dc9036fceeb6f0d Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Tue, 15 Jul 2025 15:03:02 -0700 Subject: [PATCH 02/19] updating DAG and dockerfile to run process --- airflow/dags/run_ogc_process.py | 133 +++++++++--------- airflow/docker/run_ogc_process/Dockerfile | 30 ++++ .../run_ogc_process_entrypoint.sh | 29 ++++ terraform-unity/main.tf | 6 + 4 files changed, 129 insertions(+), 69 deletions(-) create mode 100644 airflow/docker/run_ogc_process/Dockerfile create mode 100644 airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 47e61af1..8755dfd8 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -14,30 +14,31 @@ from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s +from unity_sps_utils import ( + DEFAULT_LOG_LEVEL, + EC2_TYPES, + NODE_POOL_DEFAULT, + NODE_POOL_HIGH_WORKLOAD, + POD_LABEL, + POD_NAMESPACE, + build_ec2_type_label, + get_affinity, +) # --- Configuration Constants --- -# The name of the Kubernetes secret that holds the PGT token. -K8S_SECRET_NAME = "pgt-token-secret" -K8S_SECRET_KEY = "pgt-token" -TOKEN_ENV_VAR = "PGT_TOKEN" - -# The base URL for the OGC Process API. -API_BASE_URL = "https://api.dit.maap-project.org/api/ogc" - -# The Kubernetes namespace where the pods will run. -POD_NAMESPACE = "airflow" # Change this to your Airflow namespace +K8S_SECRET_NAME = "sps-app-credentials" # A lightweight Docker image with curl and jq for making API requests. -DOCKER_IMAGE = "stedolan/jq@sha256:36519247696232f7a09d3a0e6653131093c7deda36f8a4e34a70b09f19e42e61" +DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" # Define the secret to be mounted as an environment variable. secret_env_vars = [ AirflowK8sSecret( deploy_type="env", - deploy_target=TOKEN_ENV_VAR, + deploy_target="MAAP_PGT", secret=K8S_SECRET_NAME, - key=K8S_SECRET_KEY, + key="MAAP_PGT", ) ] @@ -45,16 +46,25 @@ dag_default_args = { "owner": "unity-sps", "depends_on_past": False, - "start_date": datetime(2023, 1, 1), + "start_date": datetime.utcfromtimestamp(0), } +submit_job_env_vars = [ + k8s.V1EnvVar( + name="SUBMIT_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + ), + k8s.V1EnvVar(name="PROCESS_ID", value="{{ params.process_id }}"), + k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}") +] + # --- DAG Definition --- dag = DAG( dag_id="ogc_two_task_job_runner", description="Submits and monitors an OGC job in two separate tasks.", dag_display_name="OGC Two-Task Job Runner", - tags=["ogc", "api", "maap", "kubernetes"], + tags=["ogc", "job"], is_paused_upon_creation=False, catchup=False, schedule=None, @@ -90,54 +100,38 @@ def setup(**context): setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) -# This shell command submits the job and writes the jobID to a special file -# that Airflow uses for XComs. -submit_command = [ - "/bin/sh", - "-c", - f""" - set -e - echo "Submitting job for process: {{ params.process_id }}" - - SUBMIT_URL="{API_BASE_URL}/processes/{{ params.process_id }}/execution" - - # The payload is now passed as a templated argument - PAYLOAD='{{ params.job_inputs }}' - - # Make the request and extract jobID - response=$(curl -s -f -X POST "$SUBMIT_URL" \\ - -H "Authorization: Bearer ${TOKEN_ENV_VAR}" \\ - -H "Content-Type: application/json" \\ - -d "$PAYLOAD") - - echo "API Response: $response" - job_id=$(echo "$response" | jq -r .jobID) - - if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then - echo "Failed to get jobID from response." - exit 1 - fi - - echo "Job submitted successfully. Job ID: $job_id" - - # Write the job_id to the XCom return file for the next task - # The value MUST be JSON-parsable, so we quote it. - echo -n "\\"\\"{job_id}\\"\\"" > /airflow/xcom/return.json - """, -] - submit_job_task = KubernetesPodOperator( task_id="submit_job_task", namespace=POD_NAMESPACE, image=DOCKER_IMAGE, name="ogc-submit-pod", - cmds=submit_command, + env_vars=submit_job_env_vars, secrets=secret_env_vars, + service_account_name="airflow-worker", in_cluster=True, get_logs=True, - # This is crucial for enabling XCom push from the pod - do_xcom_push=True, + startup_timeout_seconds=600, + container_security_context={"privileged": True}, + container_resources=k8s.V1ResourceRequirements( + requests={ + "ephemeral-storage": "{{ti.xcom_pull(task_ids='Setup', key='container_storage')}}", + }, + ), + container_logs=True, dag=dag, + node_selector={ + "karpenter.sh/nodepool": "{{ti.xcom_pull(task_ids='Setup', key='node_pool')}}", + "node.kubernetes.io/instance-type": "{{ti.xcom_pull(task_ids='Setup', key='instance_type')}}", + }, + labels={"pod": POD_LABEL}, + annotations={"karpenter.sh/do-not-disrupt": "true"}, + # note: 'affinity' cannot yet be templated + affinity=get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + ), + on_finish_action="keep_pod", + is_delete_operator_pod=False, ) # This shell command polls for job status. The jobID is passed in as an argument. @@ -184,20 +178,20 @@ def setup(**context): """, ] -monitor_job_task = KubernetesPodOperator( - task_id="monitor_job_task", - namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, - name="ogc-monitor-pod", - cmds=monitor_command, - # The job_id is pulled from the previous task's XCom return value - # and passed as the first argument to the monitor_command script. - arguments=["{{ ti.xcom_pull(task_ids='submit_job_task') }}"], - secrets=secret_env_vars, - in_cluster=True, - get_logs=True, - dag=dag, -) +# monitor_job_task = KubernetesPodOperator( +# task_id="monitor_job_task", +# namespace=POD_NAMESPACE, +# image=DOCKER_IMAGE, +# name="ogc-monitor-pod", +# cmds=monitor_command, +# # The job_id is pulled from the previous task's XCom return value +# # and passed as the first argument to the monitor_command script. +# arguments=["{{ ti.xcom_pull(task_ids='submit_job_task') }}"], +# secrets=secret_env_vars, +# in_cluster=True, +# get_logs=True, +# dag=dag, +# ) def cleanup(**context): """A placeholder cleanup task.""" @@ -208,4 +202,5 @@ def cleanup(**context): ) # Define the task execution chain -chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) +chain(setup_task, submit_job_task, cleanup_task) +#chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) diff --git a/airflow/docker/run_ogc_process/Dockerfile b/airflow/docker/run_ogc_process/Dockerfile new file mode 100644 index 00000000..c019c8d1 --- /dev/null +++ b/airflow/docker/run_ogc_process/Dockerfile @@ -0,0 +1,30 @@ +# --- Stage 1: Builder --- +# Use a lightweight Linux distribution (Alpine) to install the tools. +# Naming this stage "builder" allows us to reference it later. +FROM alpine:3.18 as builder + +# Install curl and jq using the Alpine package manager (apk). +# --no-cache avoids writing the package index to disk, keeping the layer small. +RUN apk add --no-cache curl jq + + +# --- Stage 2: Final Image --- +# Use the same base image for the final product. This ensures compatibility +# of the copied binaries and their libraries. +FROM alpine:3.18 + +# Copy only the necessary executable files from the "builder" stage. +# This is the key to a small final image. We are not copying the package +# manager or any other unnecessary files. +COPY --from=builder /usr/bin/curl /usr/bin/curl +COPY --from=builder /usr/bin/jq /usr/bin/jq + +# Although Airflow will override this with its own commands, +# setting a default command is good practice. It can be used for testing. +# This command simply shows that the tools are available. +CMD ["/bin/sh", "-c", "echo 'curl and jq are installed.'; curl --version; jq --version"] + +COPY run_ogc_process_entrypoint.sh /usr/share/ogc/run_ogc_process_entrypoint.sh +WORKDIR /usr/share/ogc +RUN chmod +x /usr/share/ogc/run_ogc_process_entrypoint.sh +ENTRYPOINT ["/usr/share/ogc/run_ogc_process_entrypoint.sh"] diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh new file mode 100644 index 00000000..cbd268cc --- /dev/null +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +set -e + +echo "in entrypoint for running an ogc process" + +# Submit job to endpoint +SUBMIT_JOB_URL="SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/")" + +echo "submit url is $SUBMIT_JOB_URL" + +response=$(curl --location ${SUBMIT_JOB_URL} \ +--header "proxy-ticket: ${MAAP_PGT}" \ +--header "Content-Type: application/json" \ +--data "${JOB_INPUTS}") + +echo "API Response: $response" +job_id=$(echo "$response" | jq -r .id) + +if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then + echo "Failed to get jobID from response." + exit 1 +fi + +echo "Job submitted successfully. Job ID: $job_id" + +# Write the job_id to the XCom return file for the next task +mkdir -p /airflow/xcom/ +printf '{"job_id": "%s"}' "$job_id" > /airflow/xcom/return.json \ No newline at end of file diff --git a/terraform-unity/main.tf b/terraform-unity/main.tf index 74822380..ff4f775a 100644 --- a/terraform-unity/main.tf +++ b/terraform-unity/main.tf @@ -29,6 +29,11 @@ data "aws_ssm_parameter" "dockstore_token" { with_decryption = true } +data "aws_ssm_parameter" "maap_pgt" { + name = "/unity/ads/ogc/development/maap_pgt_grace_test_acct" + with_decryption = true +} + resource "kubernetes_secret" "sps-app-credentials" { metadata { name = "sps-app-credentials" @@ -39,6 +44,7 @@ resource "kubernetes_secret" "sps-app-credentials" { "DOCKERHUB_USERNAME" = data.aws_ssm_parameter.dockerhub_username.value "DOCKERHUB_TOKEN" = data.aws_ssm_parameter.dockerhub_api_key.value "DOCKSTORE_TOKEN" = data.aws_ssm_parameter.dockstore_token.value + "MAAP_PGT" = data.aws_ssm_parameter.maap_pgt.value } type = "Opaque" From eb7b628074fa5f910450cdc476462e60fe4ae59d Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 16 Jul 2025 11:18:52 -0700 Subject: [PATCH 03/19] successfully submitting a job but not monitoring yet --- airflow/dags/run_ogc_process.py | 69 ++++++++++++++++--- airflow/docker/run_ogc_process/Dockerfile | 24 +------ .../run_ogc_process_entrypoint.sh | 8 +-- 3 files changed, 65 insertions(+), 36 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 8755dfd8..f6f7d411 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -5,6 +5,7 @@ import json import logging from datetime import datetime +import os from airflow.models.dag import DAG from airflow.models.param import Param @@ -24,10 +25,12 @@ build_ec2_type_label, get_affinity, ) +from airflow.operators.python import PythonOperator, get_current_context # --- Configuration Constants --- K8S_SECRET_NAME = "sps-app-credentials" +LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} # A lightweight Docker image with curl and jq for making API requests. DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" @@ -61,9 +64,9 @@ # --- DAG Definition --- dag = DAG( - dag_id="ogc_two_task_job_runner", - description="Submits and monitors an OGC job in two separate tasks.", - dag_display_name="OGC Two-Task Job Runner", + dag_id="run_ogc_process", + description="Submits a job to an OGC process and monitors", + dag_display_name="Run an OGC Process", tags=["ogc", "job"], is_paused_upon_creation=False, catchup=False, @@ -72,10 +75,9 @@ default_args=dag_default_args, params={ "process_id": Param( - "test-process", - type="string", + type="integer", title="Process ID", - description="The identifier of the OGC process to execute.", + description="The numerical identifier of the OGC process to execute.", ), "job_inputs": Param( json.dumps( @@ -88,15 +90,66 @@ title="Job Inputs (JSON string)", description="A JSON string representing the inputs payload for the job.", ), + "log_level": Param( + DEFAULT_LOG_LEVEL, + type="integer", + enum=list(LOG_LEVEL_TYPE.keys()), + values_display={key: f"{key} ({value})" for key, value in LOG_LEVEL_TYPE.items()}, + title="Processing log levels", + description=("Log level for DAG processing"), + ), + "request_instance_type": Param( + "t3.medium", + type="string", + enum=list(EC2_TYPES.keys()), + values_display={key: f"{build_ec2_type_label(key)}" for key in EC2_TYPES.keys()}, + title="EC2 instance type", + ), + "request_storage": Param( + "10Gi", type="string", enum=["10Gi", "50Gi", "100Gi", "150Gi", "200Gi", "250Gi"] + ), + "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), }, ) # --- Task Definitions --- -def setup(**context): - """A simple setup task to log parameters.""" +def setup(ti=None,**context): + """Task that selects the proper Karpenter Node Pool depending on the user requested resources.""" + logging.info("Starting OGC job submission and monitoring DAG.") logging.info(f"Parameters received: {context['params']}") + context = get_current_context() + logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") + + # select the node pool based on what resources were requested + node_pool = NODE_POOL_DEFAULT + storage = context["params"]["request_storage"] # 100Gi + container_storage = int(storage[0:-2]) # 100 + ti.xcom_push(key="container_storage", value=container_storage) + + # from "t3.large (General Purpose: 2vCPU, 8GiB)" to "t3.large" + instance_type = context["params"]["request_instance_type"] + cpu = EC2_TYPES[instance_type]["cpu"] + memory = EC2_TYPES[instance_type]["memory"] + ti.xcom_push(key="instance_type", value=instance_type) + logging.info(f"Requesting EC2 instance type={instance_type}") + + logging.info(f"Requesting container storage={container_storage}Gi") + if (container_storage > 30) or (cpu > 16) or (memory > 32): + node_pool = NODE_POOL_HIGH_WORKLOAD + logging.info(f"Selecting node pool={node_pool}") + ti.xcom_push(key="node_pool", value=node_pool) + + # select "use_ecr" argument and determine if ECR login is required + logging.info("Use ECR: %s", context["params"]["use_ecr"]) + if context["params"]["use_ecr"]: + ecr_login = os.environ["AIRFLOW_VAR_ECR_URI"] + ti.xcom_push(key="ecr_login", value=ecr_login) + logging.info("ECR login: %s", ecr_login) + + # select log level based on debug + logging.info(f"Selecting log level: {context['params']['log_level']}.") setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) diff --git a/airflow/docker/run_ogc_process/Dockerfile b/airflow/docker/run_ogc_process/Dockerfile index c019c8d1..bb246eea 100644 --- a/airflow/docker/run_ogc_process/Dockerfile +++ b/airflow/docker/run_ogc_process/Dockerfile @@ -1,29 +1,7 @@ -# --- Stage 1: Builder --- -# Use a lightweight Linux distribution (Alpine) to install the tools. -# Naming this stage "builder" allows us to reference it later. -FROM alpine:3.18 as builder +FROM alpine:3.18 -# Install curl and jq using the Alpine package manager (apk). -# --no-cache avoids writing the package index to disk, keeping the layer small. RUN apk add --no-cache curl jq - -# --- Stage 2: Final Image --- -# Use the same base image for the final product. This ensures compatibility -# of the copied binaries and their libraries. -FROM alpine:3.18 - -# Copy only the necessary executable files from the "builder" stage. -# This is the key to a small final image. We are not copying the package -# manager or any other unnecessary files. -COPY --from=builder /usr/bin/curl /usr/bin/curl -COPY --from=builder /usr/bin/jq /usr/bin/jq - -# Although Airflow will override this with its own commands, -# setting a default command is good practice. It can be used for testing. -# This command simply shows that the tools are available. -CMD ["/bin/sh", "-c", "echo 'curl and jq are installed.'; curl --version; jq --version"] - COPY run_ogc_process_entrypoint.sh /usr/share/ogc/run_ogc_process_entrypoint.sh WORKDIR /usr/share/ogc RUN chmod +x /usr/share/ogc/run_ogc_process_entrypoint.sh diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index cbd268cc..84c6162d 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -2,12 +2,10 @@ set -e -echo "in entrypoint for running an ogc process" - # Submit job to endpoint -SUBMIT_JOB_URL="SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/")" +SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/") -echo "submit url is $SUBMIT_JOB_URL" +echo "Submitting the job to ${SUBMIT_JOB_URL}" response=$(curl --location ${SUBMIT_JOB_URL} \ --header "proxy-ticket: ${MAAP_PGT}" \ @@ -22,7 +20,7 @@ if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then exit 1 fi -echo "Job submitted successfully. Job ID: $job_id" +echo "Job submitted successfully. Job ID: ${job_id}" # Write the job_id to the XCom return file for the next task mkdir -p /airflow/xcom/ From f260bca2f61772c738d85bd07fe3dab93f8cabff Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 16 Jul 2025 16:10:10 -0700 Subject: [PATCH 04/19] can successfully run and monitor job now --- airflow/dags/run_ogc_process.py | 57 +++++++++++++-- .../run_ogc_process_entrypoint.sh | 73 ++++++++++++++----- 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index f6f7d411..6618e506 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -32,8 +32,7 @@ K8S_SECRET_NAME = "sps-app-credentials" LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} -# A lightweight Docker image with curl and jq for making API requests. -DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" +DOCKER_IMAGE_SUBMIT_JOB = "jplmdps/ogc-job-runner:latest" # Define the secret to be mounted as an environment variable. secret_env_vars = [ @@ -58,7 +57,17 @@ value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", ), k8s.V1EnvVar(name="PROCESS_ID", value="{{ params.process_id }}"), - k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}") + k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}"), + k8s.V1EnvVar(name="SUBMIT_JOB", value="true") +] + +monitor_job_env_vars = [ + k8s.V1EnvVar( + name="MONITOR_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", + ), + k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}"), + k8s.V1EnvVar(name="SUBMIT_JOB", value="false") ] # --- DAG Definition --- @@ -156,7 +165,7 @@ def setup(ti=None,**context): submit_job_task = KubernetesPodOperator( task_id="submit_job_task", namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, + image=DOCKER_IMAGE_SUBMIT_JOB, name="ogc-submit-pod", env_vars=submit_job_env_vars, secrets=secret_env_vars, @@ -171,6 +180,7 @@ def setup(ti=None,**context): }, ), container_logs=True, + do_xcom_push=True, dag=dag, node_selector={ "karpenter.sh/nodepool": "{{ti.xcom_pull(task_ids='Setup', key='node_pool')}}", @@ -231,6 +241,40 @@ def setup(ti=None,**context): """, ] +monitor_job_task = KubernetesPodOperator( + task_id="monitor_job_task", + namespace=POD_NAMESPACE, + image=DOCKER_IMAGE_SUBMIT_JOB, + name="ogc-monitor-pod", + env_vars=monitor_job_env_vars, + secrets=secret_env_vars, + service_account_name="airflow-worker", + in_cluster=True, + get_logs=True, + startup_timeout_seconds=600, + container_security_context={"privileged": True}, + container_resources=k8s.V1ResourceRequirements( + requests={ + "ephemeral-storage": "{{ti.xcom_pull(task_ids='Setup', key='container_storage')}}", + }, + ), + container_logs=True, + dag=dag, + node_selector={ + "karpenter.sh/nodepool": "{{ti.xcom_pull(task_ids='Setup', key='node_pool')}}", + "node.kubernetes.io/instance-type": "{{ti.xcom_pull(task_ids='Setup', key='instance_type')}}", + }, + labels={"pod": POD_LABEL}, + annotations={"karpenter.sh/do-not-disrupt": "true"}, + # note: 'affinity' cannot yet be templated + affinity=get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + ), + on_finish_action="keep_pod", + is_delete_operator_pod=False, +) + # monitor_job_task = KubernetesPodOperator( # task_id="monitor_job_task", # namespace=POD_NAMESPACE, @@ -254,6 +298,5 @@ def cleanup(**context): task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) -# Define the task execution chain -chain(setup_task, submit_job_task, cleanup_task) -#chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) +#chain(setup_task, submit_job_task, cleanup_task) +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index 84c6162d..5bf05f62 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -2,26 +2,65 @@ set -e -# Submit job to endpoint -SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/") +if [ "$SUBMIT_JOB" = "true" ] || [ "$SUBMIT_JOB" = "True" ]; then + echo "Submitting job" -echo "Submitting the job to ${SUBMIT_JOB_URL}" + SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/") -response=$(curl --location ${SUBMIT_JOB_URL} \ ---header "proxy-ticket: ${MAAP_PGT}" \ ---header "Content-Type: application/json" \ ---data "${JOB_INPUTS}") + echo "Submitting the job to ${SUBMIT_JOB_URL}" -echo "API Response: $response" -job_id=$(echo "$response" | jq -r .id) + response=$(curl --location ${SUBMIT_JOB_URL} \ + --header "proxy-ticket: ${MAAP_PGT}" \ + --header "Content-Type: application/json" \ + --data "${JOB_INPUTS}") -if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then - echo "Failed to get jobID from response." - exit 1 -fi + echo "API Response: $response" + job_id=$(echo "$response" | jq -r .id) + + if [ "$job_id" = "null" ] || [ -z "$job_id" ]; then + echo "Failed to get jobID from response." + exit 1 + fi -echo "Job submitted successfully. Job ID: ${job_id}" + echo "Job submitted successfully. Job ID: ${job_id}" -# Write the job_id to the XCom return file for the next task -mkdir -p /airflow/xcom/ -printf '{"job_id": "%s"}' "$job_id" > /airflow/xcom/return.json \ No newline at end of file + # Write the job_id to the XCom return file for the next task + mkdir -p /airflow/xcom/ + printf '{"job_id": "%s"}' "$job_id" > /airflow/xcom/return.json +elif [ "$SUBMIT_JOB" = "false" ] || [ "$SUBMIT_JOB" = "False" ]; then + echo "Monitoring job status" + echo "graceal job id in the entrypiont is $JOB_ID" + MONITOR_JOB_URL=$(echo "$MONITOR_JOB_URL" | sed "s/{job_id}/$JOB_ID/") + echo "graceal the monitor job url is $MONITOR_JOB_URL" + TIMEOUT=3600 + POLL_INTERVAL=30 + SECONDS=0 + + while [ $SECONDS -lt $TIMEOUT ]; do + echo "Checking status..." + response=$(curl --location ${MONITOR_JOB_URL} \ + --header "proxy-ticket: ${MAAP_PGT}" \ + --header "Content-Type: application/json") + + status=$(echo "$response" | jq -r .status) + + echo "Current status is: $status" + + if [ "$status" = "successful" ]; then + echo "Job completed successfully!" + exit 0 + elif [ "$status" = "failed" ]; then + echo "Job failed!" + echo "Error details: $(echo "$response" | jq .)" + exit 0 # TODO should this be 1 or 0? + fi + + sleep $POLL_INTERVAL + SECONDS=$((SECONDS + POLL_INTERVAL)) + done + + echo "Job monitoring timed out after $TIMEOUT seconds." + exit 1 +else + echo "SUBMIT_JOB variable must be specified and set to true or false" +fi \ No newline at end of file From eeec99fa4633165023aa3f8f6f85c97ad6d61d16 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Thu, 17 Jul 2025 10:41:32 -0700 Subject: [PATCH 05/19] cleaned up comments and print statements --- airflow/dags/run_ogc_process.py | 72 ++----------------- .../run_ogc_process_entrypoint.sh | 4 +- 2 files changed, 6 insertions(+), 70 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 6618e506..637ede5e 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -27,14 +27,11 @@ ) from airflow.operators.python import PythonOperator, get_current_context -# --- Configuration Constants --- - K8S_SECRET_NAME = "sps-app-credentials" LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} -DOCKER_IMAGE_SUBMIT_JOB = "jplmdps/ogc-job-runner:latest" +DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" -# Define the secret to be mounted as an environment variable. secret_env_vars = [ AirflowK8sSecret( deploy_type="env", @@ -44,7 +41,6 @@ ) ] -# Default DAG configuration dag_default_args = { "owner": "unity-sps", "depends_on_past": False, @@ -165,7 +161,7 @@ def setup(ti=None,**context): submit_job_task = KubernetesPodOperator( task_id="submit_job_task", namespace=POD_NAMESPACE, - image=DOCKER_IMAGE_SUBMIT_JOB, + image=DOCKER_IMAGE, name="ogc-submit-pod", env_vars=submit_job_env_vars, secrets=secret_env_vars, @@ -197,54 +193,10 @@ def setup(ti=None,**context): is_delete_operator_pod=False, ) -# This shell command polls for job status. The jobID is passed in as an argument. -monitor_command = [ - "/bin/sh", - "-c", - """ - set -e - job_id="$1" # The jobID is the first argument - if [ -z "$job_id" ]; then - echo "job_id argument not provided." - exit 1 - fi - - echo "Starting to monitor job ID: $job_id" - STATUS_URL="${API_BASE_URL}/jobs/$job_id" - - TIMEOUT=3600 - POLL_INTERVAL=30 - SECONDS=0 - - while [ $SECONDS -lt $TIMEOUT ]; do - echo "Checking status..." - response=$(curl -s -f -H "Authorization: Bearer ${PGT_TOKEN}" "$STATUS_URL") - status=$(echo "$response" | jq -r .status) - - echo "Current status is: $status" - - if [ "$status" = "successful" ]; then - echo "Job completed successfully!" - exit 0 - elif [ "$status" = "failed" ]; then - echo "Job failed!" - echo "Error details: $(echo "$response" | jq .)" - exit 1 - fi - - sleep $POLL_INTERVAL - SECONDS=$((SECONDS + POLL_INTERVAL)) - done - - echo "Job monitoring timed out after $TIMEOUT seconds." - exit 1 - """, -] - monitor_job_task = KubernetesPodOperator( task_id="monitor_job_task", namespace=POD_NAMESPACE, - image=DOCKER_IMAGE_SUBMIT_JOB, + image=DOCKER_IMAGE, name="ogc-monitor-pod", env_vars=monitor_job_env_vars, secrets=secret_env_vars, @@ -275,21 +227,6 @@ def setup(ti=None,**context): is_delete_operator_pod=False, ) -# monitor_job_task = KubernetesPodOperator( -# task_id="monitor_job_task", -# namespace=POD_NAMESPACE, -# image=DOCKER_IMAGE, -# name="ogc-monitor-pod", -# cmds=monitor_command, -# # The job_id is pulled from the previous task's XCom return value -# # and passed as the first argument to the monitor_command script. -# arguments=["{{ ti.xcom_pull(task_ids='submit_job_task') }}"], -# secrets=secret_env_vars, -# in_cluster=True, -# get_logs=True, -# dag=dag, -# ) - def cleanup(**context): """A placeholder cleanup task.""" logging.info("Cleanup executed.") @@ -298,5 +235,4 @@ def cleanup(**context): task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) -#chain(setup_task, submit_job_task, cleanup_task) -chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index 5bf05f62..26acfe6e 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -29,9 +29,9 @@ if [ "$SUBMIT_JOB" = "true" ] || [ "$SUBMIT_JOB" = "True" ]; then printf '{"job_id": "%s"}' "$job_id" > /airflow/xcom/return.json elif [ "$SUBMIT_JOB" = "false" ] || [ "$SUBMIT_JOB" = "False" ]; then echo "Monitoring job status" - echo "graceal job id in the entrypiont is $JOB_ID" + MONITOR_JOB_URL=$(echo "$MONITOR_JOB_URL" | sed "s/{job_id}/$JOB_ID/") - echo "graceal the monitor job url is $MONITOR_JOB_URL" + TIMEOUT=3600 POLL_INTERVAL=30 SECONDS=0 From 84d3d6ac8529b2438912394cec2f20c79c683da2 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 21 Jul 2025 13:53:56 -0700 Subject: [PATCH 06/19] removed Unity specific params --- airflow/dags/run_ogc_process.py | 85 +++---------------- .../run_ogc_process_entrypoint.sh | 10 ++- 2 files changed, 19 insertions(+), 76 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 637ede5e..e503971a 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -6,6 +6,7 @@ import logging from datetime import datetime import os +import requests from airflow.models.dag import DAG from airflow.models.param import Param @@ -54,6 +55,7 @@ ), k8s.V1EnvVar(name="PROCESS_ID", value="{{ params.process_id }}"), k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}"), + k8s.V1EnvVar(name="QUEUE", value="{{ params.queue }}"), k8s.V1EnvVar(name="SUBMIT_JOB", value="true") ] @@ -84,36 +86,18 @@ title="Process ID", description="The numerical identifier of the OGC process to execute.", ), - "job_inputs": Param( - json.dumps( - { - "queue": "maap-dps-sandbox", - "inputs": {}, - } - ), + "queue": Param( + "maap-dps-sandbox", type="string", - title="Job Inputs (JSON string)", - description="A JSON string representing the inputs payload for the job.", - ), - "log_level": Param( - DEFAULT_LOG_LEVEL, - type="integer", - enum=list(LOG_LEVEL_TYPE.keys()), - values_display={key: f"{key} ({value})" for key, value in LOG_LEVEL_TYPE.items()}, - title="Processing log levels", - description=("Log level for DAG processing"), + title="Queue", + description="The MAAP queue to submit the job to", ), - "request_instance_type": Param( - "t3.medium", + "job_inputs": Param( + {}, type="string", - enum=list(EC2_TYPES.keys()), - values_display={key: f"{build_ec2_type_label(key)}" for key in EC2_TYPES.keys()}, - title="EC2 instance type", - ), - "request_storage": Param( - "10Gi", type="string", enum=["10Gi", "50Gi", "100Gi", "150Gi", "200Gi", "250Gi"] - ), - "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), + title="Job Inputs", + description="A JSON string representing the inputs payload for the job.", + ) }, ) @@ -127,35 +111,6 @@ def setup(ti=None,**context): context = get_current_context() logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") - # select the node pool based on what resources were requested - node_pool = NODE_POOL_DEFAULT - storage = context["params"]["request_storage"] # 100Gi - container_storage = int(storage[0:-2]) # 100 - ti.xcom_push(key="container_storage", value=container_storage) - - # from "t3.large (General Purpose: 2vCPU, 8GiB)" to "t3.large" - instance_type = context["params"]["request_instance_type"] - cpu = EC2_TYPES[instance_type]["cpu"] - memory = EC2_TYPES[instance_type]["memory"] - ti.xcom_push(key="instance_type", value=instance_type) - logging.info(f"Requesting EC2 instance type={instance_type}") - - logging.info(f"Requesting container storage={container_storage}Gi") - if (container_storage > 30) or (cpu > 16) or (memory > 32): - node_pool = NODE_POOL_HIGH_WORKLOAD - logging.info(f"Selecting node pool={node_pool}") - ti.xcom_push(key="node_pool", value=node_pool) - - # select "use_ecr" argument and determine if ECR login is required - logging.info("Use ECR: %s", context["params"]["use_ecr"]) - if context["params"]["use_ecr"]: - ecr_login = os.environ["AIRFLOW_VAR_ECR_URI"] - ti.xcom_push(key="ecr_login", value=ecr_login) - logging.info("ECR login: %s", ecr_login) - - # select log level based on debug - logging.info(f"Selecting log level: {context['params']['log_level']}.") - setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) submit_job_task = KubernetesPodOperator( @@ -170,18 +125,9 @@ def setup(ti=None,**context): get_logs=True, startup_timeout_seconds=600, container_security_context={"privileged": True}, - container_resources=k8s.V1ResourceRequirements( - requests={ - "ephemeral-storage": "{{ti.xcom_pull(task_ids='Setup', key='container_storage')}}", - }, - ), container_logs=True, do_xcom_push=True, dag=dag, - node_selector={ - "karpenter.sh/nodepool": "{{ti.xcom_pull(task_ids='Setup', key='node_pool')}}", - "node.kubernetes.io/instance-type": "{{ti.xcom_pull(task_ids='Setup', key='instance_type')}}", - }, labels={"pod": POD_LABEL}, annotations={"karpenter.sh/do-not-disrupt": "true"}, # note: 'affinity' cannot yet be templated @@ -205,17 +151,8 @@ def setup(ti=None,**context): get_logs=True, startup_timeout_seconds=600, container_security_context={"privileged": True}, - container_resources=k8s.V1ResourceRequirements( - requests={ - "ephemeral-storage": "{{ti.xcom_pull(task_ids='Setup', key='container_storage')}}", - }, - ), container_logs=True, dag=dag, - node_selector={ - "karpenter.sh/nodepool": "{{ti.xcom_pull(task_ids='Setup', key='node_pool')}}", - "node.kubernetes.io/instance-type": "{{ti.xcom_pull(task_ids='Setup', key='instance_type')}}", - }, labels={"pod": POD_LABEL}, annotations={"karpenter.sh/do-not-disrupt": "true"}, # note: 'affinity' cannot yet be templated diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index 26acfe6e..786a93db 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -6,13 +6,19 @@ if [ "$SUBMIT_JOB" = "true" ] || [ "$SUBMIT_JOB" = "True" ]; then echo "Submitting job" SUBMIT_JOB_URL=$(echo "$SUBMIT_JOB_URL" | sed "s/{process_id}/$PROCESS_ID/") + SUBMIT_JOB_ARGUMENTS=$(jq -n \ + --arg queue "$QUEUE" \ + --argjson inputs "$JOB_INPUTS" \ + '{queue: $queue, inputs: $inputs}') + echo "graceal showing the job arguments which is " + echo $SUBMIT_JOB_ARGUMENTS echo "Submitting the job to ${SUBMIT_JOB_URL}" response=$(curl --location ${SUBMIT_JOB_URL} \ --header "proxy-ticket: ${MAAP_PGT}" \ --header "Content-Type: application/json" \ - --data "${JOB_INPUTS}") + --data "${SUBMIT_JOB_ARGUMENTS}") echo "API Response: $response" job_id=$(echo "$response" | jq -r .id) @@ -52,7 +58,7 @@ elif [ "$SUBMIT_JOB" = "false" ] || [ "$SUBMIT_JOB" = "False" ]; then elif [ "$status" = "failed" ]; then echo "Job failed!" echo "Error details: $(echo "$response" | jq .)" - exit 0 # TODO should this be 1 or 0? + exit 1 fi sleep $POLL_INTERVAL From 4f6a4d575d2fc4be3c1a62c41689671b13ad19ee Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 21 Jul 2025 16:11:35 -0700 Subject: [PATCH 07/19] idea of custom operators i want to follow --- airflow/dags/run_ogc_process.py | 257 +++++++++++------- .../run_ogc_process_entrypoint.sh | 2 - 2 files changed, 166 insertions(+), 93 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index e503971a..fc4d4749 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -10,37 +10,161 @@ from airflow.models.dag import DAG from airflow.models.param import Param -from airflow.models.baseoperator import chain -from airflow.operators.python import PythonOperator -from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator -from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret -from airflow.utils.trigger_rule import TriggerRule -from kubernetes.client import models as k8s -from unity_sps_utils import ( - DEFAULT_LOG_LEVEL, - EC2_TYPES, - NODE_POOL_DEFAULT, - NODE_POOL_HIGH_WORKLOAD, - POD_LABEL, - POD_NAMESPACE, - build_ec2_type_label, - get_affinity, -) +from airflow.models.baseoperator import BaseOperator, chain from airflow.operators.python import PythonOperator, get_current_context +from airflow.utils.trigger_rule import TriggerRule +from airflow.exceptions import AirflowException +from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook +import time K8S_SECRET_NAME = "sps-app-credentials" -LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} -DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" - -secret_env_vars = [ - AirflowK8sSecret( - deploy_type="env", - deploy_target="MAAP_PGT", - secret=K8S_SECRET_NAME, - key="MAAP_PGT", - ) -] +class OGCSubmitJobOperator(BaseOperator): + """Custom operator to submit jobs to OGC API endpoints.""" + + template_fields = ("process_id", "job_inputs", "job_queue") + + def __init__(self, process_id, job_inputs, job_queue, + submit_url_template="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + **kwargs): + super().__init__(**kwargs) + self.process_id = process_id + self.job_inputs = job_inputs + self.job_queue = job_queue + self.submit_url_template = submit_url_template + + def execute(self, context): + """Submit job to OGC API and return job ID.""" + + try: + # Get MAAP token from Kubernetes secret + maap_pgt = get_kubernetes_secret_value(K8S_SECRET_NAME, "MAAP_PGT") + + if not maap_pgt: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + + # Extract process ID if in format "id:version" + #actual_process_id = self.process_id.split(':')[0] if ':' in str(self.process_id) else self.process_id + + # Prepare URL and payload + submit_url = self.submit_url_template.format(process_id=self.process_id) + + # Parse job inputs if it's a string + if isinstance(self.job_inputs, str): + try: + job_inputs_dict = json.loads(self.job_inputs) + except json.JSONDecodeError: + job_inputs_dict = {} + else: + job_inputs_dict = self.job_inputs or {} + + payload = { + "queue": self.job_queue, + "inputs": job_inputs_dict + } + + headers = { + "proxy-ticket": maap_pgt, + "Content-Type": "application/json" + } + + self.log.info(f"Submitting job to {submit_url}") + self.log.info(f"Job payload: {json.dumps(payload, indent=2)}") + + # Submit job + response = requests.post(submit_url, json=payload, headers=headers, timeout=60) + response.raise_for_status() + + result = response.json() + job_id = result.get("id") + + if not job_id: + raise AirflowException(f"Failed to get job ID from response: {result}") + + self.log.info(f"Job submitted successfully. Job ID: {job_id}") + + # Return job_id for next task + return {"job_id": job_id} + + except requests.RequestException as e: + self.log.error(f"HTTP request failed: {e}") + raise AirflowException(f"Failed to submit job: {e}") + except Exception as e: + self.log.error(f"Job submission failed: {e}") + raise AirflowException(f"Job submission error: {e}") + + +class OGCMonitorJobOperator(BaseOperator): + """Custom operator to monitor OGC job status.""" + + template_fields = ("job_id",) + + def __init__(self, job_id, + monitor_url_template="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", + timeout=3600, poll_interval=30, **kwargs): + super().__init__(**kwargs) + self.job_id = job_id + self.monitor_url_template = monitor_url_template + self.timeout = timeout + self.poll_interval = poll_interval + + def execute(self, context): + """Monitor job status until completion or timeout.""" + + try: + self.log.info(f"Monitoring job with ID: {self.job_id}") + + # Get MAAP token from Kubernetes secret + maap_pgt = get_kubernetes_secret_value(K8S_SECRET_NAME, "MAAP_PGT") + + if not maap_pgt: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + + monitor_url = self.monitor_url_template.format(job_id=self.job_id) + headers = { + "proxy-ticket": maap_pgt, + "Content-Type": "application/json" + } + + self.log.info(f"Monitoring job {self.job_id} at {monitor_url}") + + start_time = time.time() + + while time.time() - start_time < self.timeout: + try: + response = requests.get(monitor_url, headers=headers, timeout=30) + response.raise_for_status() + + result = response.json() + status = result.get("status", "unknown") + + self.log.info(f"Job {self.job_id} status: {status}") + + if status == "successful": + self.log.info(f"Job {self.job_id} completed successfully!") + return {"status": "successful", "result": result} + elif status == "failed": + error_msg = result.get("message", "No error message provided") + self.log.error(f"Job {self.job_id} failed: {error_msg}") + raise AirflowException(f"Job {self.job_id} failed: {error_msg}") + elif status in ["running", "accepted", "processing"]: + self.log.info(f"Job {self.job_id} still {status}, waiting {self.poll_interval}s...") + time.sleep(self.poll_interval) + else: + self.log.warning(f"Unknown job status: {status}") + time.sleep(self.poll_interval) + + except requests.RequestException as e: + self.log.warning(f"Request failed, retrying: {e}") + time.sleep(self.poll_interval) + continue + + # Timeout reached + raise AirflowException(f"Job {self.job_id} monitoring timed out after {self.timeout} seconds") + + except Exception as e: + self.log.error(f"Job monitoring failed: {e}") + raise dag_default_args = { "owner": "unity-sps", @@ -48,26 +172,6 @@ "start_date": datetime.utcfromtimestamp(0), } -submit_job_env_vars = [ - k8s.V1EnvVar( - name="SUBMIT_JOB_URL", - value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", - ), - k8s.V1EnvVar(name="PROCESS_ID", value="{{ params.process_id }}"), - k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}"), - k8s.V1EnvVar(name="QUEUE", value="{{ params.queue }}"), - k8s.V1EnvVar(name="SUBMIT_JOB", value="true") -] - -monitor_job_env_vars = [ - k8s.V1EnvVar( - name="MONITOR_JOB_URL", - value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", - ), - k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}"), - k8s.V1EnvVar(name="SUBMIT_JOB", value="false") -] - # --- DAG Definition --- dag = DAG( @@ -86,7 +190,7 @@ title="Process ID", description="The numerical identifier of the OGC process to execute.", ), - "queue": Param( + "job_queue": Param( "maap-dps-sandbox", type="string", title="Queue", @@ -113,55 +217,26 @@ def setup(ti=None,**context): setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) -submit_job_task = KubernetesPodOperator( +submit_job_task = OGCSubmitJobOperator( task_id="submit_job_task", - namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, - name="ogc-submit-pod", - env_vars=submit_job_env_vars, - secrets=secret_env_vars, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=600, - container_security_context={"privileged": True}, - container_logs=True, - do_xcom_push=True, - dag=dag, - labels={"pod": POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=get_affinity( - capacity_type=["spot"], - anti_affinity_label=POD_LABEL, + process_id="{{ params.process_id }}", + job_inputs="{{ params.job_inputs }}", + job_queue="{{ params.job_queue }}", + maap_pgt =AirflowK8sSecret( + deploy_type="env", + deploy_target="MAAP_PGT", + secret=K8S_SECRET_NAME, + key="MAAP_PGT", ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, + dag=dag, ) -monitor_job_task = KubernetesPodOperator( +monitor_job_task = OGCMonitorJobOperator( task_id="monitor_job_task", - namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, - name="ogc-monitor-pod", - env_vars=monitor_job_env_vars, - secrets=secret_env_vars, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=600, - container_security_context={"privileged": True}, - container_logs=True, + job_id="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}", + timeout=3600, + poll_interval=30, dag=dag, - labels={"pod": POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=get_affinity( - capacity_type=["spot"], - anti_affinity_label=POD_LABEL, - ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, ) def cleanup(**context): diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index 786a93db..1e3902c1 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -10,8 +10,6 @@ if [ "$SUBMIT_JOB" = "true" ] || [ "$SUBMIT_JOB" = "True" ]; then --arg queue "$QUEUE" \ --argjson inputs "$JOB_INPUTS" \ '{queue: $queue, inputs: $inputs}') - echo "graceal showing the job arguments which is " - echo $SUBMIT_JOB_ARGUMENTS echo "Submitting the job to ${SUBMIT_JOB_URL}" From e1f9bd7cab5293db48d4e13961aa2d26ea12c08c Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Tue, 22 Jul 2025 10:35:03 -0700 Subject: [PATCH 08/19] still working on dropdown progress, also attempting approach to get kubernetes secrets with custom operators --- airflow/dags/run_ogc_process.py | 22 ++- airflow/dags/run_ogc_process2.py | 229 +++++++++++++++++++++++++++++++ airflow/helm/values.tmpl.yaml | 2 + 3 files changed, 240 insertions(+), 13 deletions(-) create mode 100644 airflow/dags/run_ogc_process2.py diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index fc4d4749..1c90c264 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -14,7 +14,7 @@ from airflow.operators.python import PythonOperator, get_current_context from airflow.utils.trigger_rule import TriggerRule from airflow.exceptions import AirflowException -from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook +from airflow.hooks.base import BaseHook import time K8S_SECRET_NAME = "sps-app-credentials" @@ -37,11 +37,12 @@ def execute(self, context): """Submit job to OGC API and return job ID.""" try: - # Get MAAP token from Kubernetes secret - maap_pgt = get_kubernetes_secret_value(K8S_SECRET_NAME, "MAAP_PGT") + # Get MAAP token from Airflow connection + connection = BaseHook.get_connection('maap_api_pgt') + maap_pgt = connection.password if not maap_pgt: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + raise AirflowException("MAAP_PGT token not found in Airflow connection") # Extract process ID if in format "id:version" #actual_process_id = self.process_id.split(':')[0] if ':' in str(self.process_id) else self.process_id @@ -114,11 +115,12 @@ def execute(self, context): try: self.log.info(f"Monitoring job with ID: {self.job_id}") - # Get MAAP token from Kubernetes secret - maap_pgt = get_kubernetes_secret_value(K8S_SECRET_NAME, "MAAP_PGT") + # Get MAAP token from Airflow connection + connection = BaseHook.get_connection('maap_api_pgt') + maap_pgt = connection.password if not maap_pgt: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + raise AirflowException("MAAP_PGT token not found in Airflow connection") monitor_url = self.monitor_url_template.format(job_id=self.job_id) headers = { @@ -222,12 +224,6 @@ def setup(ti=None,**context): process_id="{{ params.process_id }}", job_inputs="{{ params.job_inputs }}", job_queue="{{ params.job_queue }}", - maap_pgt =AirflowK8sSecret( - deploy_type="env", - deploy_target="MAAP_PGT", - secret=K8S_SECRET_NAME, - key="MAAP_PGT", - ), dag=dag, ) diff --git a/airflow/dags/run_ogc_process2.py b/airflow/dags/run_ogc_process2.py new file mode 100644 index 00000000..ada1ce0b --- /dev/null +++ b/airflow/dags/run_ogc_process2.py @@ -0,0 +1,229 @@ +""" +DAG with two separate tasks to submit a job to an OGC-compliant process API +and then monitor its status. +""" +import json +import logging +from datetime import datetime +import os +import requests +import re + +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.models.baseoperator import chain +from airflow.operators.python import PythonOperator +from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator +from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret +from airflow.utils.trigger_rule import TriggerRule +from kubernetes.client import models as k8s +from unity_sps_utils import ( + DEFAULT_LOG_LEVEL, + EC2_TYPES, + NODE_POOL_DEFAULT, + NODE_POOL_HIGH_WORKLOAD, + POD_LABEL, + POD_NAMESPACE, + build_ec2_type_label, + get_affinity, +) +from airflow.operators.python import PythonOperator, get_current_context + +def fetch_ogc_processes(): + """Fetch available processes from the OGC API and create mapping.""" + try: + response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) + response.raise_for_status() + + processes_data = response.json() + process_mapping = {} + dropdown_options = [] + + for process in processes_data.get("processes", []): + process_id = process.get("id") + process_version = process.get("version") + + # Extract numerical ID from links + numerical_id = None + for link in process.get("links", []): + if link.get("rel") == "self": + href = link.get("href", "") + # Extract number from href like "/ogc/processes/7" + match = re.search(r'/processes/(\d+)$', href) + if match: + numerical_id = int(match.group(1)) + break + + if process_id and numerical_id: + display_name = f"{process_id}:{process_version}" + dropdown_options.append(display_name) + process_mapping[display_name] = numerical_id + + return process_mapping, dropdown_options + + except requests.RequestException as e: + logging.error(f"Failed to fetch processes: {e}") + # Return fallback mapping + return {"example-process:1.0": 1}, ["example-process:1.0"] + except Exception as e: + logging.error(f"Error processing OGC processes: {e}") + return {"example-process:1.0": 1}, ["example-process:1.0"] + +K8S_SECRET_NAME = "sps-app-credentials" +LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} +PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() + +DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" + +secret_env_vars = [ + AirflowK8sSecret( + deploy_type="env", + deploy_target="MAAP_PGT", + secret=K8S_SECRET_NAME, + key="MAAP_PGT", + ) +] + +dag_default_args = { + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime.utcfromtimestamp(0), +} + +submit_job_env_vars = [ + k8s.V1EnvVar( + name="SUBMIT_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + ), + k8s.V1EnvVar(name="PROCESS_ID", value="{{ ti.xcom_pull(task_ids='Setup', key='return_value')['numerical_process_id'] }}"), + k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}"), + k8s.V1EnvVar(name="QUEUE", value="{{ params.queue }}"), + k8s.V1EnvVar(name="SUBMIT_JOB", value="true") +] + +monitor_job_env_vars = [ + k8s.V1EnvVar( + name="MONITOR_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", + ), + k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}"), + k8s.V1EnvVar(name="SUBMIT_JOB", value="false") +] + +# --- DAG Definition --- + +dag = DAG( + dag_id="run_ogc_process2", + description="Submits a job to an OGC process and monitors", + dag_display_name="Run an OGC Process2", + tags=["ogc", "job"], + is_paused_upon_creation=False, + catchup=False, + schedule=None, + max_active_runs=10, + default_args=dag_default_args, + params={ + "selected_process": Param( + default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "example-process:1.0", + enum=DROPDOWN_OPTIONS, + title="Process Selection", + description=f"Select a process to execute. Available processes: {', '.join(DROPDOWN_OPTIONS)}", + ), + "queue": Param( + "maap-dps-sandbox", + type="string", + title="Queue", + description="The MAAP queue to submit the job to", + ), + "job_inputs": Param( + {}, + type="string", + title="Job Inputs", + description="A JSON string representing the inputs payload for the job.", + ) + }, +) + +# --- Task Definitions --- +def setup(ti=None, **context): + """Task that logs DAG parameters and process mapping information.""" + + logging.info("Starting OGC job submission and monitoring DAG (Dynamic Version).") + logging.info(f"Parameters received: {context['params']}") + logging.info(f"Available processes: {len(DROPDOWN_OPTIONS)}") + logging.info(f"Process mapping: {json.dumps(PROCESS_MAPPING, indent=2)}") + + context = get_current_context() + logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") + + selected_process = context['params'].get('selected_process') + if selected_process in PROCESS_MAPPING: + numerical_id = PROCESS_MAPPING[selected_process] + logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") + return {"numerical_process_id": numerical_id} + else: + logging.warning(f"Selected process '{selected_process}' not found in mapping") + return {"numerical_process_id": 1} + +setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) + +submit_job_task = KubernetesPodOperator( + task_id="submit_job_task2", + namespace=POD_NAMESPACE, + image=DOCKER_IMAGE, + name="ogc-submit-pod", + env_vars=submit_job_env_vars, + secrets=secret_env_vars, + service_account_name="airflow-worker", + in_cluster=True, + get_logs=True, + startup_timeout_seconds=600, + container_security_context={"privileged": True}, + container_logs=True, + do_xcom_push=True, + dag=dag, + labels={"pod": POD_LABEL}, + annotations={"karpenter.sh/do-not-disrupt": "true"}, + # note: 'affinity' cannot yet be templated + affinity=get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + ), + on_finish_action="keep_pod", + is_delete_operator_pod=False, +) + +monitor_job_task = KubernetesPodOperator( + task_id="monitor_job_task2", + namespace=POD_NAMESPACE, + image=DOCKER_IMAGE, + name="ogc-monitor-pod", + env_vars=monitor_job_env_vars, + secrets=secret_env_vars, + service_account_name="airflow-worker", + in_cluster=True, + get_logs=True, + startup_timeout_seconds=600, + container_security_context={"privileged": True}, + container_logs=True, + dag=dag, + labels={"pod": POD_LABEL}, + annotations={"karpenter.sh/do-not-disrupt": "true"}, + # note: 'affinity' cannot yet be templated + affinity=get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + ), + on_finish_action="keep_pod", + is_delete_operator_pod=False, +) + +def cleanup(**context): + """A placeholder cleanup task.""" + logging.info("Cleanup executed.") + +cleanup_task = PythonOperator( + task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE +) + +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 2d6abe31..b6a56b48 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -372,3 +372,5 @@ extraEnv: | value: "1024" - name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG value: "True" + - name: AIRFLOW_CONN_MAAP_API_PGT + value: "http://secret:k8s-secret:sps-app-credentials:MAAP_PGT@https://api.dit.maap-project.org" From ee785a491f58397bcac9e71fb3922d594eb30ecf Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Tue, 22 Jul 2025 14:54:41 -0700 Subject: [PATCH 09/19] tried another way for the custom operators approach --- airflow/dags/run_ogc_process.py | 36 +++++++++++++++++++------------- airflow/dags/run_ogc_process2.py | 4 ++-- airflow/helm/values.tmpl.yaml | 2 -- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 1c90c264..2d3b35f5 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -14,7 +14,7 @@ from airflow.operators.python import PythonOperator, get_current_context from airflow.utils.trigger_rule import TriggerRule from airflow.exceptions import AirflowException -from airflow.hooks.base import BaseHook +from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook import time K8S_SECRET_NAME = "sps-app-credentials" @@ -37,12 +37,16 @@ def execute(self, context): """Submit job to OGC API and return job ID.""" try: - # Get MAAP token from Airflow connection - connection = BaseHook.get_connection('maap_api_pgt') - maap_pgt = connection.password - - if not maap_pgt: - raise AirflowException("MAAP_PGT token not found in Airflow connection") + # Get MAAP token from Kubernetes secret + k8s_hook = KubernetesHook() + secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) + maap_pgt = secret.data.get("MAAP_PGT") + + if maap_pgt: + import base64 + maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') + else: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") # Extract process ID if in format "id:version" #actual_process_id = self.process_id.split(':')[0] if ':' in str(self.process_id) else self.process_id @@ -115,12 +119,16 @@ def execute(self, context): try: self.log.info(f"Monitoring job with ID: {self.job_id}") - # Get MAAP token from Airflow connection - connection = BaseHook.get_connection('maap_api_pgt') - maap_pgt = connection.password - - if not maap_pgt: - raise AirflowException("MAAP_PGT token not found in Airflow connection") + # Get MAAP token from Kubernetes secret + k8s_hook = KubernetesHook() + secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) + maap_pgt = secret.data.get("MAAP_PGT") + + if maap_pgt: + import base64 + maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') + else: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") monitor_url = self.monitor_url_template.format(job_id=self.job_id) headers = { @@ -179,7 +187,7 @@ def execute(self, context): dag = DAG( dag_id="run_ogc_process", description="Submits a job to an OGC process and monitors", - dag_display_name="Run an OGC Process", + dag_display_name="Run an OGC Process (custom operators)", tags=["ogc", "job"], is_paused_upon_creation=False, catchup=False, diff --git a/airflow/dags/run_ogc_process2.py b/airflow/dags/run_ogc_process2.py index ada1ce0b..dcd101c3 100644 --- a/airflow/dags/run_ogc_process2.py +++ b/airflow/dags/run_ogc_process2.py @@ -106,7 +106,7 @@ def fetch_ogc_processes(): name="MONITOR_JOB_URL", value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", ), - k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}"), + k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task2', key='return_value')['job_id'] }}"), k8s.V1EnvVar(name="SUBMIT_JOB", value="false") ] @@ -115,7 +115,7 @@ def fetch_ogc_processes(): dag = DAG( dag_id="run_ogc_process2", description="Submits a job to an OGC process and monitors", - dag_display_name="Run an OGC Process2", + dag_display_name="Run an OGC Process (KubernetesPodOperators approach)", tags=["ogc", "job"], is_paused_upon_creation=False, catchup=False, diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index b6a56b48..2d6abe31 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -372,5 +372,3 @@ extraEnv: | value: "1024" - name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG value: "True" - - name: AIRFLOW_CONN_MAAP_API_PGT - value: "http://secret:k8s-secret:sps-app-credentials:MAAP_PGT@https://api.dit.maap-project.org" From da99125679d223c3e312a190c44edb0ebad62a2a Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Fri, 1 Aug 2025 16:29:45 -0700 Subject: [PATCH 10/19] AI attempts of a dynamic UI to load inputs for processes --- airflow/dags/dynamic_form_test_dag.py | 155 ++++++++ airflow/dags/ogc_process_selector.py | 392 +++++++++++++++++++ airflow/dags/run_ogc_process.py | 403 +++++++++++--------- airflow/dags/run_ogc_process2.py | 364 +++++++++++------- airflow/dags/run_ogc_process_executor.py | 329 ++++++++++++++++ airflow/plugins/dynamic_form_plugin.py | 55 +++ airflow/plugins/templates/dynamic_form.html | 237 ++++++++++++ 7 files changed, 1632 insertions(+), 303 deletions(-) create mode 100644 airflow/dags/dynamic_form_test_dag.py create mode 100644 airflow/dags/ogc_process_selector.py create mode 100644 airflow/dags/run_ogc_process_executor.py create mode 100644 airflow/plugins/dynamic_form_plugin.py create mode 100644 airflow/plugins/templates/dynamic_form.html diff --git a/airflow/dags/dynamic_form_test_dag.py b/airflow/dags/dynamic_form_test_dag.py new file mode 100644 index 00000000..806c87bb --- /dev/null +++ b/airflow/dags/dynamic_form_test_dag.py @@ -0,0 +1,155 @@ +""" +Dynamic Form Test DAG + +This DAG demonstrates dynamic form functionality where: +- Initial dropdown has options a, b, c +- Option a shows fields 1 and 2 +- Option b shows fields 3 and 4 +- Option c shows fields 5 and 6 + +To use this DAG: +1. Access the form at: http://localhost:8080/dynamic_form/dynamic_form_test +2. Select an option from the dropdown +3. Fill in the conditional fields that appear +4. Submit to trigger the DAG with the form data +""" + +from datetime import datetime, timedelta +from airflow import DAG +from airflow.operators.python import PythonOperator +import logging + +logger = logging.getLogger(__name__) + +default_args = { + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime(2024, 1, 1), + "email_on_failure": False, + "email_on_retry": False, + "retries": 1, + "retry_delay": timedelta(minutes=5), +} + +def process_form_data(**context): + """Process the form data received from the dynamic form""" + conf = context.get('dag_run').conf or {} + + logger.info("=== Dynamic Form Data Processing ===") + logger.info(f"Received configuration: {conf}") + + main_option = conf.get('main_option') + logger.info(f"Main option selected: {main_option}") + + if main_option == 'a': + field_1 = conf.get('field_1', '') + field_2 = conf.get('field_2', '') + logger.info(f"Option A selected - Field 1: {field_1}, Field 2: {field_2}") + print(f"Processing Option A with values: Field 1='{field_1}', Field 2='{field_2}'") + + elif main_option == 'b': + field_3 = conf.get('field_3', '') + field_4 = conf.get('field_4', '') + logger.info(f"Option B selected - Field 3: {field_3}, Field 4: {field_4}") + print(f"Processing Option B with values: Field 3='{field_3}', Field 4='{field_4}'") + + elif main_option == 'c': + field_5 = conf.get('field_5', '') + field_6 = conf.get('field_6', '') + logger.info(f"Option C selected - Field 5: {field_5}, Field 6: {field_6}") + print(f"Processing Option C with values: Field 5='{field_5}', Field 6='{field_6}'") + + else: + logger.warning(f"Unknown or missing main_option: {main_option}") + print(f"Warning: Unknown option '{main_option}' or no option provided") + + return f"Successfully processed form data for option: {main_option}" + +def validate_form_data(**context): + """Validate the form data received""" + conf = context.get('dag_run').conf or {} + + main_option = conf.get('main_option') + + if not main_option: + raise ValueError("No main_option provided in form data") + + if main_option not in ['a', 'b', 'c']: + raise ValueError(f"Invalid main_option: {main_option}. Must be 'a', 'b', or 'c'") + + # Validate required fields based on option + if main_option == 'a': + if not conf.get('field_1') or not conf.get('field_2'): + raise ValueError("Option A requires both field_1 and field_2 to be filled") + elif main_option == 'b': + if not conf.get('field_3') or not conf.get('field_4'): + raise ValueError("Option B requires both field_3 and field_4 to be filled") + elif main_option == 'c': + if not conf.get('field_5') or not conf.get('field_6'): + raise ValueError("Option C requires both field_5 and field_6 to be filled") + + logger.info("Form data validation passed") + return "Validation successful" + +def simulate_processing(**context): + """Simulate some processing based on the selected option""" + conf = context.get('dag_run').conf or {} + main_option = conf.get('main_option') + + import time + + if main_option == 'a': + logger.info("Simulating processing for Option A...") + print("Executing Option A workflow...") + time.sleep(5) # Simulate work + print("Option A processing completed") + + elif main_option == 'b': + logger.info("Simulating processing for Option B...") + print("Executing Option B workflow...") + time.sleep(3) # Simulate work + print("Option B processing completed") + + elif main_option == 'c': + logger.info("Simulating processing for Option C...") + print("Executing Option C workflow...") + time.sleep(7) # Simulate work + print("Option C processing completed") + + return f"Processing completed for option {main_option}" + +# Create the DAG +with DAG( + dag_id="dynamic_form_test", + default_args=default_args, + description="Test DAG for dynamic form functionality", + schedule=None, # Only triggered manually via form + is_paused_upon_creation=False, + catchup=False, + tags=["test", "dynamic-form", "proof-of-concept"], + doc_md=__doc__, +) as dag: + + # Task 1: Validate form data + validate_task = PythonOperator( + task_id="validate_form_data", + python_callable=validate_form_data, + doc_md="Validates that required form fields are present based on selected option" + ) + + # Task 2: Process form data + process_task = PythonOperator( + task_id="process_form_data", + python_callable=process_form_data, + doc_md="Processes and logs the form data received from the dynamic form" + ) + + # Task 3: Simulate processing + simulate_task = PythonOperator( + task_id="simulate_processing", + python_callable=simulate_processing, + doc_md="Simulates different processing workflows based on the selected option" + ) + + # Set task dependencies + validate_task >> process_task >> simulate_task \ No newline at end of file diff --git a/airflow/dags/ogc_process_selector.py b/airflow/dags/ogc_process_selector.py new file mode 100644 index 00000000..105716ea --- /dev/null +++ b/airflow/dags/ogc_process_selector.py @@ -0,0 +1,392 @@ +""" +OGC Process Selector DAG - Step 1: User selects a process and this creates/updates +dynamic input DAGs with specific fields for that process. +""" +import json +import logging +from datetime import datetime +import requests +import re +import os + +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.operators.python import PythonOperator +from airflow.exceptions import AirflowException +import time + +def fetch_ogc_processes(): + """Fetch available processes from the OGC API and create mapping.""" + try: + response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) + response.raise_for_status() + + processes_data = response.json() + process_mapping = {} + dropdown_options = [] + + for process in processes_data.get("processes", []): + process_id = process.get("id") + process_version = process.get("version") + + # Extract numerical ID from links + numerical_id = None + for link in process.get("links", []): + if link.get("rel") == "self": + href = link.get("href", "") + # Extract number from href like "/ogc/processes/7" + match = re.search(r'/processes/(\d+)$', href) + if match: + numerical_id = int(match.group(1)) + break + + if process_id and numerical_id: + display_name = f"{process_id}:{process_version}" if process_version else process_id + dropdown_options.append(display_name) + process_mapping[display_name] = numerical_id + + return process_mapping, dropdown_options + + except requests.RequestException as e: + logging.error(f"Failed to fetch processes: {e}") + return {"example-process:1.0": 1}, ["example-process:1.0"] + except Exception as e: + logging.error(f"Error processing OGC processes: {e}") + return {"example-process:1.0": 1}, ["example-process:1.0"] + +# Constants +PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() + +dag_default_args = { + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime.utcfromtimestamp(0), +} + +# --- DAG Definition --- + +dag = DAG( + dag_id="ogc_process_selector", + description="Step 1: Select an OGC process to create dynamic input DAG", + dag_display_name="🔧 Step 1: Select OGC Process", + tags=["ogc", "step1", "selector"], + is_paused_upon_creation=False, + catchup=False, + schedule=None, + max_active_runs=10, + default_args=dag_default_args, + params={ + "selected_process": Param( + default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "example-process:1.0", + enum=DROPDOWN_OPTIONS, + title="🎯 Select Process", + description=f"Choose a process to configure. Available: {', '.join(DROPDOWN_OPTIONS[:3])}{'...' if len(DROPDOWN_OPTIONS) > 3 else ''}", + ), + "queue": Param( + "maap-dps-sandbox", + type="string", + title="🚀 Execution Queue", + description="The MAAP queue to submit the job to", + ), + }, +) + +def create_dynamic_input_dag(**context): + """Create a dynamic input DAG for the selected process.""" + + selected_process = context['params'].get('selected_process') + queue = context['params'].get('queue', 'maap-dps-sandbox') + + if not selected_process or selected_process not in PROCESS_MAPPING: + raise AirflowException(f"Invalid process selection: {selected_process}") + + numerical_id = PROCESS_MAPPING[selected_process] + logging.info(f"Creating input DAG for process '{selected_process}' (ID: {numerical_id})") + + # Fetch process schema + try: + process_url = f"https://api.dit.maap-project.org/api/ogc/processes/{numerical_id}" + response = requests.get(process_url, timeout=30) + response.raise_for_status() + + process_details = response.json() + inputs_schema = process_details.get("inputs", {}) + process_title = process_details.get("title", selected_process) + process_description = process_details.get("description", "No description available") + + logging.info(f"Process: {process_title}") + logging.info(f"Description: {process_description}") + logging.info(f"Input fields: {list(inputs_schema.keys())}") + + except requests.RequestException as e: + raise AirflowException(f"Failed to fetch process schema: {e}") + + # Generate the dynamic DAG file + dag_content = generate_input_dag_content( + selected_process=selected_process, + numerical_id=numerical_id, + inputs_schema=inputs_schema, + process_title=process_title, + process_description=process_description, + queue=queue + ) + + # Write the DAG file + dags_folder = os.path.dirname(os.path.abspath(__file__)) + safe_process_name = selected_process.replace(":", "_").replace("-", "_") + dag_filename = f"ogc_input_{safe_process_name}.py" + dag_filepath = os.path.join(dags_folder, dag_filename) + + try: + with open(dag_filepath, 'w') as f: + f.write(dag_content) + + logging.info(f"✅ Created dynamic input DAG: {dag_filename}") + logging.info("=" * 60) + logging.info("🎉 SUCCESS! Your input DAG has been created!") + logging.info("=" * 60) + logging.info(f"📋 Process: {process_title}") + logging.info(f"🆔 DAG ID: ogc_input_{safe_process_name}") + logging.info(f"📁 File: {dag_filename}") + logging.info("=" * 60) + logging.info("📝 NEXT STEPS:") + logging.info("1. Wait 10-30 seconds for Airflow to detect the new DAG") + logging.info(f"2. Look for DAG: 'Step 2: {process_title} - Inputs'") + logging.info("3. Run that DAG to configure your process inputs") + logging.info("=" * 60) + + return { + "success": True, + "dag_id": f"ogc_input_{safe_process_name}", + "dag_file": dag_filename, + "process_title": process_title, + "input_count": len(inputs_schema) + } + + except Exception as e: + logging.error(f"Failed to write DAG file: {e}") + raise AirflowException(f"Failed to create input DAG: {e}") + +def generate_input_dag_content(selected_process, numerical_id, inputs_schema, process_title, process_description, queue): + """Generate the content for the dynamic input DAG.""" + + safe_process_name = selected_process.replace(":", "_").replace("-", "_") + + # Generate Param definitions for each input + param_definitions = [] + for input_key, input_def in inputs_schema.items(): + input_title = input_def.get('title', input_key) + input_desc = input_def.get('description', f'Input for {input_key}') + input_type = input_def.get('type', 'string') + input_default = input_def.get('default') + input_placeholder = input_def.get('placeholder', '') + + # Map OGC types to Airflow Param types + if input_type in ['text', 'string']: + param_type = 'string' + default_value = input_default or input_placeholder or "" + elif input_type in ['number', 'integer', 'float']: + param_type = 'number' + default_value = input_default or 0 + elif input_type == 'boolean': + param_type = 'boolean' + default_value = input_default or False + else: + param_type = 'string' + default_value = input_default or "" + + # Create description with type info + full_description = f"{input_desc}" + if input_placeholder: + full_description += f" (e.g., {input_placeholder})" + + param_def = f''' "{input_key}": Param( + default={repr(default_value)}, + type="{param_type}", + title="🔧 {input_title}", + description="{full_description}", + ),''' + + param_definitions.append(param_def) + + params_section = "\n".join(param_definitions) + + # Generate the DAG content + dag_content = f'''""" +Dynamic Input DAG for {process_title} +Generated automatically for process: {selected_process} + +{process_description} +""" +import json +import logging +from datetime import datetime + +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.operators.python import PythonOperator +from airflow.operators.trigger_dagrun import TriggerDagRunOperator +from airflow.exceptions import AirflowException + +dag_default_args = {{ + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime.utcfromtimestamp(0), +}} + +# Process configuration +SELECTED_PROCESS = "{selected_process}" +NUMERICAL_ID = {numerical_id} +PROCESS_TITLE = "{process_title}" +DEFAULT_QUEUE = "{queue}" + +# Input schema +INPUTS_SCHEMA = {json.dumps(inputs_schema, indent=4)} + +# --- DAG Definition --- + +dag = DAG( + dag_id="ogc_input_{safe_process_name}", + description="Step 2: Configure inputs for {process_title}", + dag_display_name="⚙️ Step 2: {process_title} - Inputs", + tags=["ogc", "step2", "inputs", "{safe_process_name}"], + is_paused_upon_creation=False, + catchup=False, + schedule=None, + max_active_runs=5, + default_args=dag_default_args, + params={{ +{params_section} + "queue": Param( + default=DEFAULT_QUEUE, + type="string", + title="🚀 Execution Queue", + description="The MAAP queue to submit the job to", + ), + }}, +) + +def validate_and_trigger_execution(**context): + """Validate inputs and trigger the execution DAG.""" + + logging.info("Validating inputs and preparing execution...") + logging.info(f"Process: {{PROCESS_TITLE}}") + logging.info(f"Process ID: {{NUMERICAL_ID}}") + + # Collect all input values + validated_inputs = {{}} + params = context['params'] + + for input_key, input_def in INPUTS_SCHEMA.items(): + if input_key in params: + value = params[input_key] + validated_inputs[input_key] = value + logging.info(f"✓ {{input_key}}: {{value}}") + elif input_def.get('default') is not None: + default_value = input_def.get('default') + validated_inputs[input_key] = default_value + logging.info(f"→ {{input_key}}: {{default_value}} (default)") + else: + logging.warning(f"⚠ No value for {{input_key}}") + + queue = params.get('queue', DEFAULT_QUEUE) + + # Prepare execution parameters + execution_params = {{ + "selected_process": SELECTED_PROCESS, + "numerical_process_id": NUMERICAL_ID, + "queue": queue, + "job_inputs": json.dumps(validated_inputs), + "process_title": PROCESS_TITLE + }} + + logging.info("=" * 60) + logging.info("🚀 TRIGGERING EXECUTION") + logging.info("=" * 60) + logging.info(f"Final inputs: {{json.dumps(validated_inputs, indent=2)}}") + logging.info(f"Queue: {{queue}}") + logging.info("=" * 60) + + return {{ + "execution_params": execution_params, + "validated_inputs": validated_inputs + }} + +def execution_summary(**context): + """Provide execution summary.""" + + validation_result = context['ti'].xcom_pull(task_ids='validate_inputs') + + logging.info("=" * 60) + logging.info("✅ INPUT VALIDATION COMPLETED") + logging.info("=" * 60) + logging.info(f"Process: {{PROCESS_TITLE}}") + logging.info(f"Input fields configured: {{len(validation_result['validated_inputs'])}}") + logging.info("The execution DAG has been triggered!") + logging.info("Check the 'ogc_process_executor' DAG for progress.") + logging.info("=" * 60) + +# Task to validate inputs +validate_task = PythonOperator( + task_id="validate_inputs", + python_callable=validate_and_trigger_execution, + dag=dag, +) + +# Task to trigger execution +trigger_task = TriggerDagRunOperator( + task_id="trigger_execution", + trigger_dag_id="ogc_process_executor", + conf="{{{{ ti.xcom_pull(task_ids='validate_inputs')['execution_params'] }}}}", + wait_for_completion=False, + dag=dag, +) + +# Summary task +summary_task = PythonOperator( + task_id="execution_summary", + python_callable=execution_summary, + dag=dag, +) + +validate_task >> trigger_task >> summary_task +''' + + return dag_content + +# Task to create the dynamic DAG +create_dag_task = PythonOperator( + task_id="create_input_dag", + python_callable=create_dynamic_input_dag, + dag=dag, +) + +def completion_message(**context): + """Display completion message with next steps.""" + + result = context['ti'].xcom_pull(task_ids='create_input_dag') + + if result and result.get('success'): + logging.info("=" * 60) + logging.info("🎉 PROCESS SELECTION COMPLETED!") + logging.info("=" * 60) + logging.info(f"📋 Process: {result['process_title']}") + logging.info(f"🆔 Input DAG ID: {result['dag_id']}") + logging.info(f"📊 Input fields: {result['input_count']}") + logging.info("=" * 60) + logging.info("📝 WHAT'S NEXT:") + logging.info("1. Wait 10-30 seconds for the new DAG to appear") + logging.info(f"2. Look for: 'Step 2: {result['process_title']} - Inputs'") + logging.info("3. Run that DAG to configure your specific inputs") + logging.info("4. The execution will be triggered automatically") + logging.info("=" * 60) + else: + logging.error("Failed to create input DAG") + +completion_task = PythonOperator( + task_id="completion_message", + python_callable=completion_message, + dag=dag, +) + +create_dag_task >> completion_task \ No newline at end of file diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 2d3b35f5..d047beb2 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -1,180 +1,211 @@ """ -DAG with two separate tasks to submit a job to an OGC-compliant process API -and then monitor its status. +DAG with custom SPSOGCOperator that subclasses KubernetesPodOperator +for OGC process execution with SPS-specific functionality. """ import json import logging from datetime import datetime -import os import requests +import re from airflow.models.dag import DAG from airflow.models.param import Param -from airflow.models.baseoperator import BaseOperator, chain +from airflow.models.baseoperator import chain from airflow.operators.python import PythonOperator, get_current_context +from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator +from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret from airflow.utils.trigger_rule import TriggerRule -from airflow.exceptions import AirflowException -from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook -import time +from kubernetes.client import models as k8s +from unity_sps_utils import ( + DEFAULT_LOG_LEVEL, + EC2_TYPES, + NODE_POOL_DEFAULT, + NODE_POOL_HIGH_WORKLOAD, + POD_LABEL, + POD_NAMESPACE, + build_ec2_type_label, + get_affinity, +) + +def fetch_ogc_processes(): + """Fetch available processes from the OGC API and create mapping.""" + try: + response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) + response.raise_for_status() + + processes_data = response.json() + process_mapping = {} + dropdown_options = [] + + for process in processes_data.get("processes", []): + process_id = process.get("id") + process_version = process.get("version") + + # Extract numerical ID from links + numerical_id = None + for link in process.get("links", []): + if link.get("rel") == "self": + href = link.get("href", "") + # Extract number from href like "/ogc/processes/7" + match = re.search(r'/processes/(\d+)$', href) + if match: + numerical_id = int(match.group(1)) + break + + if process_id and numerical_id: + display_name = f"{process_id}:{process_version}" if process_version else process_id + dropdown_options.append(display_name) + process_mapping[display_name] = numerical_id + + return process_mapping, dropdown_options + + except requests.RequestException as e: + logging.error(f"Failed to fetch processes: {e}") + # Return fallback mapping + return {"example-process:1.0": 1}, ["example-process:1.0"] + except Exception as e: + logging.error(f"Error processing OGC processes: {e}") + return {"example-process:1.0": 1}, ["example-process:1.0"] +# Constants K8S_SECRET_NAME = "sps-app-credentials" +DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" +PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() + +# SPS-specific secrets +secret_env_vars = [ + AirflowK8sSecret( + deploy_type="env", + deploy_target="MAAP_PGT", + secret=K8S_SECRET_NAME, + key="MAAP_PGT", + ) +] -class OGCSubmitJobOperator(BaseOperator): - """Custom operator to submit jobs to OGC API endpoints.""" +class SPSOGCOperator(KubernetesPodOperator): + """ + Custom operator for SPS OGC process execution that subclasses KubernetesPodOperator. - template_fields = ("process_id", "job_inputs", "job_queue") + This operator encapsulates all SPS-specific configuration and provides a clean + interface for OGC process submission and monitoring. + """ - def __init__(self, process_id, job_inputs, job_queue, - submit_url_template="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + def __init__(self, + operation_type: str, + selected_process: str = None, + job_inputs: str = None, + job_queue: str = None, + job_id: str = None, **kwargs): - super().__init__(**kwargs) - self.process_id = process_id + """ + Initialize the SPSOGCOperator. + + Args: + operation_type: Either "submit" or "monitor" + selected_process: Process selection for submit operations + job_inputs: JSON string of job inputs for submit operations + job_queue: Queue name for submit operations + job_id: Job ID for monitor operations + """ + self.operation_type = operation_type + self.selected_process = selected_process self.job_inputs = job_inputs self.job_queue = job_queue - self.submit_url_template = submit_url_template + self.job_id = job_id + + # Set SPS-specific defaults + kwargs.setdefault('namespace', POD_NAMESPACE) + kwargs.setdefault('image', DOCKER_IMAGE) + kwargs.setdefault('service_account_name', 'airflow-worker') + kwargs.setdefault('secrets', secret_env_vars) + kwargs.setdefault('in_cluster', True) + kwargs.setdefault('get_logs', True) + kwargs.setdefault('startup_timeout_seconds', 600) + kwargs.setdefault('container_security_context', {"privileged": True}) + kwargs.setdefault('container_logs', True) + kwargs.setdefault('labels', {"pod": POD_LABEL}) + kwargs.setdefault('annotations', {"karpenter.sh/do-not-disrupt": "true"}) + kwargs.setdefault('affinity', get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + )) + kwargs.setdefault('on_finish_action', "keep_pod") + kwargs.setdefault('is_delete_operator_pod', False) + + # Build operation-specific environment variables + if operation_type == "submit": + kwargs['env_vars'] = self._build_submit_env_vars() + kwargs['name'] = f"ogc-submit-pod-{kwargs.get('task_id', 'unknown')}" + kwargs.setdefault('do_xcom_push', True) # Submit tasks need to return job ID + elif operation_type == "monitor": + kwargs['env_vars'] = self._build_monitor_env_vars() + kwargs['name'] = f"ogc-monitor-pod-{kwargs.get('task_id', 'unknown')}" + else: + raise ValueError(f"Invalid operation_type: {operation_type}. Must be 'submit' or 'monitor'") + + super().__init__(**kwargs) - def execute(self, context): - """Submit job to OGC API and return job ID.""" + def _build_submit_env_vars(self): + """Build environment variables for job submission.""" + # Resolve numerical process ID from selected process + numerical_process_id = self._resolve_process_id() - try: - # Get MAAP token from Kubernetes secret - k8s_hook = KubernetesHook() - secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) - maap_pgt = secret.data.get("MAAP_PGT") - - if maap_pgt: - import base64 - maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') - else: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") - - # Extract process ID if in format "id:version" - #actual_process_id = self.process_id.split(':')[0] if ':' in str(self.process_id) else self.process_id - - # Prepare URL and payload - submit_url = self.submit_url_template.format(process_id=self.process_id) - - # Parse job inputs if it's a string - if isinstance(self.job_inputs, str): - try: - job_inputs_dict = json.loads(self.job_inputs) - except json.JSONDecodeError: - job_inputs_dict = {} - else: - job_inputs_dict = self.job_inputs or {} - - payload = { - "queue": self.job_queue, - "inputs": job_inputs_dict - } - - headers = { - "proxy-ticket": maap_pgt, - "Content-Type": "application/json" - } - - self.log.info(f"Submitting job to {submit_url}") - self.log.info(f"Job payload: {json.dumps(payload, indent=2)}") - - # Submit job - response = requests.post(submit_url, json=payload, headers=headers, timeout=60) - response.raise_for_status() - - result = response.json() - job_id = result.get("id") - - if not job_id: - raise AirflowException(f"Failed to get job ID from response: {result}") - - self.log.info(f"Job submitted successfully. Job ID: {job_id}") - - # Return job_id for next task - return {"job_id": job_id} - - except requests.RequestException as e: - self.log.error(f"HTTP request failed: {e}") - raise AirflowException(f"Failed to submit job: {e}") - except Exception as e: - self.log.error(f"Job submission failed: {e}") - raise AirflowException(f"Job submission error: {e}") - - -class OGCMonitorJobOperator(BaseOperator): - """Custom operator to monitor OGC job status.""" + return [ + k8s.V1EnvVar( + name="SUBMIT_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + ), + k8s.V1EnvVar(name="PROCESS_ID", value=str(numerical_process_id)), + k8s.V1EnvVar(name="JOB_INPUTS", value=self.job_inputs or "{}"), + k8s.V1EnvVar(name="QUEUE", value=self.job_queue or "maap-dps-sandbox"), + k8s.V1EnvVar(name="SUBMIT_JOB", value="true") + ] - template_fields = ("job_id",) + def _build_monitor_env_vars(self): + """Build environment variables for job monitoring.""" + return [ + k8s.V1EnvVar( + name="MONITOR_JOB_URL", + value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", + ), + k8s.V1EnvVar(name="JOB_ID", value=self.job_id), + k8s.V1EnvVar(name="SUBMIT_JOB", value="false") + ] - def __init__(self, job_id, - monitor_url_template="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", - timeout=3600, poll_interval=30, **kwargs): - super().__init__(**kwargs) - self.job_id = job_id - self.monitor_url_template = monitor_url_template - self.timeout = timeout - self.poll_interval = poll_interval + def _resolve_process_id(self): + """Resolve the selected process to a numerical process ID.""" + if not self.selected_process: + raise ValueError("selected_process is required for submit operations") + + # Handle templated values - they won't be resolved yet during __init__ + if "{{" in str(self.selected_process): + # Return a template that will be resolved at runtime + return "{{ ti.xcom_pull(task_ids='Setup', key='return_value')['numerical_process_id'] }}" + + # Direct lookup for non-templated values + numerical_id = PROCESS_MAPPING.get(self.selected_process) + if numerical_id is None: + self.log.warning(f"Process '{self.selected_process}' not found in mapping, defaulting to ID 1") + return 1 + + return numerical_id def execute(self, context): - """Monitor job status until completion or timeout.""" + """Execute the operator with additional SPS-specific logging.""" + self.log.info(f"Starting SPS OGC {self.operation_type} operation") - try: - self.log.info(f"Monitoring job with ID: {self.job_id}") - - # Get MAAP token from Kubernetes secret - k8s_hook = KubernetesHook() - secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) - maap_pgt = secret.data.get("MAAP_PGT") - - if maap_pgt: - import base64 - maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') - else: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") - - monitor_url = self.monitor_url_template.format(job_id=self.job_id) - headers = { - "proxy-ticket": maap_pgt, - "Content-Type": "application/json" - } - - self.log.info(f"Monitoring job {self.job_id} at {monitor_url}") - - start_time = time.time() - - while time.time() - start_time < self.timeout: - try: - response = requests.get(monitor_url, headers=headers, timeout=30) - response.raise_for_status() - - result = response.json() - status = result.get("status", "unknown") - - self.log.info(f"Job {self.job_id} status: {status}") - - if status == "successful": - self.log.info(f"Job {self.job_id} completed successfully!") - return {"status": "successful", "result": result} - elif status == "failed": - error_msg = result.get("message", "No error message provided") - self.log.error(f"Job {self.job_id} failed: {error_msg}") - raise AirflowException(f"Job {self.job_id} failed: {error_msg}") - elif status in ["running", "accepted", "processing"]: - self.log.info(f"Job {self.job_id} still {status}, waiting {self.poll_interval}s...") - time.sleep(self.poll_interval) - else: - self.log.warning(f"Unknown job status: {status}") - time.sleep(self.poll_interval) - - except requests.RequestException as e: - self.log.warning(f"Request failed, retrying: {e}") - time.sleep(self.poll_interval) - continue - - # Timeout reached - raise AirflowException(f"Job {self.job_id} monitoring timed out after {self.timeout} seconds") - - except Exception as e: - self.log.error(f"Job monitoring failed: {e}") - raise + if self.operation_type == "submit": + self.log.info(f"Selected process: {self.selected_process}") + self.log.info(f"Job queue: {self.job_queue}") + self.log.info(f"Job inputs: {self.job_inputs}") + elif self.operation_type == "monitor": + self.log.info(f"Monitoring job ID: {self.job_id}") + + # Call parent execute method + result = super().execute(context) + + self.log.info(f"SPS OGC {self.operation_type} operation completed") + return result dag_default_args = { "owner": "unity-sps", @@ -185,29 +216,30 @@ def execute(self, context): # --- DAG Definition --- dag = DAG( - dag_id="run_ogc_process", - description="Submits a job to an OGC process and monitors", - dag_display_name="Run an OGC Process (custom operators)", - tags=["ogc", "job"], + dag_id="run_ogc_process3", + description="Submits a job to an OGC process and monitors (using custom SPSOGCOperator)", + dag_display_name="Run an OGC Process (Custom Operator from KubernetesPodOperator)", + tags=["ogc", "job", "custom-operator"], is_paused_upon_creation=False, catchup=False, schedule=None, max_active_runs=10, default_args=dag_default_args, params={ - "process_id": Param( - type="integer", - title="Process ID", - description="The numerical identifier of the OGC process to execute.", + "selected_process": Param( + default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "Error loading dropdown", + enum=DROPDOWN_OPTIONS, + title="Process Selection", + description=f"Select a process to execute.", ), - "job_queue": Param( + "queue": Param( "maap-dps-sandbox", type="string", title="Queue", description="The MAAP queue to submit the job to", ), "job_inputs": Param( - {}, + "{}", type="string", title="Job Inputs", description="A JSON string representing the inputs payload for the job.", @@ -217,35 +249,56 @@ def execute(self, context): # --- Task Definitions --- -def setup(ti=None,**context): - """Task that selects the proper Karpenter Node Pool depending on the user requested resources.""" - - logging.info("Starting OGC job submission and monitoring DAG.") +def setup(ti=None, **context): + """Task that logs DAG parameters and process mapping information.""" + + logging.info("Starting OGC job submission and monitoring DAG (Custom Operator Version).") logging.info(f"Parameters received: {context['params']}") + logging.info(f"Available processes: {len(DROPDOWN_OPTIONS)}") + logging.info(f"Process mapping: {json.dumps(PROCESS_MAPPING, indent=2)}") + context = get_current_context() logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") + + selected_process = context['params'].get('selected_process') + if selected_process in PROCESS_MAPPING: + numerical_id = PROCESS_MAPPING[selected_process] + logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") + return {"numerical_process_id": numerical_id} + else: + logging.warning(f"Selected process '{selected_process}' not found in mapping") + return {"numerical_process_id": 1} setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) -submit_job_task = OGCSubmitJobOperator( - task_id="submit_job_task", - process_id="{{ params.process_id }}", +submit_job_task = SPSOGCOperator( + task_id="submit_job_task3", + operation_type="submit", + selected_process="{{ params.selected_process }}", job_inputs="{{ params.job_inputs }}", - job_queue="{{ params.job_queue }}", + job_queue="{{ params.queue }}", dag=dag, ) -monitor_job_task = OGCMonitorJobOperator( - task_id="monitor_job_task", - job_id="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}", - timeout=3600, - poll_interval=30, +monitor_job_task = SPSOGCOperator( + task_id="monitor_job_task3", + operation_type="monitor", + job_id="{{ ti.xcom_pull(task_ids='submit_job_task3', key='return_value')['job_id'] }}", dag=dag, ) def cleanup(**context): - """A placeholder cleanup task.""" + """A placeholder cleanup task""" logging.info("Cleanup executed.") + + # Log final results if available + submit_result = context['ti'].xcom_pull(task_ids='submit_job_task3', key='return_value') + monitor_result = context['ti'].xcom_pull(task_ids='monitor_job_task3', key='return_value') + + if submit_result: + logging.info(f"Job submission result: {submit_result}") + if monitor_result: + logging.info(f"Job monitoring result: {monitor_result}") cleanup_task = PythonOperator( task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE diff --git a/airflow/dags/run_ogc_process2.py b/airflow/dags/run_ogc_process2.py index dcd101c3..e9c5be84 100644 --- a/airflow/dags/run_ogc_process2.py +++ b/airflow/dags/run_ogc_process2.py @@ -1,33 +1,24 @@ """ -DAG with two separate tasks to submit a job to an OGC-compliant process API -and then monitor its status. +Dynamic OGC Process Launcher DAG - Fetches process input schema and triggers execution DAG. + +This DAG works in two stages: +1. User selects a process and this DAG fetches the input schema +2. This DAG triggers the execution DAG with the proper input parameters """ import json import logging from datetime import datetime -import os import requests import re from airflow.models.dag import DAG from airflow.models.param import Param from airflow.models.baseoperator import chain -from airflow.operators.python import PythonOperator -from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator -from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret -from airflow.utils.trigger_rule import TriggerRule -from kubernetes.client import models as k8s -from unity_sps_utils import ( - DEFAULT_LOG_LEVEL, - EC2_TYPES, - NODE_POOL_DEFAULT, - NODE_POOL_HIGH_WORKLOAD, - POD_LABEL, - POD_NAMESPACE, - build_ec2_type_label, - get_affinity, -) from airflow.operators.python import PythonOperator, get_current_context +from airflow.operators.trigger_dagrun import TriggerDagRunOperator +from airflow.exceptions import AirflowException +from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook +import time def fetch_ogc_processes(): """Fetch available processes from the OGC API and create mapping.""" @@ -55,7 +46,7 @@ def fetch_ogc_processes(): break if process_id and numerical_id: - display_name = f"{process_id}:{process_version}" + display_name = f"{process_id}:{process_version}" if process_version else process_id dropdown_options.append(display_name) process_mapping[display_name] = numerical_id @@ -69,54 +60,39 @@ def fetch_ogc_processes(): logging.error(f"Error processing OGC processes: {e}") return {"example-process:1.0": 1}, ["example-process:1.0"] +def get_maap_token(): + """Helper function to get MAAP token from Kubernetes secret.""" + try: + k8s_hook = KubernetesHook() + secret = k8s_hook.get_secret(name="sps-app-credentials", namespace=k8s_hook.get_namespace()) + maap_pgt = secret.data.get("MAAP_PGT") + + if maap_pgt: + import base64 + return base64.b64decode(maap_pgt).decode('utf-8') + else: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + except Exception as e: + logging.error(f"Failed to get MAAP token: {e}") + return None + +# Constants K8S_SECRET_NAME = "sps-app-credentials" -LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO"} PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() -DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" - -secret_env_vars = [ - AirflowK8sSecret( - deploy_type="env", - deploy_target="MAAP_PGT", - secret=K8S_SECRET_NAME, - key="MAAP_PGT", - ) -] - dag_default_args = { "owner": "unity-sps", "depends_on_past": False, "start_date": datetime.utcfromtimestamp(0), } -submit_job_env_vars = [ - k8s.V1EnvVar( - name="SUBMIT_JOB_URL", - value="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", - ), - k8s.V1EnvVar(name="PROCESS_ID", value="{{ ti.xcom_pull(task_ids='Setup', key='return_value')['numerical_process_id'] }}"), - k8s.V1EnvVar(name="JOB_INPUTS", value="{{ params.job_inputs }}"), - k8s.V1EnvVar(name="QUEUE", value="{{ params.queue }}"), - k8s.V1EnvVar(name="SUBMIT_JOB", value="true") -] - -monitor_job_env_vars = [ - k8s.V1EnvVar( - name="MONITOR_JOB_URL", - value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", - ), - k8s.V1EnvVar(name="JOB_ID", value="{{ ti.xcom_pull(task_ids='submit_job_task2', key='return_value')['job_id'] }}"), - k8s.V1EnvVar(name="SUBMIT_JOB", value="false") -] - # --- DAG Definition --- dag = DAG( dag_id="run_ogc_process2", - description="Submits a job to an OGC process and monitors", - dag_display_name="Run an OGC Process (KubernetesPodOperators approach)", - tags=["ogc", "job"], + description="Dynamic OGC Process Launcher - Fetches input schema and triggers execution", + dag_display_name="OGC Process Launcher (Dynamic Inputs)", + tags=["ogc", "launcher", "dynamic"], is_paused_upon_creation=False, catchup=False, schedule=None, @@ -135,95 +111,227 @@ def fetch_ogc_processes(): title="Queue", description="The MAAP queue to submit the job to", ), - "job_inputs": Param( - {}, + # Dynamic input fields will be populated based on process schema + "dynamic_inputs": Param( + "{}", type="string", - title="Job Inputs", - description="A JSON string representing the inputs payload for the job.", + title="Process Inputs (JSON)", + description="Enter process inputs as JSON. Schema will be displayed in logs after process selection.", ) }, ) # --- Task Definitions --- -def setup(ti=None, **context): - """Task that logs DAG parameters and process mapping information.""" - - logging.info("Starting OGC job submission and monitoring DAG (Dynamic Version).") - logging.info(f"Parameters received: {context['params']}") - logging.info(f"Available processes: {len(DROPDOWN_OPTIONS)}") - logging.info(f"Process mapping: {json.dumps(PROCESS_MAPPING, indent=2)}") + +def fetch_process_schema(**context): + """Fetch the input schema for the selected process.""" - context = get_current_context() - logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") + logging.info("Fetching process input schema...") selected_process = context['params'].get('selected_process') - if selected_process in PROCESS_MAPPING: - numerical_id = PROCESS_MAPPING[selected_process] - logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") - return {"numerical_process_id": numerical_id} - else: - logging.warning(f"Selected process '{selected_process}' not found in mapping") - return {"numerical_process_id": 1} - -setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) - -submit_job_task = KubernetesPodOperator( - task_id="submit_job_task2", - namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, - name="ogc-submit-pod", - env_vars=submit_job_env_vars, - secrets=secret_env_vars, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=600, - container_security_context={"privileged": True}, - container_logs=True, - do_xcom_push=True, + if not selected_process or selected_process not in PROCESS_MAPPING: + raise AirflowException(f"Invalid process selection: {selected_process}") + + numerical_id = PROCESS_MAPPING[selected_process] + logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") + + # Fetch process details + try: + + process_url = f"https://api.dit.maap-project.org/api/ogc/processes/{numerical_id}" + response = requests.get(process_url, timeout=30) + response.raise_for_status() + + process_details = response.json() + inputs_schema = process_details.get("inputs", {}) + + logging.info(f"Process Details URL: {process_url}") + logging.info(f"Process Title: {process_details.get('title', 'N/A')}") + logging.info(f"Process Description: {process_details.get('description', 'N/A')}") + logging.info("=" * 60) + logging.info("INPUT SCHEMA FOR THIS PROCESS:") + logging.info("=" * 60) + + # Format the schema nicely for logging + for input_key, input_def in inputs_schema.items(): + logging.info(f"Input: {input_key}") + logging.info(f" Title: {input_def.get('title', input_key)}") + logging.info(f" Description: {input_def.get('description', 'No description')}") + logging.info(f" Type: {input_def.get('type', 'unknown')}") + logging.info(f" Default: {input_def.get('default', 'None')}") + if input_def.get('placeholder'): + logging.info(f" Placeholder: {input_def.get('placeholder')}") + logging.info("-" * 40) + + logging.info("=" * 60) + logging.info("EXAMPLE JSON INPUT:") + logging.info("=" * 60) + + # Create example JSON input + example_inputs = {} + for input_key, input_def in inputs_schema.items(): + if input_def.get('default') is not None: + example_inputs[input_key] = input_def.get('default') + elif input_def.get('placeholder'): + example_inputs[input_key] = input_def.get('placeholder') + else: + input_type = input_def.get('type', 'string') + if input_type == 'text' or input_type == 'string': + example_inputs[input_key] = f"example_{input_key}_value" + elif input_type == 'number' or input_type == 'integer': + example_inputs[input_key] = 0 + elif input_type == 'boolean': + example_inputs[input_key] = True + else: + example_inputs[input_key] = f"example_{input_key}_value" + + example_json = json.dumps(example_inputs, indent=2) + logging.info(example_json) + + logging.info("=" * 60) + logging.info("INSTRUCTIONS:") + logging.info("Copy the example JSON above, modify the values as needed,") + logging.info("and paste it into the 'Process Inputs (JSON)' field when") + logging.info("re-triggering this DAG.") + logging.info("=" * 60) + + return { + "numerical_process_id": numerical_id, + "selected_process": selected_process, + "inputs_schema": inputs_schema, + "example_inputs": example_inputs, + "schema_fetched": True + } + + except requests.RequestException as e: + logging.error(f"Failed to fetch process schema: {e}") + raise AirflowException(f"Failed to fetch process schema: {e}") + +def validate_and_trigger_execution(**context): + """Validate the dynamic inputs and trigger the execution DAG.""" + + logging.info("Validating inputs and preparing execution...") + + # Get schema info from previous task + schema_info = context['ti'].xcom_pull(task_ids='fetch_schema') + if not schema_info or not schema_info.get('schema_fetched'): + raise AirflowException("Schema was not properly fetched") + + selected_process = schema_info['selected_process'] + numerical_process_id = schema_info['numerical_process_id'] + inputs_schema = schema_info['inputs_schema'] + + # Parse user-provided dynamic inputs + dynamic_inputs_str = context['params'].get('dynamic_inputs', '{}') + try: + dynamic_inputs = json.loads(dynamic_inputs_str) if dynamic_inputs_str != '{}' else {} + except json.JSONDecodeError as e: + raise AirflowException(f"Invalid JSON in dynamic_inputs: {e}") + + logging.info(f"User provided inputs: {dynamic_inputs}") + + # Validate inputs against schema + validated_inputs = {} + for input_key, input_def in inputs_schema.items(): + if input_key in dynamic_inputs: + validated_inputs[input_key] = dynamic_inputs[input_key] + logging.info(f"✓ Using provided value for '{input_key}': {dynamic_inputs[input_key]}") + elif input_def.get('default') is not None: + validated_inputs[input_key] = input_def.get('default') + logging.info(f"→ Using default value for '{input_key}': {input_def.get('default')}") + else: + logging.warning(f"⚠ No value provided for required input '{input_key}'") + + # If no inputs were provided, show schema again and stop + if not dynamic_inputs: + logging.info("=" * 60) + logging.info("NO INPUTS PROVIDED - DISPLAYING SCHEMA AGAIN") + logging.info("=" * 60) + logging.info("Please provide inputs in the 'Process Inputs (JSON)' field") + logging.info("and re-trigger this DAG to proceed with execution.") + logging.info("=" * 60) + return { + "action": "schema_display_only", + "message": "Re-trigger DAG with proper inputs to execute the process" + } + + logging.info(f"Final validated inputs: {json.dumps(validated_inputs, indent=2)}") + + # Prepare parameters for execution DAG + execution_params = { + "selected_process": selected_process, + "queue": context['params'].get('queue', 'maap-dps-sandbox'), + "job_inputs": json.dumps(validated_inputs) + } + + logging.info("=" * 60) + logging.info("TRIGGERING EXECUTION DAG") + logging.info("=" * 60) + logging.info(f"Execution parameters: {json.dumps(execution_params, indent=2)}") + + return { + "action": "trigger_execution", + "execution_params": execution_params, + "numerical_process_id": numerical_process_id + } + +# Task to fetch process schema +fetch_schema_task = PythonOperator( + task_id="fetch_schema", + python_callable=fetch_process_schema, + dag=dag, +) + +# Task to validate inputs and prepare execution +validate_inputs_task = PythonOperator( + task_id="validate_inputs", + python_callable=validate_and_trigger_execution, dag=dag, - labels={"pod": POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=get_affinity( - capacity_type=["spot"], - anti_affinity_label=POD_LABEL, - ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, ) -monitor_job_task = KubernetesPodOperator( - task_id="monitor_job_task2", - namespace=POD_NAMESPACE, - image=DOCKER_IMAGE, - name="ogc-monitor-pod", - env_vars=monitor_job_env_vars, - secrets=secret_env_vars, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=600, - container_security_context={"privileged": True}, - container_logs=True, +# Task to trigger the execution DAG +trigger_execution_task = TriggerDagRunOperator( + task_id="trigger_execution", + trigger_dag_id="run_ogc_process_executor", # This DAG needs to be created + conf="{{ ti.xcom_pull(task_ids='validate_inputs', key='return_value')['execution_params'] }}", + wait_for_completion=False, dag=dag, - labels={"pod": POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=get_affinity( - capacity_type=["spot"], - anti_affinity_label=POD_LABEL, - ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, + trigger_rule="none_failed", # Only run if validation succeeded ) -def cleanup(**context): - """A placeholder cleanup task.""" - logging.info("Cleanup executed.") +def completion_summary(**context): + """Provide a summary of what happened.""" + + validation_result = context['ti'].xcom_pull(task_ids='validate_inputs', key='return_value') + + if validation_result and validation_result.get('action') == 'schema_display_only': + logging.info("=" * 60) + logging.info("LAUNCHER DAG COMPLETED - SCHEMA DISPLAY MODE") + logging.info("=" * 60) + logging.info("The process schema has been displayed in the logs.") + logging.info("Please check the 'fetch_schema' task logs for the input schema") + logging.info("and re-trigger this DAG with proper inputs to execute.") + logging.info("=" * 60) + elif validation_result and validation_result.get('action') == 'trigger_execution': + logging.info("=" * 60) + logging.info("LAUNCHER DAG COMPLETED - EXECUTION TRIGGERED") + logging.info("=" * 60) + logging.info("The execution DAG has been triggered successfully.") + logging.info("Check the 'run_ogc_process_executor' DAG for execution progress.") + logging.info("=" * 60) + else: + logging.info("DAG completed with unknown status") -cleanup_task = PythonOperator( - task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE +summary_task = PythonOperator( + task_id="completion_summary", + python_callable=completion_summary, + dag=dag, + trigger_rule="none_failed_min_one_success", ) -chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file +# Chain the tasks +chain( + fetch_schema_task, + validate_inputs_task, + [trigger_execution_task, summary_task] +) \ No newline at end of file diff --git a/airflow/dags/run_ogc_process_executor.py b/airflow/dags/run_ogc_process_executor.py new file mode 100644 index 00000000..3cc6f16f --- /dev/null +++ b/airflow/dags/run_ogc_process_executor.py @@ -0,0 +1,329 @@ +""" +OGC Process Executor DAG - Executes the actual OGC process with validated inputs. + +This DAG is triggered by the launcher DAG (run_ogc_process2) and performs +the actual process execution and monitoring. +""" +import json +import logging +from datetime import datetime +import requests +import re + +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.models.baseoperator import BaseOperator, chain +from airflow.operators.python import PythonOperator, get_current_context +from airflow.utils.trigger_rule import TriggerRule +from airflow.exceptions import AirflowException +from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook +import time + +K8S_SECRET_NAME = "sps-app-credentials" + +def fetch_ogc_processes(): + """Fetch available processes from the OGC API and create mapping.""" + try: + response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) + response.raise_for_status() + + processes_data = response.json() + process_mapping = {} + + for process in processes_data.get("processes", []): + process_id = process.get("id") + process_version = process.get("version") + + # Extract numerical ID from links + numerical_id = None + for link in process.get("links", []): + if link.get("rel") == "self": + href = link.get("href", "") + # Extract number from href like "/ogc/processes/7" + match = re.search(r'/processes/(\d+)$', href) + if match: + numerical_id = int(match.group(1)) + break + + if process_id and numerical_id: + display_name = f"{process_id}:{process_version}" if process_version else process_id + process_mapping[display_name] = numerical_id + + return process_mapping + + except requests.RequestException as e: + logging.error(f"Failed to fetch processes: {e}") + return {"example-process:1.0": 1} + except Exception as e: + logging.error(f"Error processing OGC processes: {e}") + return {"example-process:1.0": 1} + +PROCESS_MAPPING = fetch_ogc_processes() + +class OGCSubmitJobOperator(BaseOperator): + """Custom operator to submit jobs to OGC API endpoints.""" + + template_fields = ("process_id", "job_inputs", "job_queue") + + def __init__(self, process_id, job_inputs, job_queue, + submit_url_template="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", + **kwargs): + super().__init__(**kwargs) + self.process_id = process_id + self.job_inputs = job_inputs + self.job_queue = job_queue + self.submit_url_template = submit_url_template + + def execute(self, context): + """Submit job to OGC API and return job ID.""" + + try: + # Get MAAP token from Kubernetes secret + k8s_hook = KubernetesHook() + secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) + maap_pgt = secret.data.get("MAAP_PGT") + + if maap_pgt: + import base64 + maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') + else: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + + # Prepare URL and payload + submit_url = self.submit_url_template.format(process_id=self.process_id) + + # Parse job inputs if it's a string + if isinstance(self.job_inputs, str): + try: + job_inputs_dict = json.loads(self.job_inputs) + except json.JSONDecodeError: + job_inputs_dict = {} + else: + job_inputs_dict = self.job_inputs or {} + + payload = { + "queue": self.job_queue, + "inputs": job_inputs_dict + } + + headers = { + "proxy-ticket": maap_pgt, + "Content-Type": "application/json" + } + + self.log.info(f"Submitting job to {submit_url}") + self.log.info(f"Job payload: {json.dumps(payload, indent=2)}") + + # Submit job + response = requests.post(submit_url, json=payload, headers=headers, timeout=60) + response.raise_for_status() + + result = response.json() + job_id = result.get("id") + + if not job_id: + raise AirflowException(f"Failed to get job ID from response: {result}") + + self.log.info(f"Job submitted successfully. Job ID: {job_id}") + + # Return job_id for next task + return {"job_id": job_id} + + except requests.RequestException as e: + self.log.error(f"HTTP request failed: {e}") + raise AirflowException(f"Failed to submit job: {e}") + except Exception as e: + self.log.error(f"Job submission failed: {e}") + raise AirflowException(f"Job submission error: {e}") + + +class OGCMonitorJobOperator(BaseOperator): + """Custom operator to monitor OGC job status.""" + + template_fields = ("job_id",) + + def __init__(self, job_id, + monitor_url_template="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", + timeout=3600, poll_interval=30, **kwargs): + super().__init__(**kwargs) + self.job_id = job_id + self.monitor_url_template = monitor_url_template + self.timeout = timeout + self.poll_interval = poll_interval + + def execute(self, context): + """Monitor job status until completion or timeout.""" + + try: + self.log.info(f"Monitoring job with ID: {self.job_id}") + + # Get MAAP token from Kubernetes secret + k8s_hook = KubernetesHook() + secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) + maap_pgt = secret.data.get("MAAP_PGT") + + if maap_pgt: + import base64 + maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') + else: + raise AirflowException("MAAP_PGT token not found in Kubernetes secret") + + monitor_url = self.monitor_url_template.format(job_id=self.job_id) + headers = { + "proxy-ticket": maap_pgt, + "Content-Type": "application/json" + } + + self.log.info(f"Monitoring job {self.job_id} at {monitor_url}") + + start_time = time.time() + + while time.time() - start_time < self.timeout: + try: + response = requests.get(monitor_url, headers=headers, timeout=30) + response.raise_for_status() + + result = response.json() + status = result.get("status", "unknown") + + self.log.info(f"Job {self.job_id} status: {status}") + + if status == "successful": + self.log.info(f"Job {self.job_id} completed successfully!") + return {"status": "successful", "result": result} + elif status == "failed": + error_msg = result.get("message", "No error message provided") + self.log.error(f"Job {self.job_id} failed: {error_msg}") + raise AirflowException(f"Job {self.job_id} failed: {error_msg}") + elif status in ["running", "accepted", "processing"]: + self.log.info(f"Job {self.job_id} still {status}, waiting {self.poll_interval}s...") + time.sleep(self.poll_interval) + else: + self.log.warning(f"Unknown job status: {status}") + time.sleep(self.poll_interval) + + except requests.RequestException as e: + self.log.warning(f"Request failed, retrying: {e}") + time.sleep(self.poll_interval) + continue + + # Timeout reached + raise AirflowException(f"Job {self.job_id} monitoring timed out after {self.timeout} seconds") + + except Exception as e: + self.log.error(f"Job monitoring failed: {e}") + raise + +dag_default_args = { + "owner": "unity-sps", + "depends_on_past": False, + "start_date": datetime.utcfromtimestamp(0), +} + +# --- DAG Definition --- + +dag = DAG( + dag_id="run_ogc_process_executor", + description="Executes OGC processes with validated inputs (triggered by launcher)", + dag_display_name="OGC Process Executor", + tags=["ogc", "executor", "triggered"], + is_paused_upon_creation=False, + catchup=False, + schedule=None, + max_active_runs=10, + default_args=dag_default_args, + # This DAG expects to be triggered with conf parameters +) + +# --- Task Definitions --- + +def setup_execution(**context): + """Setup task that processes the triggered DAG configuration.""" + + logging.info("Setting up OGC process execution...") + + # Get configuration from trigger + dag_run_conf = context.get('dag_run').conf or {} + logging.info(f"Received configuration: {json.dumps(dag_run_conf, indent=2)}") + + selected_process = dag_run_conf.get('selected_process') + queue = dag_run_conf.get('queue', 'maap-dps-sandbox') + job_inputs = dag_run_conf.get('job_inputs', '{}') + + if not selected_process: + raise AirflowException("No selected_process provided in trigger configuration") + + # Get numerical process ID + numerical_id = PROCESS_MAPPING.get(selected_process) + if not numerical_id: + raise AirflowException(f"Process '{selected_process}' not found in mapping") + + logging.info(f"Selected process: {selected_process}") + logging.info(f"Numerical process ID: {numerical_id}") + logging.info(f"Queue: {queue}") + logging.info(f"Job inputs: {job_inputs}") + + return { + "selected_process": selected_process, + "numerical_process_id": numerical_id, + "queue": queue, + "job_inputs": job_inputs + } + +setup_task = PythonOperator( + task_id="setup_execution", + python_callable=setup_execution, + dag=dag, +) + +submit_job_task = OGCSubmitJobOperator( + task_id="submit_job", + process_id="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['numerical_process_id'] }}", + job_inputs="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['job_inputs'] }}", + job_queue="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['queue'] }}", + dag=dag, +) + +monitor_job_task = OGCMonitorJobOperator( + task_id="monitor_job", + job_id="{{ ti.xcom_pull(task_ids='submit_job', key='return_value')['job_id'] }}", + timeout=3600, + poll_interval=30, + dag=dag, +) + +def cleanup_execution(**context): + """Cleanup and final reporting.""" + + logging.info("OGC process execution completed.") + + # Get results from previous tasks + setup_result = context['ti'].xcom_pull(task_ids='setup_execution', key='return_value') + submit_result = context['ti'].xcom_pull(task_ids='submit_job', key='return_value') + monitor_result = context['ti'].xcom_pull(task_ids='monitor_job', key='return_value') + + logging.info("=" * 60) + logging.info("EXECUTION SUMMARY") + logging.info("=" * 60) + + if setup_result: + logging.info(f"Process: {setup_result.get('selected_process')}") + logging.info(f"Process ID: {setup_result.get('numerical_process_id')}") + logging.info(f"Queue: {setup_result.get('queue')}") + + if submit_result: + logging.info(f"Job ID: {submit_result.get('job_id')}") + + if monitor_result: + logging.info(f"Final Status: {monitor_result.get('status')}") + + logging.info("=" * 60) + +cleanup_task = PythonOperator( + task_id="cleanup_execution", + python_callable=cleanup_execution, + dag=dag, + trigger_rule=TriggerRule.ALL_DONE +) + +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file diff --git a/airflow/plugins/dynamic_form_plugin.py b/airflow/plugins/dynamic_form_plugin.py new file mode 100644 index 00000000..9a12c78d --- /dev/null +++ b/airflow/plugins/dynamic_form_plugin.py @@ -0,0 +1,55 @@ +from airflow.plugins_manager import AirflowPlugin +from airflow.www import auth +from flask import Blueprint, request, render_template, redirect, url_for, jsonify +from airflow.models import DagRun +from airflow.utils.state import DagRunState +from airflow.utils import timezone +from airflow.api.common.trigger_dag import trigger_dag +import logging + +logger = logging.getLogger(__name__) + +dynamic_form_bp = Blueprint( + "dynamic_form", + __name__, + template_folder="templates", + static_folder="static" +) + +@dynamic_form_bp.route('/dynamic_form/') +@auth.has_access_dag('GET') +def show_form(dag_id): + return render_template('dynamic_form.html', dag_id=dag_id) + +@dynamic_form_bp.route('/submit_form/', methods=['POST']) +@auth.has_access_dag('POST') +def submit_form(dag_id): + try: + form_data = request.form.to_dict() + logger.info(f"Received form data for DAG {dag_id}: {form_data}") + + # Trigger DAG with form data + dag_run = trigger_dag( + dag_id=dag_id, + run_id=None, + conf=form_data, + execution_date=None, + replace_microseconds=False + ) + + return jsonify({ + 'status': 'success', + 'message': f'DAG {dag_id} triggered successfully', + 'dag_run_id': dag_run.run_id + }) + + except Exception as e: + logger.error(f"Error triggering DAG {dag_id}: {str(e)}") + return jsonify({ + 'status': 'error', + 'message': f'Error triggering DAG: {str(e)}' + }), 500 + +class DynamicFormPlugin(AirflowPlugin): + name = "dynamic_form" + flask_blueprints = [dynamic_form_bp] \ No newline at end of file diff --git a/airflow/plugins/templates/dynamic_form.html b/airflow/plugins/templates/dynamic_form.html new file mode 100644 index 00000000..b456bc4b --- /dev/null +++ b/airflow/plugins/templates/dynamic_form.html @@ -0,0 +1,237 @@ + + + + + + Dynamic Form - {{ dag_id }} + + + +
+

Dynamic Form Test - {{ dag_id }}

+ +
+
+ + +
+ + +
+

Option A Selected - Additional Fields:

+
+ + +
+
+ + +
+
+ + +
+

Option B Selected - Additional Fields:

+
+ + +
+
+ + +
+
+ + +
+

Option C Selected - Additional Fields:

+
+ + +
+
+ + +
+
+ + +
+ +
+
+ + + + \ No newline at end of file From 2957854029a3a755be7858a16c03b7c927bc73be Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Fri, 1 Aug 2025 16:31:51 -0700 Subject: [PATCH 11/19] deleted dynamic attempts --- airflow/dags/dynamic_form_test_dag.py | 155 --------- airflow/dags/ogc_process_selector.py | 392 ----------------------- airflow/dags/run_ogc_process2.py | 337 ------------------- airflow/dags/run_ogc_process_executor.py | 329 ------------------- 4 files changed, 1213 deletions(-) delete mode 100644 airflow/dags/dynamic_form_test_dag.py delete mode 100644 airflow/dags/ogc_process_selector.py delete mode 100644 airflow/dags/run_ogc_process2.py delete mode 100644 airflow/dags/run_ogc_process_executor.py diff --git a/airflow/dags/dynamic_form_test_dag.py b/airflow/dags/dynamic_form_test_dag.py deleted file mode 100644 index 806c87bb..00000000 --- a/airflow/dags/dynamic_form_test_dag.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Dynamic Form Test DAG - -This DAG demonstrates dynamic form functionality where: -- Initial dropdown has options a, b, c -- Option a shows fields 1 and 2 -- Option b shows fields 3 and 4 -- Option c shows fields 5 and 6 - -To use this DAG: -1. Access the form at: http://localhost:8080/dynamic_form/dynamic_form_test -2. Select an option from the dropdown -3. Fill in the conditional fields that appear -4. Submit to trigger the DAG with the form data -""" - -from datetime import datetime, timedelta -from airflow import DAG -from airflow.operators.python import PythonOperator -import logging - -logger = logging.getLogger(__name__) - -default_args = { - "owner": "unity-sps", - "depends_on_past": False, - "start_date": datetime(2024, 1, 1), - "email_on_failure": False, - "email_on_retry": False, - "retries": 1, - "retry_delay": timedelta(minutes=5), -} - -def process_form_data(**context): - """Process the form data received from the dynamic form""" - conf = context.get('dag_run').conf or {} - - logger.info("=== Dynamic Form Data Processing ===") - logger.info(f"Received configuration: {conf}") - - main_option = conf.get('main_option') - logger.info(f"Main option selected: {main_option}") - - if main_option == 'a': - field_1 = conf.get('field_1', '') - field_2 = conf.get('field_2', '') - logger.info(f"Option A selected - Field 1: {field_1}, Field 2: {field_2}") - print(f"Processing Option A with values: Field 1='{field_1}', Field 2='{field_2}'") - - elif main_option == 'b': - field_3 = conf.get('field_3', '') - field_4 = conf.get('field_4', '') - logger.info(f"Option B selected - Field 3: {field_3}, Field 4: {field_4}") - print(f"Processing Option B with values: Field 3='{field_3}', Field 4='{field_4}'") - - elif main_option == 'c': - field_5 = conf.get('field_5', '') - field_6 = conf.get('field_6', '') - logger.info(f"Option C selected - Field 5: {field_5}, Field 6: {field_6}") - print(f"Processing Option C with values: Field 5='{field_5}', Field 6='{field_6}'") - - else: - logger.warning(f"Unknown or missing main_option: {main_option}") - print(f"Warning: Unknown option '{main_option}' or no option provided") - - return f"Successfully processed form data for option: {main_option}" - -def validate_form_data(**context): - """Validate the form data received""" - conf = context.get('dag_run').conf or {} - - main_option = conf.get('main_option') - - if not main_option: - raise ValueError("No main_option provided in form data") - - if main_option not in ['a', 'b', 'c']: - raise ValueError(f"Invalid main_option: {main_option}. Must be 'a', 'b', or 'c'") - - # Validate required fields based on option - if main_option == 'a': - if not conf.get('field_1') or not conf.get('field_2'): - raise ValueError("Option A requires both field_1 and field_2 to be filled") - elif main_option == 'b': - if not conf.get('field_3') or not conf.get('field_4'): - raise ValueError("Option B requires both field_3 and field_4 to be filled") - elif main_option == 'c': - if not conf.get('field_5') or not conf.get('field_6'): - raise ValueError("Option C requires both field_5 and field_6 to be filled") - - logger.info("Form data validation passed") - return "Validation successful" - -def simulate_processing(**context): - """Simulate some processing based on the selected option""" - conf = context.get('dag_run').conf or {} - main_option = conf.get('main_option') - - import time - - if main_option == 'a': - logger.info("Simulating processing for Option A...") - print("Executing Option A workflow...") - time.sleep(5) # Simulate work - print("Option A processing completed") - - elif main_option == 'b': - logger.info("Simulating processing for Option B...") - print("Executing Option B workflow...") - time.sleep(3) # Simulate work - print("Option B processing completed") - - elif main_option == 'c': - logger.info("Simulating processing for Option C...") - print("Executing Option C workflow...") - time.sleep(7) # Simulate work - print("Option C processing completed") - - return f"Processing completed for option {main_option}" - -# Create the DAG -with DAG( - dag_id="dynamic_form_test", - default_args=default_args, - description="Test DAG for dynamic form functionality", - schedule=None, # Only triggered manually via form - is_paused_upon_creation=False, - catchup=False, - tags=["test", "dynamic-form", "proof-of-concept"], - doc_md=__doc__, -) as dag: - - # Task 1: Validate form data - validate_task = PythonOperator( - task_id="validate_form_data", - python_callable=validate_form_data, - doc_md="Validates that required form fields are present based on selected option" - ) - - # Task 2: Process form data - process_task = PythonOperator( - task_id="process_form_data", - python_callable=process_form_data, - doc_md="Processes and logs the form data received from the dynamic form" - ) - - # Task 3: Simulate processing - simulate_task = PythonOperator( - task_id="simulate_processing", - python_callable=simulate_processing, - doc_md="Simulates different processing workflows based on the selected option" - ) - - # Set task dependencies - validate_task >> process_task >> simulate_task \ No newline at end of file diff --git a/airflow/dags/ogc_process_selector.py b/airflow/dags/ogc_process_selector.py deleted file mode 100644 index 105716ea..00000000 --- a/airflow/dags/ogc_process_selector.py +++ /dev/null @@ -1,392 +0,0 @@ -""" -OGC Process Selector DAG - Step 1: User selects a process and this creates/updates -dynamic input DAGs with specific fields for that process. -""" -import json -import logging -from datetime import datetime -import requests -import re -import os - -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.operators.python import PythonOperator -from airflow.exceptions import AirflowException -import time - -def fetch_ogc_processes(): - """Fetch available processes from the OGC API and create mapping.""" - try: - response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) - response.raise_for_status() - - processes_data = response.json() - process_mapping = {} - dropdown_options = [] - - for process in processes_data.get("processes", []): - process_id = process.get("id") - process_version = process.get("version") - - # Extract numerical ID from links - numerical_id = None - for link in process.get("links", []): - if link.get("rel") == "self": - href = link.get("href", "") - # Extract number from href like "/ogc/processes/7" - match = re.search(r'/processes/(\d+)$', href) - if match: - numerical_id = int(match.group(1)) - break - - if process_id and numerical_id: - display_name = f"{process_id}:{process_version}" if process_version else process_id - dropdown_options.append(display_name) - process_mapping[display_name] = numerical_id - - return process_mapping, dropdown_options - - except requests.RequestException as e: - logging.error(f"Failed to fetch processes: {e}") - return {"example-process:1.0": 1}, ["example-process:1.0"] - except Exception as e: - logging.error(f"Error processing OGC processes: {e}") - return {"example-process:1.0": 1}, ["example-process:1.0"] - -# Constants -PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() - -dag_default_args = { - "owner": "unity-sps", - "depends_on_past": False, - "start_date": datetime.utcfromtimestamp(0), -} - -# --- DAG Definition --- - -dag = DAG( - dag_id="ogc_process_selector", - description="Step 1: Select an OGC process to create dynamic input DAG", - dag_display_name="🔧 Step 1: Select OGC Process", - tags=["ogc", "step1", "selector"], - is_paused_upon_creation=False, - catchup=False, - schedule=None, - max_active_runs=10, - default_args=dag_default_args, - params={ - "selected_process": Param( - default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "example-process:1.0", - enum=DROPDOWN_OPTIONS, - title="🎯 Select Process", - description=f"Choose a process to configure. Available: {', '.join(DROPDOWN_OPTIONS[:3])}{'...' if len(DROPDOWN_OPTIONS) > 3 else ''}", - ), - "queue": Param( - "maap-dps-sandbox", - type="string", - title="🚀 Execution Queue", - description="The MAAP queue to submit the job to", - ), - }, -) - -def create_dynamic_input_dag(**context): - """Create a dynamic input DAG for the selected process.""" - - selected_process = context['params'].get('selected_process') - queue = context['params'].get('queue', 'maap-dps-sandbox') - - if not selected_process or selected_process not in PROCESS_MAPPING: - raise AirflowException(f"Invalid process selection: {selected_process}") - - numerical_id = PROCESS_MAPPING[selected_process] - logging.info(f"Creating input DAG for process '{selected_process}' (ID: {numerical_id})") - - # Fetch process schema - try: - process_url = f"https://api.dit.maap-project.org/api/ogc/processes/{numerical_id}" - response = requests.get(process_url, timeout=30) - response.raise_for_status() - - process_details = response.json() - inputs_schema = process_details.get("inputs", {}) - process_title = process_details.get("title", selected_process) - process_description = process_details.get("description", "No description available") - - logging.info(f"Process: {process_title}") - logging.info(f"Description: {process_description}") - logging.info(f"Input fields: {list(inputs_schema.keys())}") - - except requests.RequestException as e: - raise AirflowException(f"Failed to fetch process schema: {e}") - - # Generate the dynamic DAG file - dag_content = generate_input_dag_content( - selected_process=selected_process, - numerical_id=numerical_id, - inputs_schema=inputs_schema, - process_title=process_title, - process_description=process_description, - queue=queue - ) - - # Write the DAG file - dags_folder = os.path.dirname(os.path.abspath(__file__)) - safe_process_name = selected_process.replace(":", "_").replace("-", "_") - dag_filename = f"ogc_input_{safe_process_name}.py" - dag_filepath = os.path.join(dags_folder, dag_filename) - - try: - with open(dag_filepath, 'w') as f: - f.write(dag_content) - - logging.info(f"✅ Created dynamic input DAG: {dag_filename}") - logging.info("=" * 60) - logging.info("🎉 SUCCESS! Your input DAG has been created!") - logging.info("=" * 60) - logging.info(f"📋 Process: {process_title}") - logging.info(f"🆔 DAG ID: ogc_input_{safe_process_name}") - logging.info(f"📁 File: {dag_filename}") - logging.info("=" * 60) - logging.info("📝 NEXT STEPS:") - logging.info("1. Wait 10-30 seconds for Airflow to detect the new DAG") - logging.info(f"2. Look for DAG: 'Step 2: {process_title} - Inputs'") - logging.info("3. Run that DAG to configure your process inputs") - logging.info("=" * 60) - - return { - "success": True, - "dag_id": f"ogc_input_{safe_process_name}", - "dag_file": dag_filename, - "process_title": process_title, - "input_count": len(inputs_schema) - } - - except Exception as e: - logging.error(f"Failed to write DAG file: {e}") - raise AirflowException(f"Failed to create input DAG: {e}") - -def generate_input_dag_content(selected_process, numerical_id, inputs_schema, process_title, process_description, queue): - """Generate the content for the dynamic input DAG.""" - - safe_process_name = selected_process.replace(":", "_").replace("-", "_") - - # Generate Param definitions for each input - param_definitions = [] - for input_key, input_def in inputs_schema.items(): - input_title = input_def.get('title', input_key) - input_desc = input_def.get('description', f'Input for {input_key}') - input_type = input_def.get('type', 'string') - input_default = input_def.get('default') - input_placeholder = input_def.get('placeholder', '') - - # Map OGC types to Airflow Param types - if input_type in ['text', 'string']: - param_type = 'string' - default_value = input_default or input_placeholder or "" - elif input_type in ['number', 'integer', 'float']: - param_type = 'number' - default_value = input_default or 0 - elif input_type == 'boolean': - param_type = 'boolean' - default_value = input_default or False - else: - param_type = 'string' - default_value = input_default or "" - - # Create description with type info - full_description = f"{input_desc}" - if input_placeholder: - full_description += f" (e.g., {input_placeholder})" - - param_def = f''' "{input_key}": Param( - default={repr(default_value)}, - type="{param_type}", - title="🔧 {input_title}", - description="{full_description}", - ),''' - - param_definitions.append(param_def) - - params_section = "\n".join(param_definitions) - - # Generate the DAG content - dag_content = f'''""" -Dynamic Input DAG for {process_title} -Generated automatically for process: {selected_process} - -{process_description} -""" -import json -import logging -from datetime import datetime - -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.operators.python import PythonOperator -from airflow.operators.trigger_dagrun import TriggerDagRunOperator -from airflow.exceptions import AirflowException - -dag_default_args = {{ - "owner": "unity-sps", - "depends_on_past": False, - "start_date": datetime.utcfromtimestamp(0), -}} - -# Process configuration -SELECTED_PROCESS = "{selected_process}" -NUMERICAL_ID = {numerical_id} -PROCESS_TITLE = "{process_title}" -DEFAULT_QUEUE = "{queue}" - -# Input schema -INPUTS_SCHEMA = {json.dumps(inputs_schema, indent=4)} - -# --- DAG Definition --- - -dag = DAG( - dag_id="ogc_input_{safe_process_name}", - description="Step 2: Configure inputs for {process_title}", - dag_display_name="⚙️ Step 2: {process_title} - Inputs", - tags=["ogc", "step2", "inputs", "{safe_process_name}"], - is_paused_upon_creation=False, - catchup=False, - schedule=None, - max_active_runs=5, - default_args=dag_default_args, - params={{ -{params_section} - "queue": Param( - default=DEFAULT_QUEUE, - type="string", - title="🚀 Execution Queue", - description="The MAAP queue to submit the job to", - ), - }}, -) - -def validate_and_trigger_execution(**context): - """Validate inputs and trigger the execution DAG.""" - - logging.info("Validating inputs and preparing execution...") - logging.info(f"Process: {{PROCESS_TITLE}}") - logging.info(f"Process ID: {{NUMERICAL_ID}}") - - # Collect all input values - validated_inputs = {{}} - params = context['params'] - - for input_key, input_def in INPUTS_SCHEMA.items(): - if input_key in params: - value = params[input_key] - validated_inputs[input_key] = value - logging.info(f"✓ {{input_key}}: {{value}}") - elif input_def.get('default') is not None: - default_value = input_def.get('default') - validated_inputs[input_key] = default_value - logging.info(f"→ {{input_key}}: {{default_value}} (default)") - else: - logging.warning(f"⚠ No value for {{input_key}}") - - queue = params.get('queue', DEFAULT_QUEUE) - - # Prepare execution parameters - execution_params = {{ - "selected_process": SELECTED_PROCESS, - "numerical_process_id": NUMERICAL_ID, - "queue": queue, - "job_inputs": json.dumps(validated_inputs), - "process_title": PROCESS_TITLE - }} - - logging.info("=" * 60) - logging.info("🚀 TRIGGERING EXECUTION") - logging.info("=" * 60) - logging.info(f"Final inputs: {{json.dumps(validated_inputs, indent=2)}}") - logging.info(f"Queue: {{queue}}") - logging.info("=" * 60) - - return {{ - "execution_params": execution_params, - "validated_inputs": validated_inputs - }} - -def execution_summary(**context): - """Provide execution summary.""" - - validation_result = context['ti'].xcom_pull(task_ids='validate_inputs') - - logging.info("=" * 60) - logging.info("✅ INPUT VALIDATION COMPLETED") - logging.info("=" * 60) - logging.info(f"Process: {{PROCESS_TITLE}}") - logging.info(f"Input fields configured: {{len(validation_result['validated_inputs'])}}") - logging.info("The execution DAG has been triggered!") - logging.info("Check the 'ogc_process_executor' DAG for progress.") - logging.info("=" * 60) - -# Task to validate inputs -validate_task = PythonOperator( - task_id="validate_inputs", - python_callable=validate_and_trigger_execution, - dag=dag, -) - -# Task to trigger execution -trigger_task = TriggerDagRunOperator( - task_id="trigger_execution", - trigger_dag_id="ogc_process_executor", - conf="{{{{ ti.xcom_pull(task_ids='validate_inputs')['execution_params'] }}}}", - wait_for_completion=False, - dag=dag, -) - -# Summary task -summary_task = PythonOperator( - task_id="execution_summary", - python_callable=execution_summary, - dag=dag, -) - -validate_task >> trigger_task >> summary_task -''' - - return dag_content - -# Task to create the dynamic DAG -create_dag_task = PythonOperator( - task_id="create_input_dag", - python_callable=create_dynamic_input_dag, - dag=dag, -) - -def completion_message(**context): - """Display completion message with next steps.""" - - result = context['ti'].xcom_pull(task_ids='create_input_dag') - - if result and result.get('success'): - logging.info("=" * 60) - logging.info("🎉 PROCESS SELECTION COMPLETED!") - logging.info("=" * 60) - logging.info(f"📋 Process: {result['process_title']}") - logging.info(f"🆔 Input DAG ID: {result['dag_id']}") - logging.info(f"📊 Input fields: {result['input_count']}") - logging.info("=" * 60) - logging.info("📝 WHAT'S NEXT:") - logging.info("1. Wait 10-30 seconds for the new DAG to appear") - logging.info(f"2. Look for: 'Step 2: {result['process_title']} - Inputs'") - logging.info("3. Run that DAG to configure your specific inputs") - logging.info("4. The execution will be triggered automatically") - logging.info("=" * 60) - else: - logging.error("Failed to create input DAG") - -completion_task = PythonOperator( - task_id="completion_message", - python_callable=completion_message, - dag=dag, -) - -create_dag_task >> completion_task \ No newline at end of file diff --git a/airflow/dags/run_ogc_process2.py b/airflow/dags/run_ogc_process2.py deleted file mode 100644 index e9c5be84..00000000 --- a/airflow/dags/run_ogc_process2.py +++ /dev/null @@ -1,337 +0,0 @@ -""" -Dynamic OGC Process Launcher DAG - Fetches process input schema and triggers execution DAG. - -This DAG works in two stages: -1. User selects a process and this DAG fetches the input schema -2. This DAG triggers the execution DAG with the proper input parameters -""" -import json -import logging -from datetime import datetime -import requests -import re - -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.models.baseoperator import chain -from airflow.operators.python import PythonOperator, get_current_context -from airflow.operators.trigger_dagrun import TriggerDagRunOperator -from airflow.exceptions import AirflowException -from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook -import time - -def fetch_ogc_processes(): - """Fetch available processes from the OGC API and create mapping.""" - try: - response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) - response.raise_for_status() - - processes_data = response.json() - process_mapping = {} - dropdown_options = [] - - for process in processes_data.get("processes", []): - process_id = process.get("id") - process_version = process.get("version") - - # Extract numerical ID from links - numerical_id = None - for link in process.get("links", []): - if link.get("rel") == "self": - href = link.get("href", "") - # Extract number from href like "/ogc/processes/7" - match = re.search(r'/processes/(\d+)$', href) - if match: - numerical_id = int(match.group(1)) - break - - if process_id and numerical_id: - display_name = f"{process_id}:{process_version}" if process_version else process_id - dropdown_options.append(display_name) - process_mapping[display_name] = numerical_id - - return process_mapping, dropdown_options - - except requests.RequestException as e: - logging.error(f"Failed to fetch processes: {e}") - # Return fallback mapping - return {"example-process:1.0": 1}, ["example-process:1.0"] - except Exception as e: - logging.error(f"Error processing OGC processes: {e}") - return {"example-process:1.0": 1}, ["example-process:1.0"] - -def get_maap_token(): - """Helper function to get MAAP token from Kubernetes secret.""" - try: - k8s_hook = KubernetesHook() - secret = k8s_hook.get_secret(name="sps-app-credentials", namespace=k8s_hook.get_namespace()) - maap_pgt = secret.data.get("MAAP_PGT") - - if maap_pgt: - import base64 - return base64.b64decode(maap_pgt).decode('utf-8') - else: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") - except Exception as e: - logging.error(f"Failed to get MAAP token: {e}") - return None - -# Constants -K8S_SECRET_NAME = "sps-app-credentials" -PROCESS_MAPPING, DROPDOWN_OPTIONS = fetch_ogc_processes() - -dag_default_args = { - "owner": "unity-sps", - "depends_on_past": False, - "start_date": datetime.utcfromtimestamp(0), -} - -# --- DAG Definition --- - -dag = DAG( - dag_id="run_ogc_process2", - description="Dynamic OGC Process Launcher - Fetches input schema and triggers execution", - dag_display_name="OGC Process Launcher (Dynamic Inputs)", - tags=["ogc", "launcher", "dynamic"], - is_paused_upon_creation=False, - catchup=False, - schedule=None, - max_active_runs=10, - default_args=dag_default_args, - params={ - "selected_process": Param( - default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "example-process:1.0", - enum=DROPDOWN_OPTIONS, - title="Process Selection", - description=f"Select a process to execute. Available processes: {', '.join(DROPDOWN_OPTIONS)}", - ), - "queue": Param( - "maap-dps-sandbox", - type="string", - title="Queue", - description="The MAAP queue to submit the job to", - ), - # Dynamic input fields will be populated based on process schema - "dynamic_inputs": Param( - "{}", - type="string", - title="Process Inputs (JSON)", - description="Enter process inputs as JSON. Schema will be displayed in logs after process selection.", - ) - }, -) - -# --- Task Definitions --- - -def fetch_process_schema(**context): - """Fetch the input schema for the selected process.""" - - logging.info("Fetching process input schema...") - - selected_process = context['params'].get('selected_process') - if not selected_process or selected_process not in PROCESS_MAPPING: - raise AirflowException(f"Invalid process selection: {selected_process}") - - numerical_id = PROCESS_MAPPING[selected_process] - logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") - - # Fetch process details - try: - - process_url = f"https://api.dit.maap-project.org/api/ogc/processes/{numerical_id}" - response = requests.get(process_url, timeout=30) - response.raise_for_status() - - process_details = response.json() - inputs_schema = process_details.get("inputs", {}) - - logging.info(f"Process Details URL: {process_url}") - logging.info(f"Process Title: {process_details.get('title', 'N/A')}") - logging.info(f"Process Description: {process_details.get('description', 'N/A')}") - logging.info("=" * 60) - logging.info("INPUT SCHEMA FOR THIS PROCESS:") - logging.info("=" * 60) - - # Format the schema nicely for logging - for input_key, input_def in inputs_schema.items(): - logging.info(f"Input: {input_key}") - logging.info(f" Title: {input_def.get('title', input_key)}") - logging.info(f" Description: {input_def.get('description', 'No description')}") - logging.info(f" Type: {input_def.get('type', 'unknown')}") - logging.info(f" Default: {input_def.get('default', 'None')}") - if input_def.get('placeholder'): - logging.info(f" Placeholder: {input_def.get('placeholder')}") - logging.info("-" * 40) - - logging.info("=" * 60) - logging.info("EXAMPLE JSON INPUT:") - logging.info("=" * 60) - - # Create example JSON input - example_inputs = {} - for input_key, input_def in inputs_schema.items(): - if input_def.get('default') is not None: - example_inputs[input_key] = input_def.get('default') - elif input_def.get('placeholder'): - example_inputs[input_key] = input_def.get('placeholder') - else: - input_type = input_def.get('type', 'string') - if input_type == 'text' or input_type == 'string': - example_inputs[input_key] = f"example_{input_key}_value" - elif input_type == 'number' or input_type == 'integer': - example_inputs[input_key] = 0 - elif input_type == 'boolean': - example_inputs[input_key] = True - else: - example_inputs[input_key] = f"example_{input_key}_value" - - example_json = json.dumps(example_inputs, indent=2) - logging.info(example_json) - - logging.info("=" * 60) - logging.info("INSTRUCTIONS:") - logging.info("Copy the example JSON above, modify the values as needed,") - logging.info("and paste it into the 'Process Inputs (JSON)' field when") - logging.info("re-triggering this DAG.") - logging.info("=" * 60) - - return { - "numerical_process_id": numerical_id, - "selected_process": selected_process, - "inputs_schema": inputs_schema, - "example_inputs": example_inputs, - "schema_fetched": True - } - - except requests.RequestException as e: - logging.error(f"Failed to fetch process schema: {e}") - raise AirflowException(f"Failed to fetch process schema: {e}") - -def validate_and_trigger_execution(**context): - """Validate the dynamic inputs and trigger the execution DAG.""" - - logging.info("Validating inputs and preparing execution...") - - # Get schema info from previous task - schema_info = context['ti'].xcom_pull(task_ids='fetch_schema') - if not schema_info or not schema_info.get('schema_fetched'): - raise AirflowException("Schema was not properly fetched") - - selected_process = schema_info['selected_process'] - numerical_process_id = schema_info['numerical_process_id'] - inputs_schema = schema_info['inputs_schema'] - - # Parse user-provided dynamic inputs - dynamic_inputs_str = context['params'].get('dynamic_inputs', '{}') - try: - dynamic_inputs = json.loads(dynamic_inputs_str) if dynamic_inputs_str != '{}' else {} - except json.JSONDecodeError as e: - raise AirflowException(f"Invalid JSON in dynamic_inputs: {e}") - - logging.info(f"User provided inputs: {dynamic_inputs}") - - # Validate inputs against schema - validated_inputs = {} - for input_key, input_def in inputs_schema.items(): - if input_key in dynamic_inputs: - validated_inputs[input_key] = dynamic_inputs[input_key] - logging.info(f"✓ Using provided value for '{input_key}': {dynamic_inputs[input_key]}") - elif input_def.get('default') is not None: - validated_inputs[input_key] = input_def.get('default') - logging.info(f"→ Using default value for '{input_key}': {input_def.get('default')}") - else: - logging.warning(f"⚠ No value provided for required input '{input_key}'") - - # If no inputs were provided, show schema again and stop - if not dynamic_inputs: - logging.info("=" * 60) - logging.info("NO INPUTS PROVIDED - DISPLAYING SCHEMA AGAIN") - logging.info("=" * 60) - logging.info("Please provide inputs in the 'Process Inputs (JSON)' field") - logging.info("and re-trigger this DAG to proceed with execution.") - logging.info("=" * 60) - return { - "action": "schema_display_only", - "message": "Re-trigger DAG with proper inputs to execute the process" - } - - logging.info(f"Final validated inputs: {json.dumps(validated_inputs, indent=2)}") - - # Prepare parameters for execution DAG - execution_params = { - "selected_process": selected_process, - "queue": context['params'].get('queue', 'maap-dps-sandbox'), - "job_inputs": json.dumps(validated_inputs) - } - - logging.info("=" * 60) - logging.info("TRIGGERING EXECUTION DAG") - logging.info("=" * 60) - logging.info(f"Execution parameters: {json.dumps(execution_params, indent=2)}") - - return { - "action": "trigger_execution", - "execution_params": execution_params, - "numerical_process_id": numerical_process_id - } - -# Task to fetch process schema -fetch_schema_task = PythonOperator( - task_id="fetch_schema", - python_callable=fetch_process_schema, - dag=dag, -) - -# Task to validate inputs and prepare execution -validate_inputs_task = PythonOperator( - task_id="validate_inputs", - python_callable=validate_and_trigger_execution, - dag=dag, -) - -# Task to trigger the execution DAG -trigger_execution_task = TriggerDagRunOperator( - task_id="trigger_execution", - trigger_dag_id="run_ogc_process_executor", # This DAG needs to be created - conf="{{ ti.xcom_pull(task_ids='validate_inputs', key='return_value')['execution_params'] }}", - wait_for_completion=False, - dag=dag, - trigger_rule="none_failed", # Only run if validation succeeded -) - -def completion_summary(**context): - """Provide a summary of what happened.""" - - validation_result = context['ti'].xcom_pull(task_ids='validate_inputs', key='return_value') - - if validation_result and validation_result.get('action') == 'schema_display_only': - logging.info("=" * 60) - logging.info("LAUNCHER DAG COMPLETED - SCHEMA DISPLAY MODE") - logging.info("=" * 60) - logging.info("The process schema has been displayed in the logs.") - logging.info("Please check the 'fetch_schema' task logs for the input schema") - logging.info("and re-trigger this DAG with proper inputs to execute.") - logging.info("=" * 60) - elif validation_result and validation_result.get('action') == 'trigger_execution': - logging.info("=" * 60) - logging.info("LAUNCHER DAG COMPLETED - EXECUTION TRIGGERED") - logging.info("=" * 60) - logging.info("The execution DAG has been triggered successfully.") - logging.info("Check the 'run_ogc_process_executor' DAG for execution progress.") - logging.info("=" * 60) - else: - logging.info("DAG completed with unknown status") - -summary_task = PythonOperator( - task_id="completion_summary", - python_callable=completion_summary, - dag=dag, - trigger_rule="none_failed_min_one_success", -) - -# Chain the tasks -chain( - fetch_schema_task, - validate_inputs_task, - [trigger_execution_task, summary_task] -) \ No newline at end of file diff --git a/airflow/dags/run_ogc_process_executor.py b/airflow/dags/run_ogc_process_executor.py deleted file mode 100644 index 3cc6f16f..00000000 --- a/airflow/dags/run_ogc_process_executor.py +++ /dev/null @@ -1,329 +0,0 @@ -""" -OGC Process Executor DAG - Executes the actual OGC process with validated inputs. - -This DAG is triggered by the launcher DAG (run_ogc_process2) and performs -the actual process execution and monitoring. -""" -import json -import logging -from datetime import datetime -import requests -import re - -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.models.baseoperator import BaseOperator, chain -from airflow.operators.python import PythonOperator, get_current_context -from airflow.utils.trigger_rule import TriggerRule -from airflow.exceptions import AirflowException -from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook -import time - -K8S_SECRET_NAME = "sps-app-credentials" - -def fetch_ogc_processes(): - """Fetch available processes from the OGC API and create mapping.""" - try: - response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) - response.raise_for_status() - - processes_data = response.json() - process_mapping = {} - - for process in processes_data.get("processes", []): - process_id = process.get("id") - process_version = process.get("version") - - # Extract numerical ID from links - numerical_id = None - for link in process.get("links", []): - if link.get("rel") == "self": - href = link.get("href", "") - # Extract number from href like "/ogc/processes/7" - match = re.search(r'/processes/(\d+)$', href) - if match: - numerical_id = int(match.group(1)) - break - - if process_id and numerical_id: - display_name = f"{process_id}:{process_version}" if process_version else process_id - process_mapping[display_name] = numerical_id - - return process_mapping - - except requests.RequestException as e: - logging.error(f"Failed to fetch processes: {e}") - return {"example-process:1.0": 1} - except Exception as e: - logging.error(f"Error processing OGC processes: {e}") - return {"example-process:1.0": 1} - -PROCESS_MAPPING = fetch_ogc_processes() - -class OGCSubmitJobOperator(BaseOperator): - """Custom operator to submit jobs to OGC API endpoints.""" - - template_fields = ("process_id", "job_inputs", "job_queue") - - def __init__(self, process_id, job_inputs, job_queue, - submit_url_template="https://api.dit.maap-project.org/api/ogc/processes/{process_id}/execution", - **kwargs): - super().__init__(**kwargs) - self.process_id = process_id - self.job_inputs = job_inputs - self.job_queue = job_queue - self.submit_url_template = submit_url_template - - def execute(self, context): - """Submit job to OGC API and return job ID.""" - - try: - # Get MAAP token from Kubernetes secret - k8s_hook = KubernetesHook() - secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) - maap_pgt = secret.data.get("MAAP_PGT") - - if maap_pgt: - import base64 - maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') - else: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") - - # Prepare URL and payload - submit_url = self.submit_url_template.format(process_id=self.process_id) - - # Parse job inputs if it's a string - if isinstance(self.job_inputs, str): - try: - job_inputs_dict = json.loads(self.job_inputs) - except json.JSONDecodeError: - job_inputs_dict = {} - else: - job_inputs_dict = self.job_inputs or {} - - payload = { - "queue": self.job_queue, - "inputs": job_inputs_dict - } - - headers = { - "proxy-ticket": maap_pgt, - "Content-Type": "application/json" - } - - self.log.info(f"Submitting job to {submit_url}") - self.log.info(f"Job payload: {json.dumps(payload, indent=2)}") - - # Submit job - response = requests.post(submit_url, json=payload, headers=headers, timeout=60) - response.raise_for_status() - - result = response.json() - job_id = result.get("id") - - if not job_id: - raise AirflowException(f"Failed to get job ID from response: {result}") - - self.log.info(f"Job submitted successfully. Job ID: {job_id}") - - # Return job_id for next task - return {"job_id": job_id} - - except requests.RequestException as e: - self.log.error(f"HTTP request failed: {e}") - raise AirflowException(f"Failed to submit job: {e}") - except Exception as e: - self.log.error(f"Job submission failed: {e}") - raise AirflowException(f"Job submission error: {e}") - - -class OGCMonitorJobOperator(BaseOperator): - """Custom operator to monitor OGC job status.""" - - template_fields = ("job_id",) - - def __init__(self, job_id, - monitor_url_template="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", - timeout=3600, poll_interval=30, **kwargs): - super().__init__(**kwargs) - self.job_id = job_id - self.monitor_url_template = monitor_url_template - self.timeout = timeout - self.poll_interval = poll_interval - - def execute(self, context): - """Monitor job status until completion or timeout.""" - - try: - self.log.info(f"Monitoring job with ID: {self.job_id}") - - # Get MAAP token from Kubernetes secret - k8s_hook = KubernetesHook() - secret = k8s_hook.get_secret(name=K8S_SECRET_NAME, namespace=k8s_hook.get_namespace()) - maap_pgt = secret.data.get("MAAP_PGT") - - if maap_pgt: - import base64 - maap_pgt = base64.b64decode(maap_pgt).decode('utf-8') - else: - raise AirflowException("MAAP_PGT token not found in Kubernetes secret") - - monitor_url = self.monitor_url_template.format(job_id=self.job_id) - headers = { - "proxy-ticket": maap_pgt, - "Content-Type": "application/json" - } - - self.log.info(f"Monitoring job {self.job_id} at {monitor_url}") - - start_time = time.time() - - while time.time() - start_time < self.timeout: - try: - response = requests.get(monitor_url, headers=headers, timeout=30) - response.raise_for_status() - - result = response.json() - status = result.get("status", "unknown") - - self.log.info(f"Job {self.job_id} status: {status}") - - if status == "successful": - self.log.info(f"Job {self.job_id} completed successfully!") - return {"status": "successful", "result": result} - elif status == "failed": - error_msg = result.get("message", "No error message provided") - self.log.error(f"Job {self.job_id} failed: {error_msg}") - raise AirflowException(f"Job {self.job_id} failed: {error_msg}") - elif status in ["running", "accepted", "processing"]: - self.log.info(f"Job {self.job_id} still {status}, waiting {self.poll_interval}s...") - time.sleep(self.poll_interval) - else: - self.log.warning(f"Unknown job status: {status}") - time.sleep(self.poll_interval) - - except requests.RequestException as e: - self.log.warning(f"Request failed, retrying: {e}") - time.sleep(self.poll_interval) - continue - - # Timeout reached - raise AirflowException(f"Job {self.job_id} monitoring timed out after {self.timeout} seconds") - - except Exception as e: - self.log.error(f"Job monitoring failed: {e}") - raise - -dag_default_args = { - "owner": "unity-sps", - "depends_on_past": False, - "start_date": datetime.utcfromtimestamp(0), -} - -# --- DAG Definition --- - -dag = DAG( - dag_id="run_ogc_process_executor", - description="Executes OGC processes with validated inputs (triggered by launcher)", - dag_display_name="OGC Process Executor", - tags=["ogc", "executor", "triggered"], - is_paused_upon_creation=False, - catchup=False, - schedule=None, - max_active_runs=10, - default_args=dag_default_args, - # This DAG expects to be triggered with conf parameters -) - -# --- Task Definitions --- - -def setup_execution(**context): - """Setup task that processes the triggered DAG configuration.""" - - logging.info("Setting up OGC process execution...") - - # Get configuration from trigger - dag_run_conf = context.get('dag_run').conf or {} - logging.info(f"Received configuration: {json.dumps(dag_run_conf, indent=2)}") - - selected_process = dag_run_conf.get('selected_process') - queue = dag_run_conf.get('queue', 'maap-dps-sandbox') - job_inputs = dag_run_conf.get('job_inputs', '{}') - - if not selected_process: - raise AirflowException("No selected_process provided in trigger configuration") - - # Get numerical process ID - numerical_id = PROCESS_MAPPING.get(selected_process) - if not numerical_id: - raise AirflowException(f"Process '{selected_process}' not found in mapping") - - logging.info(f"Selected process: {selected_process}") - logging.info(f"Numerical process ID: {numerical_id}") - logging.info(f"Queue: {queue}") - logging.info(f"Job inputs: {job_inputs}") - - return { - "selected_process": selected_process, - "numerical_process_id": numerical_id, - "queue": queue, - "job_inputs": job_inputs - } - -setup_task = PythonOperator( - task_id="setup_execution", - python_callable=setup_execution, - dag=dag, -) - -submit_job_task = OGCSubmitJobOperator( - task_id="submit_job", - process_id="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['numerical_process_id'] }}", - job_inputs="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['job_inputs'] }}", - job_queue="{{ ti.xcom_pull(task_ids='setup_execution', key='return_value')['queue'] }}", - dag=dag, -) - -monitor_job_task = OGCMonitorJobOperator( - task_id="monitor_job", - job_id="{{ ti.xcom_pull(task_ids='submit_job', key='return_value')['job_id'] }}", - timeout=3600, - poll_interval=30, - dag=dag, -) - -def cleanup_execution(**context): - """Cleanup and final reporting.""" - - logging.info("OGC process execution completed.") - - # Get results from previous tasks - setup_result = context['ti'].xcom_pull(task_ids='setup_execution', key='return_value') - submit_result = context['ti'].xcom_pull(task_ids='submit_job', key='return_value') - monitor_result = context['ti'].xcom_pull(task_ids='monitor_job', key='return_value') - - logging.info("=" * 60) - logging.info("EXECUTION SUMMARY") - logging.info("=" * 60) - - if setup_result: - logging.info(f"Process: {setup_result.get('selected_process')}") - logging.info(f"Process ID: {setup_result.get('numerical_process_id')}") - logging.info(f"Queue: {setup_result.get('queue')}") - - if submit_result: - logging.info(f"Job ID: {submit_result.get('job_id')}") - - if monitor_result: - logging.info(f"Final Status: {monitor_result.get('status')}") - - logging.info("=" * 60) - -cleanup_task = PythonOperator( - task_id="cleanup_execution", - python_callable=cleanup_execution, - dag=dag, - trigger_rule=TriggerRule.ALL_DONE -) - -chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file From 2c7973a04d94bb26dff5fb33f46384d58c033533 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Fri, 1 Aug 2025 16:33:20 -0700 Subject: [PATCH 12/19] deleted other dynamic UI functions --- airflow/plugins/dynamic_form_plugin.py | 55 ----- airflow/plugins/templates/dynamic_form.html | 237 -------------------- 2 files changed, 292 deletions(-) delete mode 100644 airflow/plugins/dynamic_form_plugin.py delete mode 100644 airflow/plugins/templates/dynamic_form.html diff --git a/airflow/plugins/dynamic_form_plugin.py b/airflow/plugins/dynamic_form_plugin.py deleted file mode 100644 index 9a12c78d..00000000 --- a/airflow/plugins/dynamic_form_plugin.py +++ /dev/null @@ -1,55 +0,0 @@ -from airflow.plugins_manager import AirflowPlugin -from airflow.www import auth -from flask import Blueprint, request, render_template, redirect, url_for, jsonify -from airflow.models import DagRun -from airflow.utils.state import DagRunState -from airflow.utils import timezone -from airflow.api.common.trigger_dag import trigger_dag -import logging - -logger = logging.getLogger(__name__) - -dynamic_form_bp = Blueprint( - "dynamic_form", - __name__, - template_folder="templates", - static_folder="static" -) - -@dynamic_form_bp.route('/dynamic_form/') -@auth.has_access_dag('GET') -def show_form(dag_id): - return render_template('dynamic_form.html', dag_id=dag_id) - -@dynamic_form_bp.route('/submit_form/', methods=['POST']) -@auth.has_access_dag('POST') -def submit_form(dag_id): - try: - form_data = request.form.to_dict() - logger.info(f"Received form data for DAG {dag_id}: {form_data}") - - # Trigger DAG with form data - dag_run = trigger_dag( - dag_id=dag_id, - run_id=None, - conf=form_data, - execution_date=None, - replace_microseconds=False - ) - - return jsonify({ - 'status': 'success', - 'message': f'DAG {dag_id} triggered successfully', - 'dag_run_id': dag_run.run_id - }) - - except Exception as e: - logger.error(f"Error triggering DAG {dag_id}: {str(e)}") - return jsonify({ - 'status': 'error', - 'message': f'Error triggering DAG: {str(e)}' - }), 500 - -class DynamicFormPlugin(AirflowPlugin): - name = "dynamic_form" - flask_blueprints = [dynamic_form_bp] \ No newline at end of file diff --git a/airflow/plugins/templates/dynamic_form.html b/airflow/plugins/templates/dynamic_form.html deleted file mode 100644 index b456bc4b..00000000 --- a/airflow/plugins/templates/dynamic_form.html +++ /dev/null @@ -1,237 +0,0 @@ - - - - - - Dynamic Form - {{ dag_id }} - - - -
-

Dynamic Form Test - {{ dag_id }}

- -
-
- - -
- - -
-

Option A Selected - Additional Fields:

-
- - -
-
- - -
-
- - -
-

Option B Selected - Additional Fields:

-
- - -
-
- - -
-
- - -
-

Option C Selected - Additional Fields:

-
- - -
-
- - -
-
- - -
- -
-
- - - - \ No newline at end of file From 677ec3c1129d4300194d4b5fd4ce90353924d015 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 4 Aug 2025 10:06:58 -0700 Subject: [PATCH 13/19] cleaned up DAG --- airflow/dags/run_ogc_process.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index d047beb2..41f6f013 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -17,20 +17,17 @@ from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s from unity_sps_utils import ( - DEFAULT_LOG_LEVEL, - EC2_TYPES, - NODE_POOL_DEFAULT, - NODE_POOL_HIGH_WORKLOAD, POD_LABEL, POD_NAMESPACE, - build_ec2_type_label, get_affinity, ) +PROCESSES_ENDPOINT = "https://api.dit.maap-project.org/api/ogc/processes" + def fetch_ogc_processes(): """Fetch available processes from the OGC API and create mapping.""" try: - response = requests.get("https://api.dit.maap-project.org/api/ogc/processes", timeout=30) + response = requests.get(PROCESSES_ENDPOINT, timeout=30) response.raise_for_status() processes_data = response.json() @@ -272,7 +269,7 @@ def setup(ti=None, **context): setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) submit_job_task = SPSOGCOperator( - task_id="submit_job_task3", + task_id="submit_job_task", operation_type="submit", selected_process="{{ params.selected_process }}", job_inputs="{{ params.job_inputs }}", @@ -281,9 +278,9 @@ def setup(ti=None, **context): ) monitor_job_task = SPSOGCOperator( - task_id="monitor_job_task3", + task_id="monitor_job_task", operation_type="monitor", - job_id="{{ ti.xcom_pull(task_ids='submit_job_task3', key='return_value')['job_id'] }}", + job_id="{{ ti.xcom_pull(task_ids='submit_job_task', key='return_value')['job_id'] }}", dag=dag, ) @@ -292,8 +289,8 @@ def cleanup(**context): logging.info("Cleanup executed.") # Log final results if available - submit_result = context['ti'].xcom_pull(task_ids='submit_job_task3', key='return_value') - monitor_result = context['ti'].xcom_pull(task_ids='monitor_job_task3', key='return_value') + submit_result = context['ti'].xcom_pull(task_ids='submit_job_task', key='return_value') + monitor_result = context['ti'].xcom_pull(task_ids='monitor_job_task', key='return_value') if submit_result: logging.info(f"Job submission result: {submit_result}") From e840e15f1ac72f60f6bca317fd983a2c67f5c796 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 4 Aug 2025 10:28:36 -0700 Subject: [PATCH 14/19] ran precommit --- airflow/dags/run_ogc_process.py | 154 ++++++++++-------- airflow/docker/run_ogc_process/Dockerfile | 2 +- .../run_ogc_process_entrypoint.sh | 18 +- 3 files changed, 94 insertions(+), 80 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 41f6f013..4ea321c6 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -2,15 +2,16 @@ DAG with custom SPSOGCOperator that subclasses KubernetesPodOperator for OGC process execution with SPS-specific functionality. """ + import json import logging -from datetime import datetime -import requests import re +from datetime import datetime +import requests +from airflow.models.baseoperator import chain from airflow.models.dag import DAG from airflow.models.param import Param -from airflow.models.baseoperator import chain from airflow.operators.python import PythonOperator, get_current_context from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret @@ -24,38 +25,39 @@ PROCESSES_ENDPOINT = "https://api.dit.maap-project.org/api/ogc/processes" + def fetch_ogc_processes(): """Fetch available processes from the OGC API and create mapping.""" try: response = requests.get(PROCESSES_ENDPOINT, timeout=30) response.raise_for_status() - + processes_data = response.json() process_mapping = {} dropdown_options = [] - + for process in processes_data.get("processes", []): process_id = process.get("id") process_version = process.get("version") - + # Extract numerical ID from links numerical_id = None for link in process.get("links", []): if link.get("rel") == "self": href = link.get("href", "") # Extract number from href like "/ogc/processes/7" - match = re.search(r'/processes/(\d+)$', href) + match = re.search(r"/processes/(\d+)$", href) if match: numerical_id = int(match.group(1)) break - + if process_id and numerical_id: display_name = f"{process_id}:{process_version}" if process_version else process_id dropdown_options.append(display_name) process_mapping[display_name] = numerical_id - + return process_mapping, dropdown_options - + except requests.RequestException as e: logging.error(f"Failed to fetch processes: {e}") # Return fallback mapping @@ -64,6 +66,7 @@ def fetch_ogc_processes(): logging.error(f"Error processing OGC processes: {e}") return {"example-process:1.0": 1}, ["example-process:1.0"] + # Constants K8S_SECRET_NAME = "sps-app-credentials" DOCKER_IMAGE = "jplmdps/ogc-job-runner:latest" @@ -79,28 +82,31 @@ def fetch_ogc_processes(): ) ] + class SPSOGCOperator(KubernetesPodOperator): """ Custom operator for SPS OGC process execution that subclasses KubernetesPodOperator. - + This operator encapsulates all SPS-specific configuration and provides a clean interface for OGC process submission and monitoring. """ - - def __init__(self, - operation_type: str, - selected_process: str = None, - job_inputs: str = None, - job_queue: str = None, - job_id: str = None, - **kwargs): + + def __init__( + self, + operation_type: str, + selected_process: str = None, + job_inputs: str = None, + job_queue: str = None, + job_id: str = None, + **kwargs, + ): """ Initialize the SPSOGCOperator. - + Args: operation_type: Either "submit" or "monitor" selected_process: Process selection for submit operations - job_inputs: JSON string of job inputs for submit operations + job_inputs: JSON string of job inputs for submit operations job_queue: Queue name for submit operations job_id: Job ID for monitor operations """ @@ -109,44 +115,47 @@ def __init__(self, self.job_inputs = job_inputs self.job_queue = job_queue self.job_id = job_id - + # Set SPS-specific defaults - kwargs.setdefault('namespace', POD_NAMESPACE) - kwargs.setdefault('image', DOCKER_IMAGE) - kwargs.setdefault('service_account_name', 'airflow-worker') - kwargs.setdefault('secrets', secret_env_vars) - kwargs.setdefault('in_cluster', True) - kwargs.setdefault('get_logs', True) - kwargs.setdefault('startup_timeout_seconds', 600) - kwargs.setdefault('container_security_context', {"privileged": True}) - kwargs.setdefault('container_logs', True) - kwargs.setdefault('labels', {"pod": POD_LABEL}) - kwargs.setdefault('annotations', {"karpenter.sh/do-not-disrupt": "true"}) - kwargs.setdefault('affinity', get_affinity( - capacity_type=["spot"], - anti_affinity_label=POD_LABEL, - )) - kwargs.setdefault('on_finish_action', "keep_pod") - kwargs.setdefault('is_delete_operator_pod', False) - + kwargs.setdefault("namespace", POD_NAMESPACE) + kwargs.setdefault("image", DOCKER_IMAGE) + kwargs.setdefault("service_account_name", "airflow-worker") + kwargs.setdefault("secrets", secret_env_vars) + kwargs.setdefault("in_cluster", True) + kwargs.setdefault("get_logs", True) + kwargs.setdefault("startup_timeout_seconds", 600) + kwargs.setdefault("container_security_context", {"privileged": True}) + kwargs.setdefault("container_logs", True) + kwargs.setdefault("labels", {"pod": POD_LABEL}) + kwargs.setdefault("annotations", {"karpenter.sh/do-not-disrupt": "true"}) + kwargs.setdefault( + "affinity", + get_affinity( + capacity_type=["spot"], + anti_affinity_label=POD_LABEL, + ), + ) + kwargs.setdefault("on_finish_action", "keep_pod") + kwargs.setdefault("is_delete_operator_pod", False) + # Build operation-specific environment variables if operation_type == "submit": - kwargs['env_vars'] = self._build_submit_env_vars() - kwargs['name'] = f"ogc-submit-pod-{kwargs.get('task_id', 'unknown')}" - kwargs.setdefault('do_xcom_push', True) # Submit tasks need to return job ID + kwargs["env_vars"] = self._build_submit_env_vars() + kwargs["name"] = f"ogc-submit-pod-{kwargs.get('task_id', 'unknown')}" + kwargs.setdefault("do_xcom_push", True) # Submit tasks need to return job ID elif operation_type == "monitor": - kwargs['env_vars'] = self._build_monitor_env_vars() - kwargs['name'] = f"ogc-monitor-pod-{kwargs.get('task_id', 'unknown')}" + kwargs["env_vars"] = self._build_monitor_env_vars() + kwargs["name"] = f"ogc-monitor-pod-{kwargs.get('task_id', 'unknown')}" else: raise ValueError(f"Invalid operation_type: {operation_type}. Must be 'submit' or 'monitor'") - + super().__init__(**kwargs) - + def _build_submit_env_vars(self): """Build environment variables for job submission.""" # Resolve numerical process ID from selected process numerical_process_id = self._resolve_process_id() - + return [ k8s.V1EnvVar( name="SUBMIT_JOB_URL", @@ -155,9 +164,9 @@ def _build_submit_env_vars(self): k8s.V1EnvVar(name="PROCESS_ID", value=str(numerical_process_id)), k8s.V1EnvVar(name="JOB_INPUTS", value=self.job_inputs or "{}"), k8s.V1EnvVar(name="QUEUE", value=self.job_queue or "maap-dps-sandbox"), - k8s.V1EnvVar(name="SUBMIT_JOB", value="true") + k8s.V1EnvVar(name="SUBMIT_JOB", value="true"), ] - + def _build_monitor_env_vars(self): """Build environment variables for job monitoring.""" return [ @@ -166,44 +175,45 @@ def _build_monitor_env_vars(self): value="https://api.dit.maap-project.org/api/ogc/jobs/{job_id}", ), k8s.V1EnvVar(name="JOB_ID", value=self.job_id), - k8s.V1EnvVar(name="SUBMIT_JOB", value="false") + k8s.V1EnvVar(name="SUBMIT_JOB", value="false"), ] - + def _resolve_process_id(self): """Resolve the selected process to a numerical process ID.""" if not self.selected_process: raise ValueError("selected_process is required for submit operations") - + # Handle templated values - they won't be resolved yet during __init__ if "{{" in str(self.selected_process): # Return a template that will be resolved at runtime return "{{ ti.xcom_pull(task_ids='Setup', key='return_value')['numerical_process_id'] }}" - + # Direct lookup for non-templated values numerical_id = PROCESS_MAPPING.get(self.selected_process) if numerical_id is None: self.log.warning(f"Process '{self.selected_process}' not found in mapping, defaulting to ID 1") return 1 - + return numerical_id - + def execute(self, context): """Execute the operator with additional SPS-specific logging.""" self.log.info(f"Starting SPS OGC {self.operation_type} operation") - + if self.operation_type == "submit": self.log.info(f"Selected process: {self.selected_process}") self.log.info(f"Job queue: {self.job_queue}") self.log.info(f"Job inputs: {self.job_inputs}") elif self.operation_type == "monitor": self.log.info(f"Monitoring job ID: {self.job_id}") - + # Call parent execute method result = super().execute(context) - + self.log.info(f"SPS OGC {self.operation_type} operation completed") return result + dag_default_args = { "owner": "unity-sps", "depends_on_past": False, @@ -227,7 +237,7 @@ def execute(self, context): default=DROPDOWN_OPTIONS[0] if DROPDOWN_OPTIONS else "Error loading dropdown", enum=DROPDOWN_OPTIONS, title="Process Selection", - description=f"Select a process to execute.", + description="Select a process to execute.", ), "queue": Param( "maap-dps-sandbox", @@ -240,24 +250,25 @@ def execute(self, context): type="string", title="Job Inputs", description="A JSON string representing the inputs payload for the job.", - ) + ), }, ) # --- Task Definitions --- + def setup(ti=None, **context): """Task that logs DAG parameters and process mapping information.""" - + logging.info("Starting OGC job submission and monitoring DAG (Custom Operator Version).") logging.info(f"Parameters received: {context['params']}") logging.info(f"Available processes: {len(DROPDOWN_OPTIONS)}") logging.info(f"Process mapping: {json.dumps(PROCESS_MAPPING, indent=2)}") - + context = get_current_context() logging.info(f"DAG Run parameters: {json.dumps(context['params'], sort_keys=True, indent=4)}") - - selected_process = context['params'].get('selected_process') + + selected_process = context["params"].get("selected_process") if selected_process in PROCESS_MAPPING: numerical_id = PROCESS_MAPPING[selected_process] logging.info(f"Selected process '{selected_process}' maps to numerical ID: {numerical_id}") @@ -266,6 +277,7 @@ def setup(ti=None, **context): logging.warning(f"Selected process '{selected_process}' not found in mapping") return {"numerical_process_id": 1} + setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) submit_job_task = SPSOGCOperator( @@ -284,21 +296,23 @@ def setup(ti=None, **context): dag=dag, ) + def cleanup(**context): """A placeholder cleanup task""" logging.info("Cleanup executed.") - + # Log final results if available - submit_result = context['ti'].xcom_pull(task_ids='submit_job_task', key='return_value') - monitor_result = context['ti'].xcom_pull(task_ids='monitor_job_task', key='return_value') - + submit_result = context["ti"].xcom_pull(task_ids="submit_job_task", key="return_value") + monitor_result = context["ti"].xcom_pull(task_ids="monitor_job_task", key="return_value") + if submit_result: logging.info(f"Job submission result: {submit_result}") if monitor_result: logging.info(f"Job monitoring result: {monitor_result}") + cleanup_task = PythonOperator( task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) -chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) \ No newline at end of file +chain(setup_task, submit_job_task, monitor_job_task, cleanup_task) diff --git a/airflow/docker/run_ogc_process/Dockerfile b/airflow/docker/run_ogc_process/Dockerfile index bb246eea..2151ee8e 100644 --- a/airflow/docker/run_ogc_process/Dockerfile +++ b/airflow/docker/run_ogc_process/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.18 +FROM alpine:3.18 RUN apk add --no-cache curl jq diff --git a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh index 1e3902c1..590e239e 100644 --- a/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh +++ b/airflow/docker/run_ogc_process/run_ogc_process_entrypoint.sh @@ -33,13 +33,13 @@ if [ "$SUBMIT_JOB" = "true" ] || [ "$SUBMIT_JOB" = "True" ]; then printf '{"job_id": "%s"}' "$job_id" > /airflow/xcom/return.json elif [ "$SUBMIT_JOB" = "false" ] || [ "$SUBMIT_JOB" = "False" ]; then echo "Monitoring job status" - + MONITOR_JOB_URL=$(echo "$MONITOR_JOB_URL" | sed "s/{job_id}/$JOB_ID/") - + TIMEOUT=3600 POLL_INTERVAL=30 SECONDS=0 - + while [ $SECONDS -lt $TIMEOUT ]; do echo "Checking status..." response=$(curl --location ${MONITOR_JOB_URL} \ @@ -47,24 +47,24 @@ elif [ "$SUBMIT_JOB" = "false" ] || [ "$SUBMIT_JOB" = "False" ]; then --header "Content-Type: application/json") status=$(echo "$response" | jq -r .status) - + echo "Current status is: $status" - + if [ "$status" = "successful" ]; then echo "Job completed successfully!" exit 0 elif [ "$status" = "failed" ]; then echo "Job failed!" echo "Error details: $(echo "$response" | jq .)" - exit 1 + exit 1 fi - + sleep $POLL_INTERVAL SECONDS=$((SECONDS + POLL_INTERVAL)) done - + echo "Job monitoring timed out after $TIMEOUT seconds." exit 1 else echo "SUBMIT_JOB variable must be specified and set to true or false" -fi \ No newline at end of file +fi From 93cb0e27661ec378fee3d56fe5a2e141a18f847a Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 4 Aug 2025 10:31:37 -0700 Subject: [PATCH 15/19] corrected failing test github that runs locally --- airflow/dags/run_ogc_process.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 4ea321c6..6523872c 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -17,11 +17,7 @@ from airflow.providers.cncf.kubernetes.secret import Secret as AirflowK8sSecret from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s -from unity_sps_utils import ( - POD_LABEL, - POD_NAMESPACE, - get_affinity, -) +from unity_sps_utils import POD_LABEL, POD_NAMESPACE, get_affinity PROCESSES_ENDPOINT = "https://api.dit.maap-project.org/api/ogc/processes" From 2f05e8f080d712524f8f5e9eaf92e826add4b686 Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 6 Aug 2025 14:30:52 -0700 Subject: [PATCH 16/19] added to post deployment script --- airflow/dags/run_ogc_process.py | 2 +- ogc-application-packages/run_ogc_process.json | 53 +++++++++++++++++++ utils/post_deployment.sh | 2 +- utils/post_deployment_terraform.sh | 2 +- 4 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 ogc-application-packages/run_ogc_process.json diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 6523872c..096f7c6e 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -219,7 +219,7 @@ def execute(self, context): # --- DAG Definition --- dag = DAG( - dag_id="run_ogc_process3", + dag_id="run_ogc_process", description="Submits a job to an OGC process and monitors (using custom SPSOGCOperator)", dag_display_name="Run an OGC Process (Custom Operator from KubernetesPodOperator)", tags=["ogc", "job", "custom-operator"], diff --git a/ogc-application-packages/run_ogc_process.json b/ogc-application-packages/run_ogc_process.json new file mode 100644 index 00000000..52a237ab --- /dev/null +++ b/ogc-application-packages/run_ogc_process.json @@ -0,0 +1,53 @@ +{ + "executionUnit": { + "image": "jplmdps/ogc-job-runner:latest", + "type": "docker" + }, + "processDescription": { + "description": "Submits a job to an OGC process and monitors (using custom SPSOGCOperator)", + "id": "run_ogc_process", + "inputs": { + "job_inputs": { + "description": "A JSON string representing the inputs payload for the job.", + "maxOccurs": 1, + "minOccurs": 1, + "schema": { + "type": "string" + }, + "title": "Job Inputs" + }, + "queue": { + "description": "The MAAP queue to submit the job to", + "maxOccurs": 1, + "minOccurs": 1, + "schema": { + "type": "string" + }, + "title": "Queue" + }, + "selected_process": { + "description": "Select a process to execute.", + "maxOccurs": 1, + "minOccurs": 1, + "schema": { + "type": "string" + }, + "title": "Process Selection" + } + }, + "jobControlOptions": [ + "async-execute" + ], + "outputs": { + "result": { + "description": "The result of the OGC process execution", + "schema": { + "$ref": "some-ref" + }, + "title": "Process Result" + } + }, + "title": "Run an OGC Process (Custom Operator from KubernetesPodOperator)", + "version": "1.0.0" + } +} \ No newline at end of file diff --git a/utils/post_deployment.sh b/utils/post_deployment.sh index 2adfe270..d8e27641 100755 --- a/utils/post_deployment.sh +++ b/utils/post_deployment.sh @@ -19,7 +19,7 @@ export TOKEN="$(python cognito-token-fetch.py -u $UNITY_USERNAME -c $UNITY_CLIE echo $TOKEN # list of processes to be registered -declare -a procs=("cwl_dag.json" "karpenter_test.json" "appgen_dag.json" "cwl_dag_modular.json" "db_cleanup_dag.json") +declare -a procs=("cwl_dag.json" "karpenter_test.json" "appgen_dag.json" "cwl_dag_modular.json" "db_cleanup_dag.json" "run_ogc_process.json") for proc in "${procs[@]}" do diff --git a/utils/post_deployment_terraform.sh b/utils/post_deployment_terraform.sh index 281e9cbd..57fc08fc 100755 --- a/utils/post_deployment_terraform.sh +++ b/utils/post_deployment_terraform.sh @@ -31,7 +31,7 @@ token=$(echo $token_response | jq -r '.AuthenticationResult.AccessToken') echo "Cognito token retrieved." # list of processes to be registered -declare -a procs=("cwl_dag.json" "karpenter_test.json" "appgen_dag.json" "cwl_dag_modular.json" "db_cleanup_dag.json") +declare -a procs=("cwl_dag.json" "karpenter_test.json" "appgen_dag.json" "cwl_dag_modular.json" "db_cleanup_dag.json" "run_ogc_process.json") for proc in "${procs[@]}" do From 4746f75dd6d24fe683df68e298d526733a7a6e5e Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 6 Aug 2025 14:32:33 -0700 Subject: [PATCH 17/19] ran precommit --- ogc-application-packages/run_ogc_process.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ogc-application-packages/run_ogc_process.json b/ogc-application-packages/run_ogc_process.json index 52a237ab..d196e108 100644 --- a/ogc-application-packages/run_ogc_process.json +++ b/ogc-application-packages/run_ogc_process.json @@ -50,4 +50,4 @@ "title": "Run an OGC Process (Custom Operator from KubernetesPodOperator)", "version": "1.0.0" } -} \ No newline at end of file +} From 02266b0ecae808a46bb99f4375f1d607b702466a Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Wed, 6 Aug 2025 17:25:14 -0700 Subject: [PATCH 18/19] updated ssm to point to new mdps dev account pgt token --- terraform-unity/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform-unity/main.tf b/terraform-unity/main.tf index 3ba52162..fb4bdb9f 100644 --- a/terraform-unity/main.tf +++ b/terraform-unity/main.tf @@ -30,7 +30,7 @@ data "aws_ssm_parameter" "dockstore_token" { } data "aws_ssm_parameter" "maap_pgt" { - name = "/unity/ads/ogc/development/maap_pgt_grace_test_acct" + name = "/unity/ads/ogc/development/maap_pgt_mdps_dev_acct" with_decryption = true } From 7e9d815d5959a02e0354ec10a6ccc1eede02f5ca Mon Sep 17 00:00:00 2001 From: grallewellyn Date: Mon, 11 Aug 2025 10:59:50 -0700 Subject: [PATCH 19/19] updated default queue to be maap-dps-worker-cardamom since that is the only one the new MDPS dev account has access to --- airflow/dags/run_ogc_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/run_ogc_process.py b/airflow/dags/run_ogc_process.py index 096f7c6e..cc54f19d 100644 --- a/airflow/dags/run_ogc_process.py +++ b/airflow/dags/run_ogc_process.py @@ -159,7 +159,7 @@ def _build_submit_env_vars(self): ), k8s.V1EnvVar(name="PROCESS_ID", value=str(numerical_process_id)), k8s.V1EnvVar(name="JOB_INPUTS", value=self.job_inputs or "{}"), - k8s.V1EnvVar(name="QUEUE", value=self.job_queue or "maap-dps-sandbox"), + k8s.V1EnvVar(name="QUEUE", value=self.job_queue or "maap-dps-worker-cardamom"), k8s.V1EnvVar(name="SUBMIT_JOB", value="true"), ] @@ -236,7 +236,7 @@ def execute(self, context): description="Select a process to execute.", ), "queue": Param( - "maap-dps-sandbox", + "maap-dps-worker-cardamom", type="string", title="Queue", description="The MAAP queue to submit the job to",