From aa5a552cc5a71424cbf41cfd5a10fc25ed38036b Mon Sep 17 00:00:00 2001 From: Roland Kunkel Date: Tue, 17 Feb 2026 12:53:28 +0100 Subject: [PATCH] SREP-3640: add dry run option and link to logs to output for CAD run command --- cmd/cluster/cad/run.go | 68 +++++++++++++++++++--------------- cmd/cluster/cad/run_test.go | 14 ++++++- docs/README.md | 15 ++++++-- docs/osdctl_cluster_cad_run.md | 15 ++++++-- 4 files changed, 75 insertions(+), 37 deletions(-) diff --git a/cmd/cluster/cad/run.go b/cmd/cluster/cad/run.go index 1626c22e4..da40e4f0b 100644 --- a/cmd/cluster/cad/run.go +++ b/cmd/cluster/cad/run.go @@ -3,6 +3,7 @@ package cad import ( "context" "fmt" + "slices" "github.com/openshift/osdctl/pkg/k8s" "github.com/openshift/osdctl/pkg/utils" @@ -15,6 +16,8 @@ import ( const ( cadClusterIDProd = "2fbi9mjhqpobh20ot5d7e5eeq3a8gfhs" // These IDs are hard-coded in app-interface cadClusterIDStage = "2f9ghpikkv446iidcv7b92em2hgk13q9" + cadNamespaceProd = "configuration-anomaly-detection-production" + cadNamespaceStage = "configuration-anomaly-detection-stage" ) var validInvestigations = []string{ @@ -40,6 +43,7 @@ type cadRunOptions struct { investigation string elevationReason string environment string + isDryRun bool } func newCmdRun() *cobra.Command { @@ -51,7 +55,7 @@ func newCmdRun() *cobra.Command { Long: `Run a manual investigation on the Configuration Anomaly Detection (CAD) cluster. This command schedules a Tekton PipelineRun on the appropriate CAD cluster (stage or production) -to run an investigation against a target cluster. +to run an investigation against a target cluster. The results will be written to a backplane report. Prerequisites: - Connected to the target cluster's OCM environment (production or stage) @@ -61,7 +65,7 @@ Available Investigations: chgm, cmbb, can-not-retrieve-updates, ai, cpd, etcd-quota-low, insightsoperatordown, machine-health-check, must-gather, upgrade-config -Example: +Examples: # Run a change management investigation on a production cluster osdctl cluster cad run \ --cluster-id 1a2b3c4d5e6f7g8h9i0j \ @@ -69,6 +73,14 @@ Example: --environment production \ --reason "OHSS-12345" + # Run a dry-run investigation (does not create a report) + osdctl cluster cad run \ + --cluster-id 1a2b3c4d5e6f7g8h9i0j \ + --investigation chgm \ + --environment production \ + --reason "OHSS-12345" \ + --dry-run + Note: After the investigation completes (may take several minutes), view results using: osdctl cluster reports list -C -l 1 @@ -83,9 +95,15 @@ Note: runCmd.Flags().StringVarP(&opts.clusterID, "cluster-id", "C", "", "Cluster ID (internal or external)") runCmd.Flags().StringVarP(&opts.investigation, "investigation", "i", "", "Investigation name") - runCmd.Flags().StringVarP(&opts.environment, "environment", "e", "", "Environment of the cluster we want to run the investigation on. Allowed values: \"stage\" or \"production\"") + runCmd.Flags().StringVarP(&opts.environment, "environment", "e", "", "Environment in which the target cluster runs. Allowed values: \"stage\" or \"production\"") + runCmd.Flags().BoolVarP(&opts.isDryRun, "dry-run", "d", false, "Dry-Run: Run the investigation with the dry-run flag. This will not create a report.") runCmd.Flags().StringVar(&opts.elevationReason, "reason", "", "Provide a reason for running a manual investigation, used for backplane. Eg: 'OHSS-XXXX', or '#ITN-2024-XXXXX.") + _ = runCmd.MarkFlagRequired("cluster-id") + _ = runCmd.MarkFlagRequired("investigation") + _ = runCmd.MarkFlagRequired("environment") + _ = runCmd.MarkFlagRequired("reason") + _ = runCmd.RegisterFlagCompletionFunc("investigation", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { return validInvestigations, cobra.ShellCompDirectiveNoFileComp }) @@ -123,42 +141,32 @@ func (o *cadRunOptions) run() error { return fmt.Errorf("failed to schedule task: %w", err) } - reportCmd := fmt.Sprintf("'osdctl cluster reports list -C %s -l 1'", o.clusterID) - fmt.Println("Successfully scheduled manual investigation. It can take several minutes until a report is available. Run this command to check the latest report for the results while being connected to the right OCM backplane environment. " + reportCmd) + // Get the generated name created by the API server + pipelineRunName := u.GetName() + + logsLink := fmt.Sprintf("https://grafana.app-sre.devshift.net/explore?schemaVersion=1&panes=%%7B%%22buh%%22:%%7B%%22datasource%%22:%%22P1A97A9592CB7F392%%22,%%22queries%%22:%%5B%%7B%%22id%%22:%%22%%22,%%22region%%22:%%22us-east-1%%22,%%22namespace%%22:%%22%%22,%%22refId%%22:%%22A%%22,%%22datasource%%22:%%7B%%22type%%22:%%22cloudwatch%%22,%%22uid%%22:%%22P1A97A9592CB7F392%%22%%7D,%%22queryMode%%22:%%22Logs%%22,%%22logGroups%%22:%%5B%%7B%%22arn%%22:%%22arn:aws:logs:us-east-1:744086762512:log-group:cads01ue1.configuration-anomaly-detection-stage:%%2A%%22,%%22name%%22:%%22cads01ue1.configuration-anomaly-detection-stage%%22,%%22accountId%%22:%%22744086762512%%22%%7D,%%7B%%22arn%%22:%%22arn:aws:logs:us-east-1:744086762512:log-group:cadp01ue1.configuration-anomaly-detection-production:%%2A%%22,%%22name%%22:%%22cadp01ue1.configuration-anomaly-detection-production%%22,%%22accountId%%22:%%22744086762512%%22%%7D%%5D,%%22expression%%22:%%22fields%%20message%%5Cn%%7C%%20filter%%20kubernetes.pod_name%%20like%%20%%5C%%22%s%%5C%%22%%22,%%22statsGroups%%22:%%5B%%5D%%7D%%5D,%%22range%%22:%%7B%%22from%%22:%%22now-1h%%22,%%22to%%22:%%22now%%22%%7D,%%22panelsState%%22:%%7B%%22logs%%22:%%7B%%22visualisationType%%22:%%22logs%%22%%7D%%7D%%7D%%7D&orgId=1", pipelineRunName) + if !o.isDryRun { + reportCmd := fmt.Sprintf("'osdctl cluster reports list -C %s -l 1'", o.clusterID) + fmt.Println("Successfully scheduled manual investigation. It can take several minutes until a report is available. \n" + + "Run this command to check the latest report for the results while being connected to the right OCM backplane environment. " + reportCmd + " \n" + + "If a report fails to show up, check the TaskRun pod logs here after a few minutes: " + logsLink) + } else { + fmt.Println("Dry-run investigation scheduled. Check for logs here: ", logsLink) + } return nil } func (o *cadRunOptions) validate() error { - conn, err := utils.CreateConnection() - if err != nil { - return err - } - defer conn.Close() - if o.clusterID == "" { return fmt.Errorf("cluster-id is required") } - validInvestigation := false - for _, v := range validInvestigations { - if o.investigation == v { - validInvestigation = true - break - } - } - if !validInvestigation { + if !slices.Contains(validInvestigations, o.investigation) { return fmt.Errorf("invalid investigation %q, must be one of: %v", o.investigation, validInvestigations) } - validEnvironment := false - for _, v := range validEnvironments { - if o.environment == v { - validEnvironment = true - break - } - } - if !validEnvironment { + if !slices.Contains(validEnvironments, o.environment) { return fmt.Errorf("invalid environment %q, must be one of: %v", o.environment, validEnvironments) } @@ -171,9 +179,9 @@ func (o *cadRunOptions) validate() error { func (o *cadRunOptions) getCADClusterConfig() (clusterID, namespace string) { if o.environment == "stage" { - return cadClusterIDStage, "configuration-anomaly-detection-stage" + return cadClusterIDStage, cadNamespaceStage } - return cadClusterIDProd, "configuration-anomaly-detection-production" + return cadClusterIDProd, cadNamespaceProd } func (o *cadRunOptions) pipelineRunTemplate(cadNamespace string) *unstructured.Unstructured { @@ -197,7 +205,7 @@ func (o *cadRunOptions) pipelineRunTemplate(cadNamespace string) *unstructured.U }, { "name": "dry-run", - "value": "false", + "value": o.isDryRun, }, }, "pipelineRef": map[string]interface{}{ diff --git a/cmd/cluster/cad/run_test.go b/cmd/cluster/cad/run_test.go index 8e82b0077..aaf5f9579 100644 --- a/cmd/cluster/cad/run_test.go +++ b/cmd/cluster/cad/run_test.go @@ -56,6 +56,7 @@ func TestPipelineRunTemplate(t *testing.T) { clusterID string investigation string cadNamespace string + isDryRun bool expectedNamespace string }{ { @@ -63,6 +64,7 @@ func TestPipelineRunTemplate(t *testing.T) { clusterID: "test-cluster-123", investigation: "chgm", cadNamespace: "configuration-anomaly-detection-production", + isDryRun: false, expectedNamespace: "configuration-anomaly-detection-production", }, { @@ -70,8 +72,17 @@ func TestPipelineRunTemplate(t *testing.T) { clusterID: "stage-cluster-456", investigation: "cmbb", cadNamespace: "configuration-anomaly-detection-stage", + isDryRun: false, expectedNamespace: "configuration-anomaly-detection-stage", }, + { + name: "dry-run pipeline run", + clusterID: "test-cluster-789", + investigation: "ai", + cadNamespace: "configuration-anomaly-detection-production", + isDryRun: true, + expectedNamespace: "configuration-anomaly-detection-production", + }, } for _, tt := range tests { @@ -79,6 +90,7 @@ func TestPipelineRunTemplate(t *testing.T) { opts := &cadRunOptions{ clusterID: tt.clusterID, investigation: tt.investigation, + isDryRun: tt.isDryRun, } result := opts.pipelineRunTemplate(tt.cadNamespace) @@ -103,7 +115,7 @@ func TestPipelineRunTemplate(t *testing.T) { assert.Equal(t, tt.investigation, params[1]["value"], "investigation value should match") assert.Equal(t, "dry-run", params[2]["name"], "third param should be dry-run") - assert.Equal(t, "false", params[2]["value"], "dry-run should be false") + assert.Equal(t, tt.isDryRun, params[2]["value"], "dry-run value should match") }) } } diff --git a/docs/README.md b/docs/README.md index 7fa0e4d5a..264328b32 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1292,7 +1292,7 @@ osdctl cluster cad [flags] Run a manual investigation on the Configuration Anomaly Detection (CAD) cluster. This command schedules a Tekton PipelineRun on the appropriate CAD cluster (stage or production) -to run an investigation against a target cluster. +to run an investigation against a target cluster. The results will be written to a backplane report. Prerequisites: - Connected to the target cluster's OCM environment (production or stage) @@ -1302,7 +1302,7 @@ Available Investigations: chgm, cmbb, can-not-retrieve-updates, ai, cpd, etcd-quota-low, insightsoperatordown, machine-health-check, must-gather, upgrade-config -Example: +Examples: # Run a change management investigation on a production cluster osdctl cluster cad run \ --cluster-id 1a2b3c4d5e6f7g8h9i0j \ @@ -1310,6 +1310,14 @@ Example: --environment production \ --reason "OHSS-12345" + # Run a dry-run investigation (does not create a report) + osdctl cluster cad run \ + --cluster-id 1a2b3c4d5e6f7g8h9i0j \ + --investigation chgm \ + --environment production \ + --reason "OHSS-12345" \ + --dry-run + Note: After the investigation completes (may take several minutes), view results using: osdctl cluster reports list -C -l 1 @@ -1327,7 +1335,8 @@ osdctl cluster cad run [flags] --cluster string The name of the kubeconfig cluster to use -C, --cluster-id string Cluster ID (internal or external) --context string The name of the kubeconfig context to use - -e, --environment string Environment of the cluster we want to run the investigation on. Allowed values: "stage" or "production" + -d, --dry-run Dry-Run: Run the investigation with the dry-run flag. This will not create a report. + -e, --environment string Environment in which the target cluster runs. Allowed values: "stage" or "production" -h, --help help for run --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure -i, --investigation string Investigation name diff --git a/docs/osdctl_cluster_cad_run.md b/docs/osdctl_cluster_cad_run.md index 11943432d..65573268b 100644 --- a/docs/osdctl_cluster_cad_run.md +++ b/docs/osdctl_cluster_cad_run.md @@ -7,7 +7,7 @@ Run a manual investigation on the CAD cluster Run a manual investigation on the Configuration Anomaly Detection (CAD) cluster. This command schedules a Tekton PipelineRun on the appropriate CAD cluster (stage or production) -to run an investigation against a target cluster. +to run an investigation against a target cluster. The results will be written to a backplane report. Prerequisites: - Connected to the target cluster's OCM environment (production or stage) @@ -17,7 +17,7 @@ Available Investigations: chgm, cmbb, can-not-retrieve-updates, ai, cpd, etcd-quota-low, insightsoperatordown, machine-health-check, must-gather, upgrade-config -Example: +Examples: # Run a change management investigation on a production cluster osdctl cluster cad run \ --cluster-id 1a2b3c4d5e6f7g8h9i0j \ @@ -25,6 +25,14 @@ Example: --environment production \ --reason "OHSS-12345" + # Run a dry-run investigation (does not create a report) + osdctl cluster cad run \ + --cluster-id 1a2b3c4d5e6f7g8h9i0j \ + --investigation chgm \ + --environment production \ + --reason "OHSS-12345" \ + --dry-run + Note: After the investigation completes (may take several minutes), view results using: osdctl cluster reports list -C -l 1 @@ -39,7 +47,8 @@ osdctl cluster cad run [flags] ``` -C, --cluster-id string Cluster ID (internal or external) - -e, --environment string Environment of the cluster we want to run the investigation on. Allowed values: "stage" or "production" + -d, --dry-run Dry-Run: Run the investigation with the dry-run flag. This will not create a report. + -e, --environment string Environment in which the target cluster runs. Allowed values: "stage" or "production" -h, --help help for run -i, --investigation string Investigation name --reason string Provide a reason for running a manual investigation, used for backplane. Eg: 'OHSS-XXXX', or '#ITN-2024-XXXXX.