diff --git a/horizon/internal/infrastructure/controller/controller.go b/horizon/internal/infrastructure/controller/controller.go index 0d295892..46c59492 100644 --- a/horizon/internal/infrastructure/controller/controller.go +++ b/horizon/internal/infrastructure/controller/controller.go @@ -1,11 +1,15 @@ package controller import ( + "errors" "fmt" "net/http" + "strconv" + "strings" infrastructurehandler "github.com/Meesho/BharatMLStack/horizon/internal/infrastructure/handler" workflowHandler "github.com/Meesho/BharatMLStack/horizon/internal/workflow/handler" + "github.com/Meesho/BharatMLStack/horizon/pkg/argocd" "github.com/gin-gonic/gin" ) @@ -18,17 +22,22 @@ func getWorkingEnv(ctx *gin.Context) string { ctx.JSON(http.StatusInternalServerError, gin.H{"error": "workingEnv not found in context"}) return "" } - return workingEnv.(string) + workingEnvStr, ok := workingEnv.(string) + if !ok { + ctx.JSON(http.StatusInternalServerError, gin.H{"error": "workingEnv has invalid type"}) + return "" + } + return workingEnvStr } type InfrastructureController struct { - handler infrastructurehandler.InfrastructureHandler + handler infrastructurehandler.InfrastructureHandler workflowHandler workflowHandler.Handler } func NewController() *InfrastructureController { return &InfrastructureController{ - handler: infrastructurehandler.InitInfrastructureHandler(), + handler: infrastructurehandler.InitInfrastructureHandler(), workflowHandler: workflowHandler.GetWorkflowHandler(), } } @@ -61,6 +70,17 @@ type UpdateAutoscalingTriggersRequest struct { Triggers []interface{} `json:"triggers" binding:"required"` // Array of trigger objects (CPU, cron, prometheus, etc.) } +// ApplicationLogsQuery holds query params for GET /applications/:appName/logs +type ApplicationLogsQuery struct { + PodName string `form:"podName"` + Container string `form:"container"` + Follow bool `form:"follow"` + Previous bool `form:"previous"` + SinceSeconds string `form:"sinceSeconds"` // string int64, default "0" + TailLines string `form:"tailLines"` // string int64, default "1000" + Filter string `form:"filter"` +} + func (c *InfrastructureController) GetHPAConfig(ctx *gin.Context) { appName := ctx.Param("appName") workingEnv := getWorkingEnv(ctx) @@ -103,6 +123,96 @@ func (c *InfrastructureController) GetResourceDetail(ctx *gin.Context) { ctx.JSON(http.StatusOK, resourceDetail) } +func (c *InfrastructureController) GetApplicationLogs(ctx *gin.Context) { + appName := strings.TrimSpace(ctx.Param("appName")) + workingEnv := getWorkingEnv(ctx) + if workingEnv == "" { + return + } + if appName == "" { + ctx.JSON(http.StatusBadRequest, gin.H{"error": gin.H{ + "message": "appName is required", + }}) + return + } + + var q ApplicationLogsQuery + if err := ctx.ShouldBindQuery(&q); err != nil { + ctx.JSON(http.StatusBadRequest, gin.H{"error": gin.H{ + "message": "invalid query parameters", + }}) + return + } + + container := strings.TrimSpace(q.Container) + if container == "" { + ctx.JSON(http.StatusBadRequest, gin.H{ + "error": gin.H{ + "message": "container is required", + }, + }) + return + } + + if q.Follow { + ctx.JSON(http.StatusBadRequest, gin.H{ + "error": gin.H{ + "message": "streaming (follow=true) is not supported", + }, + }) + return + } + + sinceSeconds := int64(0) + if q.SinceSeconds != "" { + var err error + sinceSeconds, err = strconv.ParseInt(q.SinceSeconds, 10, 64) + if err != nil || sinceSeconds < 0 { + ctx.JSON(http.StatusBadRequest, gin.H{"error": gin.H{ + "message": "sinceSeconds must be a non-negative integer", + }}) + return + } + } + tailLines := int64(1000) + if q.TailLines != "" { + var err error + tailLines, err = strconv.ParseInt(q.TailLines, 10, 64) + if err != nil || tailLines < 0 { + ctx.JSON(http.StatusBadRequest, gin.H{"error": gin.H{ + "message": "tailLines must be a non-negative integer", + }}) + return + } + } + + opts := &argocd.ApplicationLogsOptions{ + PodName: q.PodName, + Container: container, + Previous: q.Previous, + SinceSeconds: sinceSeconds, + TailLines: tailLines, + Filter: q.Filter, + } + + entries, err := c.handler.GetApplicationLogs(appName, workingEnv, opts) + if err != nil { + status := http.StatusInternalServerError + var apiErr *argocd.ArgoCDAPIError + if errors.As(err, &apiErr) { + status = apiErr.StatusCode + } + ctx.JSON(status, gin.H{ + "error": gin.H{ + "message": err.Error(), + }, + }) + return + } + + ctx.JSON(http.StatusOK, entries) +} + func (c *InfrastructureController) RestartDeployment(ctx *gin.Context) { appName := ctx.Param("appName") workingEnv := getWorkingEnv(ctx) diff --git a/horizon/internal/infrastructure/controller/controller_test.go b/horizon/internal/infrastructure/controller/controller_test.go index 00daa41e..f7173f58 100644 --- a/horizon/internal/infrastructure/controller/controller_test.go +++ b/horizon/internal/infrastructure/controller/controller_test.go @@ -3,13 +3,16 @@ package controller import ( "bytes" "encoding/json" + "fmt" "net/http" "net/http/httptest" + "strings" "testing" infrastructurehandler "github.com/Meesho/BharatMLStack/horizon/internal/infrastructure/handler" inframiddleware "github.com/Meesho/BharatMLStack/horizon/internal/infrastructure/middleware" workflowPkg "github.com/Meesho/BharatMLStack/horizon/internal/workflow" + "github.com/Meesho/BharatMLStack/horizon/pkg/argocd" "github.com/Meesho/BharatMLStack/horizon/pkg/etcd" "github.com/gin-gonic/gin" "github.com/stretchr/testify/assert" @@ -42,6 +45,14 @@ func (m *MockInfrastructureHandler) GetResourceDetail(appName, workingEnv string return args.Get(0).(*infrastructurehandler.ResourceDetail), args.Error(1) } +func (m *MockInfrastructureHandler) GetApplicationLogs(appName, workingEnv string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + args := m.Called(appName, workingEnv, opts) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]argocd.ApplicationLogEntry), args.Error(1) +} + func (m *MockInfrastructureHandler) RestartDeployment(appName, workingEnv string, isCanary bool) error { args := m.Called(appName, workingEnv, isCanary) return args.Error(0) @@ -832,6 +843,390 @@ func TestInfrastructureController_UpdateCPUThreshold(t *testing.T) { } } +func TestInfrastructureController_GetApplicationLogs(t *testing.T) { + ts := "2026-02-24T11:53:35Z" + + tests := []struct { + name string + appName string + queryParams string + mockSetup func(*MockInfrastructureHandler) + expectedStatus int + validateBody func(t *testing.T, body map[string]interface{}) + description string + }{ + { + name: "success with valid params", + appName: "test-app", + queryParams: "container=main&tailLines=100", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return([]argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "log line 1", TimeStamp: &ts, PodName: "pod-1"}}, + }, nil) + }, + expectedStatus: http.StatusOK, + description: "Valid request should return log entries", + }, + { + name: "success with all optional params", + appName: "test-app", + queryParams: "container=sidecar&podName=pod-xyz&previous=true&sinceSeconds=600&tailLines=50&filter=ERROR", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.Container == "sidecar" && + opts.PodName == "pod-xyz" && + opts.Previous == true && + opts.SinceSeconds == 600 && + opts.TailLines == 50 && + opts.Filter == "ERROR" + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "All optional params should be forwarded to handler", + }, + { + name: "success returns empty entries", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "Empty log result should return 200 with empty array", + }, + { + name: "missing workingEnv", + appName: "test-app", + queryParams: "container=main&_skipWorkingEnv=true", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + description: "Missing workingEnv should be rejected by middleware", + }, + { + name: "whitespace-only appName", + appName: "%20%20", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "appName is required", errObj["message"]) + }, + description: "Whitespace-only appName should return 400", + }, + { + name: "missing container", + appName: "test-app", + queryParams: "", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "container is required", errObj["message"]) + }, + description: "Missing container should return 400", + }, + { + name: "whitespace-only container", + appName: "test-app", + queryParams: "container=%20%20", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "container is required", errObj["message"]) + }, + description: "Whitespace-only container should return 400", + }, + { + name: "follow true rejected", + appName: "test-app", + queryParams: "container=main&follow=true", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "streaming (follow=true) is not supported", errObj["message"]) + }, + description: "follow=true should be rejected", + }, + { + name: "sinceSeconds non-numeric", + appName: "test-app", + queryParams: "container=main&sinceSeconds=abc", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "sinceSeconds must be a non-negative integer", errObj["message"]) + }, + description: "Non-numeric sinceSeconds should return 400", + }, + { + name: "sinceSeconds negative", + appName: "test-app", + queryParams: "container=main&sinceSeconds=-10", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "sinceSeconds must be a non-negative integer", errObj["message"]) + }, + description: "Negative sinceSeconds should return 400", + }, + { + name: "tailLines non-numeric", + appName: "test-app", + queryParams: "container=main&tailLines=xyz", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "tailLines must be a non-negative integer", errObj["message"]) + }, + description: "Non-numeric tailLines should return 400", + }, + { + name: "tailLines negative", + appName: "test-app", + queryParams: "container=main&tailLines=-5", + mockSetup: func(m *MockInfrastructureHandler) {}, + expectedStatus: http.StatusBadRequest, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.Equal(t, "tailLines must be a non-negative integer", errObj["message"]) + }, + description: "Negative tailLines should return 400", + }, + { + name: "large tailLines is accepted", + appName: "test-app", + queryParams: "container=main&tailLines=20000", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.TailLines == 20000 + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "Large tailLines value should be passed through to handler", + }, + { + name: "default tailLines when omitted", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.TailLines == 1000 + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "Omitted tailLines should default to 1000", + }, + { + name: "default sinceSeconds when omitted", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.SinceSeconds == 0 + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "Omitted sinceSeconds should default to 0", + }, + { + name: "handler returns error", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return(nil, assert.AnError) + }, + expectedStatus: http.StatusInternalServerError, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + assert.NotEmpty(t, errObj["message"]) + }, + description: "Handler error should return 500 with error message", + }, + { + name: "handler returns ArgoCD 404 error", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return(nil, fmt.Errorf("failed to get application logs: %w", &argocd.ArgoCDAPIError{ + StatusCode: http.StatusNotFound, + HTTPStatus: "Not Found", + Message: "application not found", + })) + }, + expectedStatus: http.StatusNotFound, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + msg, _ := errObj["message"].(string) + assert.Contains(t, msg, "application not found") + }, + description: "ArgoCD 404 should be forwarded as HTTP 404", + }, + { + name: "handler returns ArgoCD 403 error", + appName: "test-app", + queryParams: "container=main", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return(nil, fmt.Errorf("failed to get application logs: %w", &argocd.ArgoCDAPIError{ + StatusCode: http.StatusForbidden, + HTTPStatus: "Forbidden", + Message: "permission denied", + })) + }, + expectedStatus: http.StatusForbidden, + validateBody: func(t *testing.T, body map[string]interface{}) { + errObj, ok := body["error"].(map[string]interface{}) + if !ok { + t.Fatal("expected error object in response") + } + msg, _ := errObj["message"].(string) + assert.Contains(t, msg, "permission denied") + }, + description: "ArgoCD 403 should be forwarded as HTTP 403", + }, + { + name: "sinceSeconds zero string is valid", + appName: "test-app", + queryParams: "container=main&sinceSeconds=0", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.SinceSeconds == 0 + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "sinceSeconds=0 is valid and should pass through", + }, + { + name: "tailLines zero is valid", + appName: "test-app", + queryParams: "container=main&tailLines=0", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.MatchedBy(func(opts *argocd.ApplicationLogsOptions) bool { + return opts.TailLines == 0 + })). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "tailLines=0 is valid and should pass through", + }, + { + name: "follow false is accepted", + appName: "test-app", + queryParams: "container=main&follow=false", + mockSetup: func(m *MockInfrastructureHandler) { + m.On("GetApplicationLogs", "test-app", mock.AnythingOfType("string"), + mock.AnythingOfType("*argocd.ApplicationLogsOptions")). + Return([]argocd.ApplicationLogEntry{}, nil) + }, + expectedStatus: http.StatusOK, + description: "follow=false should be accepted", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockHandler := new(MockInfrastructureHandler) + if tt.mockSetup != nil { + tt.mockSetup(mockHandler) + } + + controller := &InfrastructureController{ + handler: mockHandler, + } + + router := setupTestRouter() + router.GET("/api/v1/infrastructure/applications/:appName/logs", controller.GetApplicationLogs) + + urlPath := "/api/v1/infrastructure/applications/" + tt.appName + "/logs" + + skipWorkingEnv := false + qp := tt.queryParams + if strings.Contains(qp, "_skipWorkingEnv=true") { + skipWorkingEnv = true + qp = strings.ReplaceAll(qp, "_skipWorkingEnv=true", "") + qp = strings.TrimRight(qp, "&") + } + + if !skipWorkingEnv { + if qp != "" { + qp = "workingEnv=gcp_prd&" + qp + } else { + qp = "workingEnv=gcp_prd" + } + } + if qp != "" { + urlPath += "?" + qp + } + + req := httptest.NewRequest(http.MethodGet, urlPath, nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, tt.expectedStatus, w.Code, tt.description) + + if tt.validateBody != nil { + var body map[string]interface{} + err := json.Unmarshal(w.Body.Bytes(), &body) + assert.NoError(t, err, "response body should be valid JSON") + tt.validateBody(t, body) + } + + mockHandler.AssertExpectations(t) + }) + } +} + func TestInfrastructureController_UpdateGPUThreshold(t *testing.T) { tests := []struct { name string diff --git a/horizon/internal/infrastructure/handler/handler.go b/horizon/internal/infrastructure/handler/handler.go index 5a3c962c..7a9dbce9 100644 --- a/horizon/internal/infrastructure/handler/handler.go +++ b/horizon/internal/infrastructure/handler/handler.go @@ -21,6 +21,7 @@ type InfrastructureHandler interface { GetHPAProperties(appName, workingEnv string) (*HPAConfig, error) GetConfig(serviceName, workingEnv string) Config GetResourceDetail(appName, workingEnv string) (*ResourceDetail, error) + GetApplicationLogs(appName, workingEnv string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) RestartDeployment(appName, workingEnv string, isCanary bool) error UpdateCPUThreshold(appName, threshold, email, workingEnv string) error UpdateGPUThreshold(appName, threshold, email, workingEnv string) error @@ -212,6 +213,30 @@ func (h *infrastructureHandler) GetResourceDetail(appName, workingEnv string) (* return &ResourceDetail{Nodes: nodes}, nil } +func (h *infrastructureHandler) GetApplicationLogs(appName, workingEnv string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + log.Info(). + Str("appName", appName). + Str("workingEnv", workingEnv). + Msg("GetApplicationLogs: Starting ArgoCD application logs lookup") + + argocdAppName := getArgoCDApplicationName(appName, workingEnv) + entries, err := argocd.GetApplicationLogs(argocdAppName, workingEnv, opts) + if err != nil { + log.Error(). + Err(err). + Str("appName", appName). + Str("workingEnv", workingEnv). + Msg("GetApplicationLogs: Failed to get logs from ArgoCD") + return nil, fmt.Errorf("failed to get application logs: %w", err) + } + log.Info(). + Str("appName", appName). + Str("workingEnv", workingEnv). + Int("entryCount", len(entries)). + Msg("GetApplicationLogs: Successfully retrieved logs from ArgoCD") + return entries, nil +} + func (h *infrastructureHandler) RestartDeployment(appName, workingEnv string, isCanary bool) error { log.Info().Str("appName", appName).Str("workingEnv", workingEnv).Bool("isCanary", isCanary).Msg("Restarting deployment") diff --git a/horizon/internal/infrastructure/router/router.go b/horizon/internal/infrastructure/router/router.go index a2782d62..77211ca7 100644 --- a/horizon/internal/infrastructure/router/router.go +++ b/horizon/internal/infrastructure/router/router.go @@ -22,6 +22,7 @@ func Init() { { // Application resource operations infrastructureAPI.GET("/applications/:appName/hpa", controller.NewController().GetHPAConfig) + infrastructureAPI.GET("/applications/:appName/logs", controller.NewController().GetApplicationLogs) infrastructureAPI.GET("/applications/resources", controller.NewController().GetResourceDetail) infrastructureAPI.POST("/applications/:appName/restart", controller.NewController().RestartDeployment) infrastructureAPI.PUT("/applications/:appName/hpa/cpu", controller.NewController().UpdateCPUThreshold) diff --git a/horizon/internal/predator/handler/predator_validation.go b/horizon/internal/predator/handler/predator_validation.go index 15021baf..7462d061 100644 --- a/horizon/internal/predator/handler/predator_validation.go +++ b/horizon/internal/predator/handler/predator_validation.go @@ -262,7 +262,7 @@ func (p *Predator) startHealthCheckingProcess(job *validationjob.Table) { if isHealthy { log.Info().Msgf("Deployment is healthy for job %d, validation successful", job.ID) p.completeValidationJob(job.ID, true, "Deployment is healthy and running successfully") - p.updateRequestValidationStatus(job.GroupID, true) + p.updateRequestValidationStatus(job.GroupID, true, "") return } @@ -271,8 +271,9 @@ func (p *Predator) startHealthCheckingProcess(job *validationjob.Table) { // If we reach here, max health checks exceeded log.Warn().Msgf("Max health checks exceeded for job %d, marking as failed", job.ID) + gcsLogPath := p.captureAndUploadFailureLogs(job) p.completeValidationJob(job.ID, false, fmt.Sprintf("Deployment failed to become healthy after %d checks", job.MaxHealthChecks)) - p.updateRequestValidationStatus(job.GroupID, false) + p.updateRequestValidationStatus(job.GroupID, false, gcsLogPath) } // checkDeploymentHealth checks if the deployment is healthy using infrastructure handler @@ -321,8 +322,9 @@ func (p *Predator) completeValidationJob(jobID uint, success bool, message strin } } -// updateRequestValidationStatus updates the request table with validation results -func (p *Predator) updateRequestValidationStatus(groupID string, success bool) { +// updateRequestValidationStatus updates the request table with validation results. +// gcsLogPath is the GCS URI of the uploaded failure logs (empty on success). +func (p *Predator) updateRequestValidationStatus(groupID string, success bool, gcsLogPath string) { id, err := strconv.ParseUint(groupID, 10, 32) if err != nil { log.Error().Err(err).Msgf("Failed to parse group ID %s for status update", groupID) @@ -340,7 +342,11 @@ func (p *Predator) updateRequestValidationStatus(groupID string, success bool) { request.UpdatedAt = time.Now() request.IsValid = success if !success { - request.RejectReason = "Validation Failed" + if gcsLogPath != "" { + request.RejectReason = fmt.Sprintf("Validation Failed. Logs: %s", gcsLogPath) + } else { + request.RejectReason = "Validation Failed" + } request.Status = statusRejected request.UpdatedBy = "Validation Job" request.UpdatedAt = time.Now() diff --git a/horizon/internal/predator/handler/predator_validation_logs.go b/horizon/internal/predator/handler/predator_validation_logs.go new file mode 100644 index 00000000..82636cfe --- /dev/null +++ b/horizon/internal/predator/handler/predator_validation_logs.go @@ -0,0 +1,192 @@ +package handler + +import ( + "fmt" + "path" + "strings" + "time" + + infrastructurehandler "github.com/Meesho/BharatMLStack/horizon/internal/infrastructure/handler" + pred "github.com/Meesho/BharatMLStack/horizon/internal/predator" + "github.com/Meesho/BharatMLStack/horizon/internal/repositories/sql/validationjob" + "github.com/Meesho/BharatMLStack/horizon/pkg/argocd" + "github.com/rs/zerolog/log" +) + +const ( + validationLogsDir = "validation-logs" + istOffsetSeconds = 5*60*60 + 30*60 + istTimeFmt = "2006-01-02 15:04:05 IST" + podSeparator = "==============================" + headerSeparator = "========================================" +) + +var istLocation = time.FixedZone("IST", istOffsetSeconds) + +type podLogEntry struct { + name string + logs string + err string +} + +// captureAndUploadFailureLogs fetches previous container logs from degraded pods +// of the test deployable and uploads them to GCS in a human-readable format. +// Errors are logged but never propagated so that the validation failure flow +// is not affected. +func (p *Predator) captureAndUploadFailureLogs(job *validationjob.Table) string { + if job == nil { + log.Error().Msg("captureAndUploadFailureLogs: job is nil, skipping log capture") + return "" + } + + log.Info().Uint("jobID", job.ID).Str("groupID", job.GroupID). + Msg("Starting failure log capture for validation job") + + degradedPods := p.findDegradedPods(job.ServiceName) + if len(degradedPods) == 0 { + log.Warn().Uint("jobID", job.ID).Str("serviceName", job.ServiceName). + Msg("No degraded pods found, skipping log capture") + return "" + } + + var podLogs []podLogEntry + for _, pod := range degradedPods { + entry := podLogEntry{name: pod.Name} + + logs, err := p.fetchDegradedPodLogs(job.ServiceName, pod.Name) + if err != nil { + entry.err = fmt.Sprintf("failed to fetch logs: %v", err) + log.Error().Err(err).Str("pod", pod.Name). + Msg("Failed to fetch degraded pod logs") + } + entry.logs = logs + podLogs = append(podLogs, entry) + } + + data := formatValidationLogsPlainText(job, p.workingEnv, podLogs) + + bucket, objectPath := p.buildValidationLogsGCSPath(job) + if bucket == "" { + log.Error().Uint("jobID", job.ID). + Msg("Failed to determine GCS bucket for validation logs") + return "" + } + + if err := p.GcsClient.UploadFile(bucket, objectPath, data); err != nil { + log.Error().Err(err).Str("bucket", bucket).Str("path", objectPath). + Msg("Failed to upload validation failure logs to GCS") + return "" + } + + gcsURL := fmt.Sprintf("https://storage.cloud.google.com/%s/%s", bucket, objectPath) + log.Info().Str("gcsPath", gcsURL).Uint("jobID", job.ID). + Msg("Validation failure logs uploaded to GCS") + return gcsURL +} + +// findDegradedPods queries the ArgoCD resource tree and returns only pods +// whose health status is "Degraded". +func (p *Predator) findDegradedPods(serviceName string) []infrastructurehandler.Node { + resourceDetail, err := p.infrastructureHandler.GetResourceDetail(serviceName, p.workingEnv) + if err != nil { + log.Error().Err(err).Str("serviceName", serviceName). + Msg("Failed to get resource detail for log capture") + return nil + } + + if resourceDetail == nil || len(resourceDetail.Nodes) == 0 { + return nil + } + + var degraded []infrastructurehandler.Node + for _, node := range resourceDetail.Nodes { + if node.Kind == "Pod" && node.Health.Status == "Degraded" { + degraded = append(degraded, node) + } + } + return degraded +} + +// fetchDegradedPodLogs retrieves previous container logs for a degraded pod +// with timestamps converted to IST. +func (p *Predator) fetchDegradedPodLogs(serviceName, podName string) (string, error) { + opts := &argocd.ApplicationLogsOptions{ + PodName: podName, + Container: serviceName, + Previous: true, + } + entries, err := p.infrastructureHandler.GetApplicationLogs(serviceName, p.workingEnv, opts) + if err != nil { + return "", fmt.Errorf("failed to fetch logs for pod %s: %w", podName, err) + } + + var sb strings.Builder + for _, e := range entries { + ts := convertToIST(e.Result.TimeStampStr) + sb.WriteString(fmt.Sprintf("[%s] %s\n", ts, e.Result.Content)) + } + return sb.String(), nil +} + +// convertToIST parses an RFC3339 timestamp string and formats it in IST. +// Falls back to the raw string if parsing fails, or uses the current IST time +// if the input is empty. +func convertToIST(timestampStr string) string { + if timestampStr == "" { + return "" + } + t, err := time.Parse(time.RFC3339Nano, timestampStr) + if err != nil { + if t, err = time.Parse(time.RFC3339, timestampStr); err != nil { + return timestampStr + } + } + return t.In(istLocation).Format(istTimeFmt) +} + +// formatValidationLogsPlainText produces a human-readable log report with +// a metadata header followed by per-pod sections. +func formatValidationLogsPlainText(job *validationjob.Table, workingEnv string, pods []podLogEntry) []byte { + var sb strings.Builder + + sb.WriteString(headerSeparator + "\n") + sb.WriteString("Validation Failure Log Report\n") + sb.WriteString(headerSeparator + "\n") + sb.WriteString(fmt.Sprintf("Group ID : %s\n", job.GroupID)) + sb.WriteString(fmt.Sprintf("Job ID : %d\n", job.ID)) + sb.WriteString(fmt.Sprintf("Service Name : %s\n", job.ServiceName)) + sb.WriteString(fmt.Sprintf("Working Env : %s\n", workingEnv)) + sb.WriteString(fmt.Sprintf("Captured At : %s\n", time.Now().In(istLocation).Format(istTimeFmt))) + sb.WriteString(headerSeparator + "\n\n") + + for _, pod := range pods { + sb.WriteString(podSeparator + "\n") + sb.WriteString(fmt.Sprintf("Pod: %s\n", pod.name)) + sb.WriteString(podSeparator + "\n") + + if pod.err != "" { + sb.WriteString(fmt.Sprintf("[ERROR] %s\n", pod.err)) + } + if pod.logs != "" { + sb.WriteString(pod.logs) + } + sb.WriteString("\n") + } + + return []byte(sb.String()) +} + +// buildValidationLogsGCSPath determines the GCS bucket and object path for +// uploading validation failure logs. +// Prod: GcsConfigBucket +// Non-prod: GcsModelBucket +func (p *Predator) buildValidationLogsGCSPath(job *validationjob.Table) (bucket, objectPath string) { + if p.isNonProductionEnvironment() { + bucket = pred.GcsModelBucket + objectPath = path.Join(validationLogsDir, job.ServiceName, job.GroupID+".log") + } else { + bucket = pred.GcsConfigBucket + objectPath = path.Join(validationLogsDir, job.ServiceName, job.GroupID+".log") + } + return bucket, objectPath +} diff --git a/horizon/internal/predator/handler/predator_validation_logs_test.go b/horizon/internal/predator/handler/predator_validation_logs_test.go new file mode 100644 index 00000000..9e9d6c53 --- /dev/null +++ b/horizon/internal/predator/handler/predator_validation_logs_test.go @@ -0,0 +1,1092 @@ +package handler + +import ( + "errors" + "strings" + "testing" + "time" + + "github.com/Meesho/BharatMLStack/horizon/internal/externalcall" + infrastructurehandler "github.com/Meesho/BharatMLStack/horizon/internal/infrastructure/handler" + pred "github.com/Meesho/BharatMLStack/horizon/internal/predator" + "github.com/Meesho/BharatMLStack/horizon/internal/repositories/sql/validationjob" + "github.com/Meesho/BharatMLStack/horizon/pkg/argocd" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- Mock: InfrastructureHandler --- + +type mockInfrastructureHandler struct { + getResourceDetailFn func(appName, workingEnv string) (*infrastructurehandler.ResourceDetail, error) + getApplicationLogsFn func(appName, workingEnv string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) + restartDeploymentFn func(appName, workingEnv string, isCanary bool) error +} + +func (m *mockInfrastructureHandler) GetHPAProperties(_, _ string) (*infrastructurehandler.HPAConfig, error) { + return nil, nil +} +func (m *mockInfrastructureHandler) GetConfig(_, _ string) infrastructurehandler.Config { + return infrastructurehandler.Config{} +} +func (m *mockInfrastructureHandler) GetResourceDetail(appName, workingEnv string) (*infrastructurehandler.ResourceDetail, error) { + if m.getResourceDetailFn != nil { + return m.getResourceDetailFn(appName, workingEnv) + } + return nil, errors.New("not implemented") +} +func (m *mockInfrastructureHandler) GetApplicationLogs(appName, workingEnv string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + if m.getApplicationLogsFn != nil { + return m.getApplicationLogsFn(appName, workingEnv, opts) + } + return nil, errors.New("not implemented") +} +func (m *mockInfrastructureHandler) RestartDeployment(appName, workingEnv string, isCanary bool) error { + if m.restartDeploymentFn != nil { + return m.restartDeploymentFn(appName, workingEnv, isCanary) + } + return nil +} +func (m *mockInfrastructureHandler) UpdateCPUThreshold(_, _, _, _ string) error { return nil } +func (m *mockInfrastructureHandler) UpdateGPUThreshold(_, _, _, _ string) error { return nil } +func (m *mockInfrastructureHandler) UpdateSharedMemory(_, _, _, _ string) error { return nil } +func (m *mockInfrastructureHandler) UpdatePodAnnotations(_ string, _ map[string]string, _, _ string) error { + return nil +} +func (m *mockInfrastructureHandler) UpdateAutoscalingTriggers(_ string, _ []interface{}, _, _ string) error { + return nil +} + +// --- Mock: GCSClientInterface --- + +type mockGCSClient struct { + uploadFileFn func(bucket, objectPath string, data []byte) error +} + +func (m *mockGCSClient) ReadFile(_, _ string) ([]byte, error) { return nil, nil } +func (m *mockGCSClient) TransferFolder(_, _, _, _, _, _ string) error { return nil } +func (m *mockGCSClient) TransferAndDeleteFolder(_, _, _, _, _, _ string) error { return nil } +func (m *mockGCSClient) TransferFolderWithSplitSources(_, _, _, _, _, _, _, _ string) error { + return nil +} +func (m *mockGCSClient) DeleteFolder(_, _, _ string) error { return nil } +func (m *mockGCSClient) ListFolders(_, _ string) ([]string, error) { return nil, nil } +func (m *mockGCSClient) UploadFile(bucket, objectPath string, data []byte) error { + if m.uploadFileFn != nil { + return m.uploadFileFn(bucket, objectPath, data) + } + return nil +} +func (m *mockGCSClient) CheckFileExists(_, _ string) (bool, error) { return false, nil } +func (m *mockGCSClient) CheckFolderExists(_, _ string) (bool, error) { return false, nil } +func (m *mockGCSClient) UploadFolderFromLocal(_, _, _ string) error { return nil } +func (m *mockGCSClient) GetFolderInfo(_, _ string) (*externalcall.GCSFolderInfo, error) { + return nil, nil +} +func (m *mockGCSClient) ListFoldersWithTimestamp(_, _ string) ([]externalcall.GCSFolderInfo, error) { + return nil, nil +} +func (m *mockGCSClient) FindFileWithSuffix(_, _, _ string) (bool, string, error) { + return false, "", nil +} + +// --- Mock: ValidationJobRepository --- + +type mockValidationJobRepo struct { + getByIDFn func(id uint) (*validationjob.Table, error) + updateValidationResult func(id uint, result bool, errorMessage string) error +} + +func (m *mockValidationJobRepo) Create(_ *validationjob.Table) error { return nil } +func (m *mockValidationJobRepo) GetByID(id uint) (*validationjob.Table, error) { + if m.getByIDFn != nil { + return m.getByIDFn(id) + } + return nil, errors.New("not implemented") +} +func (m *mockValidationJobRepo) GetByGroupID(_ string) (*validationjob.Table, error) { + return nil, nil +} +func (m *mockValidationJobRepo) GetPendingJobs() ([]validationjob.Table, error) { return nil, nil } +func (m *mockValidationJobRepo) UpdateStatus(_ uint, _, _ string) error { return nil } +func (m *mockValidationJobRepo) UpdateValidationResult(id uint, result bool, errorMessage string) error { + if m.updateValidationResult != nil { + return m.updateValidationResult(id, result, errorMessage) + } + return nil +} +func (m *mockValidationJobRepo) IncrementHealthCheck(_ uint) error { return nil } +func (m *mockValidationJobRepo) GetJobsToCleanup(_ time.Duration) ([]validationjob.Table, error) { + return nil, nil +} +func (m *mockValidationJobRepo) Delete(_ uint) error { return nil } + +// --- helper to save/restore package-level predator vars --- + +type predVarSnapshot struct { + modelBucket string + modelBase string + configBucket string + configBase string + appEnv string +} + +func savePredVars() predVarSnapshot { + return predVarSnapshot{ + modelBucket: pred.GcsModelBucket, + modelBase: pred.GcsModelBasePath, + configBucket: pred.GcsConfigBucket, + configBase: pred.GcsConfigBasePath, + appEnv: pred.AppEnv, + } +} + +func (s predVarSnapshot) restore() { + pred.GcsModelBucket = s.modelBucket + pred.GcsModelBasePath = s.modelBase + pred.GcsConfigBucket = s.configBucket + pred.GcsConfigBasePath = s.configBase + pred.AppEnv = s.appEnv +} + +func setNonProd() { + pred.GcsModelBucket = "model-bucket" + pred.GcsModelBasePath = "models/v1" + pred.AppEnv = "int" +} + +func setProd() { + pred.GcsConfigBucket = "config-bucket" + pred.GcsConfigBasePath = "configs/v1" + pred.AppEnv = "prd" +} + +// ============================================================ +// findDegradedPods +// ============================================================ + +func TestFindDegradedPods_ReturnsDegradedOnly(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-healthy", Health: infrastructurehandler.Health{Status: "Healthy"}}, + {Kind: "Pod", Name: "pod-degraded-1", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-missing", Health: infrastructurehandler.Health{Status: "Missing"}}, + {Kind: "Pod", Name: "pod-degraded-2", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Deployment", Name: "deploy-1", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Len(t, pods, 2) + assert.Equal(t, "pod-degraded-1", pods[0].Name) + assert.Equal(t, "pod-degraded-2", pods[1].Name) +} + +func TestFindDegradedPods_NoDegraded_ReturnsNil(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + {Kind: "Pod", Name: "pod-2", Health: infrastructurehandler.Health{Status: "Missing"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Nil(t, pods, "should return nil when no pods are Degraded") +} + +func TestFindDegradedPods_OnlyNonPodNodes_ReturnsNil(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Deployment", Name: "deploy-1", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "ReplicaSet", Name: "rs-1", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Nil(t, pods) +} + +func TestFindDegradedPods_APIError_ReturnsNil(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, errors.New("connection refused") + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Nil(t, pods) +} + +func TestFindDegradedPods_NilResourceDetail_ReturnsNil(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Nil(t, pods) +} + +func TestFindDegradedPods_EmptyNodes_ReturnsNil(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{Nodes: []infrastructurehandler.Node{}}, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + pods := p.findDegradedPods("test-svc") + + assert.Nil(t, pods) +} + +func TestFindDegradedPods_PassesCorrectArgs(t *testing.T) { + var capturedApp, capturedEnv string + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(appName, workingEnv string) (*infrastructurehandler.ResourceDetail, error) { + capturedApp = appName + capturedEnv = workingEnv + return nil, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "staging"} + + p.findDegradedPods("my-service") + + assert.Equal(t, "my-service", capturedApp) + assert.Equal(t, "staging", capturedEnv) +} + +// ============================================================ +// fetchDegradedPodLogs +// ============================================================ + +func TestFetchDegradedPodLogs_SetsCorrectOptsAndConvertsIST(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(appName, _ string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + assert.Equal(t, "test-svc", appName) + assert.Equal(t, "pod-1", opts.PodName) + assert.Equal(t, "test-svc", opts.Container) + assert.True(t, opts.Previous, "Previous flag must be true for crash logs") + + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "OOM killed", TimeStampStr: "2024-01-15T10:30:00Z"}}, + {Result: argocd.ApplicationLogResult{Content: "container exited", TimeStampStr: "2024-01-15T10:30:01Z"}}, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + logs, err := p.fetchDegradedPodLogs("test-svc", "pod-1") + + require.NoError(t, err) + assert.Contains(t, logs, "[2024-01-15 16:00:00 IST] OOM killed") + assert.Contains(t, logs, "[2024-01-15 16:00:01 IST] container exited") +} + +func TestFetchDegradedPodLogs_Error_WrapsWithPodName(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return nil, errors.New("ArgoCD unavailable") + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + _, err := p.fetchDegradedPodLogs("test-svc", "pod-crash") + + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to fetch logs for pod pod-crash") + assert.Contains(t, err.Error(), "ArgoCD unavailable") +} + +func TestFetchDegradedPodLogs_EmptyEntries_ReturnsEmptyString(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{}, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + logs, err := p.fetchDegradedPodLogs("svc", "pod-1") + + require.NoError(t, err) + assert.Empty(t, logs) +} + +func TestFetchDegradedPodLogs_NilEntries_ReturnsEmptyString(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return nil, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + logs, err := p.fetchDegradedPodLogs("svc", "pod-1") + + require.NoError(t, err) + assert.Empty(t, logs) +} + +func TestFetchDegradedPodLogs_EntryWithEmptyTimestamp_ReturnsEmptyTimestamp(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "panic: nil pointer", TimeStampStr: ""}}, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + logs, err := p.fetchDegradedPodLogs("svc", "pod-1") + + require.NoError(t, err) + assert.Contains(t, logs, "[] panic: nil pointer") +} + +func TestFetchDegradedPodLogs_EntryWithEmptyContent_FormatsCorrectly(t *testing.T) { + infra := &mockInfrastructureHandler{ + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "", TimeStampStr: "2024-01-15T10:30:00Z"}}, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + logs, err := p.fetchDegradedPodLogs("svc", "pod-1") + + require.NoError(t, err) + assert.Contains(t, logs, "[2024-01-15 16:00:00 IST] \n") +} + +// ============================================================ +// convertToIST +// ============================================================ + +func TestConvertToIST_RFC3339(t *testing.T) { + result := convertToIST("2024-01-15T10:30:00Z") + assert.Equal(t, "2024-01-15 16:00:00 IST", result) +} + +func TestConvertToIST_RFC3339Nano(t *testing.T) { + result := convertToIST("2024-01-15T10:30:00.123456789Z") + assert.Equal(t, "2024-01-15 16:00:00 IST", result) +} + +func TestConvertToIST_Empty_ReturnsEmpty(t *testing.T) { + result := convertToIST("") + assert.Empty(t, result, "empty input should return empty string") +} + +func TestConvertToIST_InvalidFormat_ReturnsRaw(t *testing.T) { + result := convertToIST("not-a-timestamp") + assert.Equal(t, "not-a-timestamp", result) +} + +func TestConvertToIST_WithTimezoneOffset(t *testing.T) { + result := convertToIST("2024-01-15T16:00:00+05:30") + assert.Equal(t, "2024-01-15 16:00:00 IST", result) +} + +func TestConvertToIST_RFC3339_NonUTC(t *testing.T) { + result := convertToIST("2024-01-15T05:30:00-05:00") + assert.Equal(t, "2024-01-15 16:00:00 IST", result) +} + +// ============================================================ +// formatValidationLogsPlainText +// ============================================================ + +func TestFormatLogs_MultiplePods(t *testing.T) { + job := &validationjob.Table{ID: 1, GroupID: "42", ServiceName: "test-svc"} + pods := []podLogEntry{ + {name: "pod-bad-1", logs: "[2024-01-15 16:00:00 IST] OOM killed\n"}, + {name: "pod-bad-2", logs: "[2024-01-15 16:05:00 IST] segfault\n"}, + } + + data := formatValidationLogsPlainText(job, "int", pods) + output := string(data) + + assert.Contains(t, output, "Validation Failure Log Report") + assert.Contains(t, output, "Group ID : 42") + assert.Contains(t, output, "Job ID : 1") + assert.Contains(t, output, "Service Name : test-svc") + assert.Contains(t, output, "Working Env : int") + assert.Contains(t, output, "Captured At :") + assert.Contains(t, output, "IST") + + assert.Contains(t, output, "Pod: pod-bad-1") + assert.Contains(t, output, "OOM killed") + assert.Contains(t, output, "Pod: pod-bad-2") + assert.Contains(t, output, "segfault") + + assert.Equal(t, 2, strings.Count(output, "Pod: "), "should have exactly 2 pod headers") +} + +func TestFormatLogs_PodWithError(t *testing.T) { + job := &validationjob.Table{ID: 1, GroupID: "42", ServiceName: "test-svc"} + pods := []podLogEntry{ + {name: "pod-err", err: "failed to fetch logs: connection refused"}, + } + + data := formatValidationLogsPlainText(job, "int", pods) + output := string(data) + + assert.Contains(t, output, "Pod: pod-err") + assert.Contains(t, output, "[ERROR] failed to fetch logs: connection refused") +} + +func TestFormatLogs_PodWithBothErrorAndLogs(t *testing.T) { + job := &validationjob.Table{ID: 1, GroupID: "42", ServiceName: "test-svc"} + pods := []podLogEntry{ + {name: "pod-partial", err: "partial failure", logs: "some recovered logs\n"}, + } + + data := formatValidationLogsPlainText(job, "int", pods) + output := string(data) + + assert.Contains(t, output, "[ERROR] partial failure") + assert.Contains(t, output, "some recovered logs") +} + +func TestFormatLogs_EmptyPodsList_HeaderOnly(t *testing.T) { + job := &validationjob.Table{ID: 5, GroupID: "99", ServiceName: "empty-svc"} + + data := formatValidationLogsPlainText(job, "prd", []podLogEntry{}) + output := string(data) + + assert.Contains(t, output, "Validation Failure Log Report") + assert.Contains(t, output, "Group ID : 99") + assert.Contains(t, output, "Job ID : 5") + assert.NotContains(t, output, "Pod: ") +} + +func TestFormatLogs_PodWithEmptyName(t *testing.T) { + job := &validationjob.Table{ID: 1, GroupID: "1", ServiceName: "svc"} + pods := []podLogEntry{ + {name: "", logs: "log line\n"}, + } + + data := formatValidationLogsPlainText(job, "int", pods) + output := string(data) + + assert.Contains(t, output, "Pod: \n") + assert.Contains(t, output, "log line") +} + +func TestFormatLogs_PodWithEmptyLogsAndNoError(t *testing.T) { + job := &validationjob.Table{ID: 1, GroupID: "1", ServiceName: "svc"} + pods := []podLogEntry{ + {name: "pod-empty"}, + } + + data := formatValidationLogsPlainText(job, "int", pods) + output := string(data) + + assert.Contains(t, output, "Pod: pod-empty") + assert.NotContains(t, output, "[ERROR]") +} + +// ============================================================ +// buildValidationLogsGCSPath +// ============================================================ + +func TestBuildGCSPath_NonProd(t *testing.T) { + snap := savePredVars() + defer snap.restore() + setNonProd() + + p := &Predator{workingEnv: "int"} + job := &validationjob.Table{ServiceName: "test-svc", GroupID: "42"} + + bucket, objectPath := p.buildValidationLogsGCSPath(job) + + assert.Equal(t, "model-bucket", bucket) + assert.Equal(t, "validation-logs/test-svc/42.log", objectPath) +} + +func TestBuildGCSPath_Prod(t *testing.T) { + snap := savePredVars() + defer snap.restore() + setProd() + + p := &Predator{workingEnv: "prd"} + job := &validationjob.Table{ServiceName: "prod-svc", GroupID: "99"} + + bucket, objectPath := p.buildValidationLogsGCSPath(job) + + assert.Equal(t, "config-bucket", bucket) + assert.Equal(t, "validation-logs/prod-svc/99.log", objectPath) +} + +func TestBuildGCSPath_Deterministic(t *testing.T) { + snap := savePredVars() + defer snap.restore() + setNonProd() + + p := &Predator{workingEnv: "int"} + job := &validationjob.Table{ServiceName: "svc-a", GroupID: "77"} + + b1, p1 := p.buildValidationLogsGCSPath(job) + b2, p2 := p.buildValidationLogsGCSPath(job) + + assert.Equal(t, b1, b2) + assert.Equal(t, p1, p2) +} + +func TestBuildGCSPath_EmptyBucketVar_ReturnsEmpty(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "" + pred.AppEnv = "int" + + p := &Predator{workingEnv: "int"} + job := &validationjob.Table{ServiceName: "svc", GroupID: "1"} + + bucket, _ := p.buildValidationLogsGCSPath(job) + + assert.Empty(t, bucket, "should return empty bucket when GcsModelBucket is unset") +} + +func TestBuildGCSPath_EmptyServiceName(t *testing.T) { + snap := savePredVars() + defer snap.restore() + setNonProd() + + p := &Predator{workingEnv: "int"} + job := &validationjob.Table{ServiceName: "", GroupID: "42"} + + _, objectPath := p.buildValidationLogsGCSPath(job) + + assert.Equal(t, "validation-logs/42.log", objectPath, + "path.Join collapses empty segment") +} + +func TestBuildGCSPath_EmptyGroupID(t *testing.T) { + snap := savePredVars() + defer snap.restore() + setNonProd() + + p := &Predator{workingEnv: "int"} + job := &validationjob.Table{ServiceName: "svc", GroupID: ""} + + _, objectPath := p.buildValidationLogsGCSPath(job) + + assert.Equal(t, "validation-logs/svc/.log", objectPath) +} + +// ============================================================ +// captureAndUploadFailureLogs — end-to-end +// ============================================================ + +func TestCapture_EndToEnd_HappyPath(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "test-bucket" + pred.AppEnv = "int" + + var capturedBucket, capturedPath string + var capturedData []byte + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-bad", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-ok", Health: infrastructurehandler.Health{Status: "Healthy"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + assert.True(t, opts.Previous, "Previous flag must be true") + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "OOM killed", TimeStampStr: "2024-01-15T10:30:00Z"}}, + }, nil + }, + } + gcs := &mockGCSClient{ + uploadFileFn: func(bucket, objectPath string, data []byte) error { + capturedBucket = bucket + capturedPath = objectPath + capturedData = data + return nil + }, + } + + p := &Predator{ + infrastructureHandler: infra, + GcsClient: gcs, + workingEnv: "int", + } + job := &validationjob.Table{ID: 1, GroupID: "42", ServiceName: "test-svc"} + + gcsURI := p.captureAndUploadFailureLogs(job) + + assert.Equal(t, "https://storage.cloud.google.com/test-bucket/validation-logs/test-svc/42.log", gcsURI) + assert.Equal(t, "test-bucket", capturedBucket) + assert.Equal(t, "validation-logs/test-svc/42.log", capturedPath) + + output := string(capturedData) + assert.Contains(t, output, "Group ID : 42") + assert.Contains(t, output, "Pod: pod-bad") + assert.NotContains(t, output, "Pod: pod-ok", "healthy pod must not appear") + assert.Contains(t, output, "OOM killed") + assert.Contains(t, output, "IST") + assert.True(t, len(capturedData) > 0, "uploaded data must not be empty") +} + +func TestCapture_NilJob_ReturnsEmpty(t *testing.T) { + p := &Predator{} + result := p.captureAndUploadFailureLogs(nil) + assert.Empty(t, result) +} + +func TestCapture_GCSUploadFails_ReturnsEmpty(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "bucket" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-bad", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "log line", TimeStampStr: "2024-01-15T10:30:00Z"}}, + }, nil + }, + } + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, _ []byte) error { + return errors.New("GCS unavailable") + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "10", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result, "must return empty on GCS failure") +} + +func TestCapture_NoDegradedPods_ReturnsEmpty_NoUpload(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-ok", Health: infrastructurehandler.Health{Status: "Healthy"}}, + }, + }, nil + }, + } + uploaded := false + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, _ []byte) error { + uploaded = true + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "7", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result) + assert.False(t, uploaded, "must not upload when no degraded pods") +} + +func TestCapture_GetResourceDetailFails_ReturnsEmpty_NoUpload(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, errors.New("ArgoCD connection timeout") + }, + } + uploaded := false + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, _ []byte) error { + uploaded = true + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "10", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result, "must return empty when ArgoCD fails") + assert.False(t, uploaded, "must not upload when ArgoCD fails") +} + +func TestCapture_AllPodsFailLogFetch_StillUploadsWithErrors(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "bucket" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-a", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-b", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return nil, errors.New("logs unavailable") + }, + } + var capturedData []byte + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, data []byte) error { + capturedData = data + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 3, GroupID: "50", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.NotEmpty(t, result, "must still upload report even when all log fetches fail") + + output := string(capturedData) + assert.Contains(t, output, "Pod: pod-a") + assert.Contains(t, output, "Pod: pod-b") + assert.Equal(t, 2, strings.Count(output, "[ERROR]"), "each pod should have an [ERROR] entry") + assert.Contains(t, output, "logs unavailable") +} + +func TestCapture_MixedLogFetch_SomeSucceedSomeFail(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "bucket" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-ok-logs", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-no-logs", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + if opts.PodName == "pod-ok-logs" { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "segfault at 0x0", TimeStampStr: "2024-06-01T12:00:00Z"}}, + }, nil + } + return nil, errors.New("pod evicted") + }, + } + var capturedData []byte + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, data []byte) error { + capturedData = data + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 5, GroupID: "88", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.NotEmpty(t, result) + + output := string(capturedData) + assert.Contains(t, output, "Pod: pod-ok-logs") + assert.Contains(t, output, "segfault at 0x0") + assert.Contains(t, output, "Pod: pod-no-logs") + assert.Contains(t, output, "[ERROR]") + assert.Contains(t, output, "pod evicted") +} + +func TestCapture_EmptyBucket_ReturnsEmpty_NoUpload(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-bad", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{Content: "err", TimeStampStr: "2024-01-15T10:30:00Z"}}, + }, nil + }, + } + uploaded := false + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, _ []byte) error { + uploaded = true + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "1", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result, "must return empty when bucket is not configured") + assert.False(t, uploaded, "must not attempt upload with empty bucket") +} + +func TestCapture_MultipleDegradedPods_AllLogsInReport(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "bucket" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-a", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-b", Health: infrastructurehandler.Health{Status: "Degraded"}}, + {Kind: "Pod", Name: "pod-c", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, opts *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return []argocd.ApplicationLogEntry{ + {Result: argocd.ApplicationLogResult{ + Content: "crash in " + opts.PodName, + TimeStampStr: "2024-01-15T10:30:00Z", + }}, + }, nil + }, + } + var capturedData []byte + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, data []byte) error { + capturedData = data + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "42", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.NotEmpty(t, result) + output := string(capturedData) + assert.Equal(t, 3, strings.Count(output, "Pod: ")) + assert.Contains(t, output, "crash in pod-a") + assert.Contains(t, output, "crash in pod-b") + assert.Contains(t, output, "crash in pod-c") +} + +func TestCapture_GCSURIFormat(t *testing.T) { + snap := savePredVars() + defer snap.restore() + pred.GcsModelBucket = "my-bucket" + pred.AppEnv = "int" + + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "p", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + getApplicationLogsFn: func(_, _ string, _ *argocd.ApplicationLogsOptions) ([]argocd.ApplicationLogEntry, error) { + return nil, nil + }, + } + gcs := &mockGCSClient{} + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "55", ServiceName: "my-svc"} + + uri := p.captureAndUploadFailureLogs(job) + + assert.True(t, strings.HasPrefix(uri, "https://storage.cloud.google.com/"), "must start with GCS browser URL") + assert.Equal(t, "https://storage.cloud.google.com/my-bucket/validation-logs/my-svc/55.log", uri) +} + +func TestCapture_NilResourceDetail_ReturnsEmpty(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, nil + }, + } + uploaded := false + gcs := &mockGCSClient{ + uploadFileFn: func(_, _ string, _ []byte) error { + uploaded = true + return nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: gcs, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "1", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result) + assert.False(t, uploaded) +} + +func TestCapture_EmptyNodesSlice_ReturnsEmpty(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{Nodes: []infrastructurehandler.Node{}}, nil + }, + } + + p := &Predator{infrastructureHandler: infra, GcsClient: &mockGCSClient{}, workingEnv: "int"} + job := &validationjob.Table{ID: 1, GroupID: "1", ServiceName: "svc"} + + result := p.captureAndUploadFailureLogs(job) + + assert.Empty(t, result) +} + +// ============================================================ +// checkDeploymentHealth +// ============================================================ + +func TestCheckHealth_AllDeploymentsHealthy(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Deployment", Name: "deploy-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + {Kind: "Pod", Name: "pod-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.NoError(t, err) + assert.True(t, healthy) +} + +func TestCheckHealth_SomeDeploymentsUnhealthy(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Deployment", Name: "deploy-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + {Kind: "Deployment", Name: "deploy-2", Health: infrastructurehandler.Health{Status: "Degraded"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.NoError(t, err) + assert.False(t, healthy) +} + +func TestCheckHealth_APIError(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, errors.New("timeout") + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.Error(t, err) + assert.False(t, healthy) + assert.Contains(t, err.Error(), "failed to get resource detail") +} + +func TestCheckHealth_NilResourceDetail_ReturnsFalse(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return nil, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.NoError(t, err) + assert.False(t, healthy) +} + +func TestCheckHealth_EmptyNodes_ReturnsFalse(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{Nodes: []infrastructurehandler.Node{}}, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.NoError(t, err) + assert.False(t, healthy) +} + +func TestCheckHealth_ZeroDeployments_ReturnsTrue(t *testing.T) { + infra := &mockInfrastructureHandler{ + getResourceDetailFn: func(_, _ string) (*infrastructurehandler.ResourceDetail, error) { + return &infrastructurehandler.ResourceDetail{ + Nodes: []infrastructurehandler.Node{ + {Kind: "Pod", Name: "pod-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + {Kind: "ReplicaSet", Name: "rs-1", Health: infrastructurehandler.Health{Status: "Healthy"}}, + }, + }, nil + }, + } + p := &Predator{infrastructureHandler: infra, workingEnv: "int"} + + healthy, err := p.checkDeploymentHealth("svc") + + require.NoError(t, err) + // 0 total == 0 healthy → returns true (edge: vacuously healthy) + assert.True(t, healthy) +} diff --git a/horizon/pkg/argocd/logs.go b/horizon/pkg/argocd/logs.go new file mode 100644 index 00000000..7f6847d8 --- /dev/null +++ b/horizon/pkg/argocd/logs.go @@ -0,0 +1,199 @@ +package argocd + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + + "github.com/rs/zerolog/log" +) + +// ApplicationLogsOptions holds optional parameters for GetApplicationLogs. +// Streaming (follow) is not supported; reject follow=true at the call site. +// Defaults: Previous=false, SinceSeconds=0, Filter="". +type ApplicationLogsOptions struct { + PodName string + Container string + Previous bool + SinceSeconds int64 + TailLines int64 + Filter string +} + +// ApplicationLogResult is the inner result object in a log entry. +type ApplicationLogResult struct { + Content string `json:"content"` + TimeStamp *string `json:"timeStamp"` + Last bool `json:"last"` + TimeStampStr string `json:"timeStampStr"` + PodName string `json:"podName"` +} + +// ApplicationLogEntry is a single log entry (wrapper with "result"). +type ApplicationLogEntry struct { + Result ApplicationLogResult `json:"result"` +} + +// ApplicationLogsErrorDetails is the error body returned by Argo CD on non-2xx. +type ApplicationLogsErrorDetails struct { + Details []interface{} `json:"details"` + GrpcCode int `json:"grpc_code"` + HttpCode int `json:"http_code"` + HttpStatus string `json:"http_status"` + Message string `json:"message"` +} + +// ArgoCDAPIError is a structured error returned when the Argo CD API +// responds with a non-2xx status code. Callers can use errors.As to +// extract the upstream HTTP status and forward it appropriately. +type ArgoCDAPIError struct { + StatusCode int + HTTPStatus string + Message string +} + +func (e *ArgoCDAPIError) Error() string { + return fmt.Sprintf("argo CD logs API error (HTTP %d - %s): %s", + e.StatusCode, e.HTTPStatus, e.Message) +} + +func validateLogsOptions(opts *ApplicationLogsOptions) error { + if opts == nil { + return fmt.Errorf("log options must not be nil") + } + if opts.Container == "" { + return fmt.Errorf("container is required") + } + return nil +} + +func buildLogsURL(workingEnv, name string, opts *ApplicationLogsOptions) (string, error) { + base := getArgoCDAPI(workingEnv) + if base == "" { + return "", fmt.Errorf("ArgoCD API not configured for environment %s", workingEnv) + } + path := "/api/v1/applications/" + url.PathEscape(name) + "/logs" + u, err := url.Parse(base + path) + if err != nil { + return "", fmt.Errorf("failed to parse logs URL: %w", err) + } + q := url.Values{} + q.Set("container", opts.Container) + if opts.Previous { + q.Set("previous", "true") + } + if opts.SinceSeconds > 0 { + q.Set("sinceSeconds", strconv.FormatInt(opts.SinceSeconds, 10)) + } + tailLines := int64(1000) + if opts.TailLines > 0 { + tailLines = opts.TailLines + } + q.Set("tailLines", strconv.FormatInt(tailLines, 10)) + if opts.Filter != "" { + q.Set("filter", opts.Filter) + } + if opts.PodName != "" { + q.Set("podName", opts.PodName) + } + u.RawQuery = q.Encode() + return u.String(), nil +} + +// parseNDJSONLogs decodes NDJSON from r (e.g. resp.Body) without buffering the full stream. +func parseNDJSONLogs(r io.Reader) ([]ApplicationLogEntry, error) { + entries := make([]ApplicationLogEntry, 0) + dec := json.NewDecoder(r) + for { + var e ApplicationLogEntry + err := dec.Decode(&e) + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to decode log entry: %w", err) + } + if e.Result.Last { + break + } + if e.Result.TimeStamp == nil { + return nil, fmt.Errorf("argo log stream error: %s", e.Result.Content) + } + entries = append(entries, e) + } + return entries, nil +} + +// GetApplicationLogs fetches application logs from Argo CD (non-streaming only). +// name is the Argo CD application name. opts cannot be nil +// When follow=true, returns an error that streaming is not supported. +func GetApplicationLogs(name, workingEnv string, opts *ApplicationLogsOptions) ([]ApplicationLogEntry, error) { + if name == "" { + return nil, fmt.Errorf("application name is required") + } + if err := validateLogsOptions(opts); err != nil { + return nil, err + } + + fullURL, err := buildLogsURL(workingEnv, name, opts) + if err != nil { + log.Error().Err(err).Str("name", name).Str("workingEnv", workingEnv).Msg("GetApplicationLogs: failed to build URL") + return nil, err + } + + req, err := getArgoCDClient(fullURL, nil, http.MethodGet, workingEnv, "", "") + if err != nil { + log.Error().Err(err).Str("name", name).Str("workingEnv", workingEnv).Msg("GetApplicationLogs: failed to create request") + return nil, err + } + + client := getHTTPClient() + resp, err := client.Do(req) + if err != nil { + log.Error().Err(err).Str("name", name).Str("workingEnv", workingEnv).Msg("GetApplicationLogs: request failed") + return nil, fmt.Errorf("failed to get logs from Argo CD: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + var wrapper struct { + Error ApplicationLogsErrorDetails `json:"error"` + } + if err := json.NewDecoder(resp.Body).Decode(&wrapper); err != nil { + return nil, &ArgoCDAPIError{ + StatusCode: resp.StatusCode, + HTTPStatus: http.StatusText(resp.StatusCode), + Message: fmt.Sprintf("failed to decode error response: %v", err), + } + } + msg := wrapper.Error.Message + if msg == "" { + msg = "no error message in response" + } + httpStatus := wrapper.Error.HttpStatus + if httpStatus == "" { + httpStatus = http.StatusText(resp.StatusCode) + } + log.Debug(). + Str("name", name). + Int("status", resp.StatusCode). + Str("message", msg). + Msg("GetApplicationLogs: Argo CD returned error") + return nil, &ArgoCDAPIError{ + StatusCode: resp.StatusCode, + HTTPStatus: httpStatus, + Message: msg, + } + } + + entries, err := parseNDJSONLogs(resp.Body) + if err != nil { + log.Error().Err(err).Str("name", name).Msg("GetApplicationLogs: failed to parse response") + return nil, err + } + log.Info().Str("name", name).Str("workingEnv", workingEnv).Int("count", len(entries)).Int("status", resp.StatusCode).Msg("GetApplicationLogs: success") + return entries, nil +} diff --git a/horizon/pkg/argocd/logs_test.go b/horizon/pkg/argocd/logs_test.go new file mode 100644 index 00000000..4046d6e3 --- /dev/null +++ b/horizon/pkg/argocd/logs_test.go @@ -0,0 +1,571 @@ +package argocd + +import ( + "bytes" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/spf13/viper" +) + +// --------------------------------------------------------------------------- +// parseNDJSONLogs +// --------------------------------------------------------------------------- + +func TestParseNDJSONLogs_MultipleValidEntries(t *testing.T) { + ts1 := "2026-02-24T11:53:35Z" + ts2 := "2026-02-24T11:53:36Z" + ent1 := ApplicationLogEntry{Result: ApplicationLogResult{Content: "a", TimeStamp: &ts1, PodName: "p1"}} + ent2 := ApplicationLogEntry{Result: ApplicationLogResult{Content: "b", TimeStamp: &ts2, PodName: "p2"}} + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + _ = enc.Encode(ent1) + _ = enc.Encode(ent2) + + entries, err := parseNDJSONLogs(&buf) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + if entries[0].Result.Content != "a" { + t.Errorf("entry[0] content = %q, want %q", entries[0].Result.Content, "a") + } + if entries[1].Result.Content != "b" { + t.Errorf("entry[1] content = %q, want %q", entries[1].Result.Content, "b") + } +} + +func TestParseNDJSONLogs_EmptyInput(t *testing.T) { + entries, err := parseNDJSONLogs(strings.NewReader("")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 0 { + t.Fatalf("expected 0 entries, got %d", len(entries)) + } +} + +func TestParseNDJSONLogs_LastEntryStopsDecoding(t *testing.T) { + ts := "2026-02-24T11:53:35Z" + regular := ApplicationLogEntry{Result: ApplicationLogResult{Content: "line1", TimeStamp: &ts, PodName: "p1"}} + last := ApplicationLogEntry{Result: ApplicationLogResult{Content: "", Last: true, PodName: "p1"}} + trailing := ApplicationLogEntry{Result: ApplicationLogResult{Content: "should-not-appear", TimeStamp: &ts, PodName: "p1"}} + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + _ = enc.Encode(regular) + _ = enc.Encode(last) + _ = enc.Encode(trailing) + + entries, err := parseNDJSONLogs(&buf) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 1 { + t.Fatalf("expected 1 entry (Last stops decoding), got %d", len(entries)) + } + if entries[0].Result.Content != "line1" { + t.Errorf("entry content = %q, want %q", entries[0].Result.Content, "line1") + } +} + +func TestParseNDJSONLogs_NilTimestampNonLast_StreamError(t *testing.T) { + errEntry := ApplicationLogEntry{Result: ApplicationLogResult{ + Content: "rpc error: code = NotFound", + }} + + var buf bytes.Buffer + _ = json.NewEncoder(&buf).Encode(errEntry) + + _, err := parseNDJSONLogs(&buf) + if err == nil { + t.Fatal("expected error for nil TimeStamp on non-last entry") + } + if !strings.Contains(err.Error(), "argo log stream error") { + t.Errorf("error = %q, want substring %q", err.Error(), "argo log stream error") + } + if !strings.Contains(err.Error(), "rpc error: code = NotFound") { + t.Errorf("error should contain original content, got %q", err.Error()) + } +} + +func TestParseNDJSONLogs_MalformedJSON(t *testing.T) { + _, err := parseNDJSONLogs(strings.NewReader(`{"result": INVALID}`)) + if err == nil { + t.Fatal("expected error for malformed JSON") + } + if !strings.Contains(err.Error(), "failed to decode log entry") { + t.Errorf("error = %q, want substring %q", err.Error(), "failed to decode log entry") + } +} + +func TestParseNDJSONLogs_ValidEntriesThenLast(t *testing.T) { + ts := "2026-02-24T12:00:00Z" + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + for i := 0; i < 5; i++ { + _ = enc.Encode(ApplicationLogEntry{Result: ApplicationLogResult{ + Content: "line", TimeStamp: &ts, PodName: "pod", + }}) + } + _ = enc.Encode(ApplicationLogEntry{Result: ApplicationLogResult{Last: true}}) + + entries, err := parseNDJSONLogs(&buf) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 5 { + t.Fatalf("expected 5 entries, got %d", len(entries)) + } +} + +// --------------------------------------------------------------------------- +// buildLogsURL +// --------------------------------------------------------------------------- + +func TestBuildLogsURL_UnconfiguredEnv(t *testing.T) { + viper.Set("NONEXISTENT_ENV_ARGOCD_API", "") + viper.Set("ARGOCD_API", "") + defer func() { + viper.Set("NONEXISTENT_ENV_ARGOCD_API", nil) + viper.Set("ARGOCD_API", nil) + }() + + _, err := buildLogsURL("nonexistent_env", "my-app", &ApplicationLogsOptions{}) + if err == nil { + t.Fatal("expected error for unconfigured env") + } + if !strings.Contains(err.Error(), "not configured") { + t.Errorf("error = %q, want substring %q", err.Error(), "not configured") + } +} + +func TestBuildLogsURL_ZeroTailLinesFallsBackToDefault(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + u, err := buildLogsURL("test", "my-app", &ApplicationLogsOptions{Container: "main"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(u, "tailLines=1000") { + t.Errorf("expected tailLines=1000 (zero value falls back to default) in URL, got %q", u) + } + if !strings.Contains(u, "container=main") { + t.Errorf("expected container=main in URL, got %q", u) + } +} + +func TestBuildLogsURL_NegativeTailLinesFallsBackToDefault(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + u, err := buildLogsURL("test", "my-app", &ApplicationLogsOptions{Container: "main", TailLines: -1}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(u, "tailLines=1000") { + t.Errorf("expected default tailLines=1000 for negative value, got %q", u) + } +} + +func TestBuildLogsURL_CustomTailLines(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + u, err := buildLogsURL("test", "my-app", &ApplicationLogsOptions{TailLines: 50}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(u, "tailLines=50") { + t.Errorf("expected tailLines=50 in URL, got %q", u) + } +} + +func TestBuildLogsURL_AllOptions(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + opts := &ApplicationLogsOptions{ + PodName: "pod-abc", + Container: "sidecar", + Previous: true, + SinceSeconds: 3600, + TailLines: 200, + Filter: "ERROR", + } + u, err := buildLogsURL("test", "my-app", opts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + checks := map[string]string{ + "container": "container=sidecar", + "previous": "previous=true", + "sinceSeconds": "sinceSeconds=3600", + "tailLines": "tailLines=200", + "filter": "filter=ERROR", + "podName": "podName=pod-abc", + } + for name, want := range checks { + if !strings.Contains(u, want) { + t.Errorf("expected %s (%q) in URL, got %q", name, want, u) + } + } +} + +func TestBuildLogsURL_OptionalFieldsOmitted(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + u, err := buildLogsURL("test", "my-app", &ApplicationLogsOptions{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Contains(u, "previous=") { + t.Errorf("previous should be omitted when false, got %q", u) + } + if strings.Contains(u, "sinceSeconds=") { + t.Errorf("sinceSeconds should be omitted when 0, got %q", u) + } + if strings.Contains(u, "filter=") { + t.Errorf("filter should be omitted when empty, got %q", u) + } + if strings.Contains(u, "podName=") { + t.Errorf("podName should be omitted when empty, got %q", u) + } +} + +func TestBuildLogsURL_NameIsPathEscaped(t *testing.T) { + viper.Set("TEST_ARGOCD_API", "https://argocd.example.com") + defer viper.Set("TEST_ARGOCD_API", nil) + + u, err := buildLogsURL("test", "ns/app-name", &ApplicationLogsOptions{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Contains(u, "ns/app-name/logs") { + t.Errorf("name with slash should be escaped, got %q", u) + } + if !strings.Contains(u, "ns%2Fapp-name") { + t.Errorf("expected path-escaped name in URL, got %q", u) + } +} + +// --------------------------------------------------------------------------- +// GetApplicationLogs +// --------------------------------------------------------------------------- + +func setViperForTest(t *testing.T, serverURL string) { + t.Helper() + oldAPI := viper.GetString("GCP_INT_ARGOCD_API") + oldToken := viper.GetString("GCP_INT_ARGOCD_TOKEN") + viper.Set("GCP_INT_ARGOCD_API", serverURL) + viper.Set("GCP_INT_ARGOCD_TOKEN", "test-token") + t.Cleanup(func() { + viper.Set("GCP_INT_ARGOCD_API", oldAPI) + viper.Set("GCP_INT_ARGOCD_TOKEN", oldToken) + }) +} + +func TestGetApplicationLogs_EmptyName(t *testing.T) { + _, err := GetApplicationLogs("", "gcp_int", &ApplicationLogsOptions{}) + if err == nil { + t.Fatal("expected error when application name is empty") + } + if err.Error() != "application name is required" { + t.Errorf("error = %q, want %q", err.Error(), "application name is required") + } +} + +func TestGetApplicationLogs_NilOpts(t *testing.T) { + _, err := GetApplicationLogs("my-app", "gcp_int", nil) + if err == nil { + t.Fatal("expected error when opts is nil") + } + if err.Error() != "log options must not be nil" { + t.Errorf("error = %q, want %q", err.Error(), "log options must not be nil") + } +} + +func TestGetApplicationLogs_EmptyContainer(t *testing.T) { + _, err := GetApplicationLogs("my-app", "gcp_int", &ApplicationLogsOptions{}) + if err == nil { + t.Fatal("expected error when container is empty") + } + if err.Error() != "container is required" { + t.Errorf("error = %q, want %q", err.Error(), "container is required") + } +} + +func TestGetApplicationLogs_BuildURLFails(t *testing.T) { + viper.Set("BADENV_ARGOCD_API", "") + viper.Set("ARGOCD_API", "") + defer func() { + viper.Set("BADENV_ARGOCD_API", nil) + viper.Set("ARGOCD_API", nil) + }() + + _, err := GetApplicationLogs("my-app", "badenv", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error when buildLogsURL fails") + } + if !strings.Contains(err.Error(), "not configured") { + t.Errorf("error = %q, want substring %q", err.Error(), "not configured") + } +} + +func TestGetApplicationLogs_Success(t *testing.T) { + ts := "2026-02-24T11:53:35Z" + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !strings.Contains(r.URL.Path, "/api/v1/applications/") || !strings.HasSuffix(r.URL.Path, "/logs") { + t.Errorf("unexpected path: %s", r.URL.Path) + } + w.Header().Set("Content-Type", "application/json") + enc := json.NewEncoder(w) + _ = enc.Encode(ApplicationLogEntry{Result: ApplicationLogResult{ + Content: "line1", TimeStamp: &ts, PodName: "pod-1", + TimeStampStr: "2026-02-24T11:53:35.085112331Z", + }}) + _ = enc.Encode(ApplicationLogEntry{Result: ApplicationLogResult{ + Content: "line2", TimeStamp: &ts, PodName: "pod-1", + TimeStampStr: "2026-02-24T11:53:36.000000000Z", + }}) + _ = enc.Encode(ApplicationLogEntry{Result: ApplicationLogResult{Last: true}}) + })) + defer server.Close() + setViperForTest(t, server.URL) + + entries, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + if entries[0].Result.Content != "line1" { + t.Errorf("entry[0] content = %q, want %q", entries[0].Result.Content, "line1") + } + if entries[1].Result.Content != "line2" { + t.Errorf("entry[1] content = %q, want %q", entries[1].Result.Content, "line2") + } +} + +func TestGetApplicationLogs_SuccessEmptyLogs(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(ApplicationLogEntry{Result: ApplicationLogResult{Last: true}}) + })) + defer server.Close() + setViperForTest(t, server.URL) + + entries, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entries) != 0 { + t.Fatalf("expected 0 entries for empty log stream, got %d", len(entries)) + } +} + +func TestGetApplicationLogs_QueryParamsPropagated(t *testing.T) { + var capturedQuery string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedQuery = r.URL.RawQuery + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(ApplicationLogEntry{Result: ApplicationLogResult{Last: true}}) + })) + defer server.Close() + setViperForTest(t, server.URL) + + opts := &ApplicationLogsOptions{ + PodName: "pod-xyz", + Container: "worker", + Previous: true, + SinceSeconds: 600, + TailLines: 100, + Filter: "WARN", + } + _, err := GetApplicationLogs("int-myapp", "gcp_int", opts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + checks := []string{ + "podName=pod-xyz", + "container=worker", + "previous=true", + "sinceSeconds=600", + "tailLines=100", + "filter=WARN", + } + for _, want := range checks { + if !strings.Contains(capturedQuery, want) { + t.Errorf("query %q missing %q", capturedQuery, want) + } + } +} + +func TestGetApplicationLogs_Non200_ValidErrorBody(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "error": map[string]interface{}{ + "details": []interface{}{}, + "grpc_code": 5, + "http_code": 404, + "http_status": "Not Found", + "message": "application not found", + }, + }) + })) + defer server.Close() + setViperForTest(t, server.URL) + + _, err := GetApplicationLogs("int-nonexistent", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error on 404 response") + } + if !strings.Contains(err.Error(), "404") { + t.Errorf("error should contain status code, got %q", err.Error()) + } + if !strings.Contains(err.Error(), "application not found") { + t.Errorf("error should contain message, got %q", err.Error()) + } + if !strings.Contains(err.Error(), "Not Found") { + t.Errorf("error should contain HTTP status text, got %q", err.Error()) + } + var apiErr *ArgoCDAPIError + if !errors.As(err, &apiErr) { + t.Fatal("expected error to be *ArgoCDAPIError") + } + if apiErr.StatusCode != http.StatusNotFound { + t.Errorf("expected StatusCode 404, got %d", apiErr.StatusCode) + } + if apiErr.Message != "application not found" { + t.Errorf("expected Message %q, got %q", "application not found", apiErr.Message) + } +} + +func TestGetApplicationLogs_Non200_InvalidErrorBody(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("not json")) + })) + defer server.Close() + setViperForTest(t, server.URL) + + _, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error on 500 with invalid body") + } + if !strings.Contains(err.Error(), "failed to decode error response") { + t.Errorf("error = %q, want substring about decode failure", err.Error()) + } + if !strings.Contains(err.Error(), "500") { + t.Errorf("error should contain status code 500, got %q", err.Error()) + } + var apiErr *ArgoCDAPIError + if !errors.As(err, &apiErr) { + t.Fatal("expected error to be *ArgoCDAPIError even for invalid body") + } + if apiErr.StatusCode != http.StatusInternalServerError { + t.Errorf("expected StatusCode 500, got %d", apiErr.StatusCode) + } +} + +func TestGetApplicationLogs_Non200_Forbidden(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusForbidden) + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "error": map[string]interface{}{ + "grpc_code": 7, + "http_code": 403, + "http_status": "Forbidden", + "message": "permission denied", + }, + }) + })) + defer server.Close() + setViperForTest(t, server.URL) + + _, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error on 403 response") + } + if !strings.Contains(err.Error(), "403") { + t.Errorf("error should contain 403, got %q", err.Error()) + } + if !strings.Contains(err.Error(), "permission denied") { + t.Errorf("error should contain message, got %q", err.Error()) + } + var apiErr *ArgoCDAPIError + if !errors.As(err, &apiErr) { + t.Fatal("expected error to be *ArgoCDAPIError") + } + if apiErr.StatusCode != http.StatusForbidden { + t.Errorf("expected StatusCode 403, got %d", apiErr.StatusCode) + } +} + +func TestGetApplicationLogs_StreamErrorInBody(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(ApplicationLogEntry{Result: ApplicationLogResult{ + Content: "rpc error: code = NotFound desc = app not found", + }}) + })) + defer server.Close() + setViperForTest(t, server.URL) + + _, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error for stream error in body") + } + if !strings.Contains(err.Error(), "argo log stream error") { + t.Errorf("error = %q, want substring %q", err.Error(), "argo log stream error") + } +} + +func TestGetApplicationLogs_MalformedResponseBody(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"result": BROKEN}`)) + })) + defer server.Close() + setViperForTest(t, server.URL) + + _, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error for malformed response body") + } + if !strings.Contains(err.Error(), "failed to decode log entry") { + t.Errorf("error = %q, want substring about decode failure", err.Error()) + } +} + +func TestGetApplicationLogs_ClientRequestFails(t *testing.T) { + viper.Set("GCP_INT_ARGOCD_API", "http://127.0.0.1:1") + viper.Set("GCP_INT_ARGOCD_TOKEN", "test-token") + defer func() { + viper.Set("GCP_INT_ARGOCD_API", nil) + viper.Set("GCP_INT_ARGOCD_TOKEN", nil) + }() + + _, err := GetApplicationLogs("int-myapp", "gcp_int", &ApplicationLogsOptions{Container: "main"}) + if err == nil { + t.Fatal("expected error when HTTP client cannot connect") + } + if !strings.Contains(err.Error(), "failed to get logs from Argo CD") { + t.Errorf("error = %q, want substring about request failure", err.Error()) + } +}