diff --git a/CHANGELOG.md b/CHANGELOG.md index d225cf4..55bda7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to the AxonFlow Python SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.5.0] - 2026-02-18 + +### Added + +- **Media Governance Types**: `MediaContent`, `MediaAnalysisResult`, `MediaAnalysisResponse` for multimodal image governance +- **`proxy_llm_call_with_media()`**: Async + sync methods to send images (base64 or URL) alongside queries for governance analysis before LLM routing + +### Breaking + +- `MediaAnalysisResult.extracted_text` replaced by `has_extracted_text` (bool) and `extracted_text_length` (int). Raw extracted text is no longer exposed in API responses. + +--- + ## [3.4.0] - 2026-02-13 ### Added @@ -13,11 +26,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `async fail_workflow(workflow_id, reason=None)` + sync wrapper - Sends `POST /api/v1/workflows/{id}/fail` - **HITL Queue API** (Enterprise): Human-in-the-loop approval queue management - - `list_hitl_queue(opts)` — list pending approvals with filtering - - `get_hitl_request(request_id)` — get approval details - - `approve_hitl_request(request_id, review)` — approve a request - - `reject_hitl_request(request_id, review)` — reject a request - - `get_hitl_stats()` — dashboard statistics + - `list_hitl_queue(opts)`: list pending approvals with filtering + - `get_hitl_request(request_id)`: get approval details + - `approve_hitl_request(request_id, review)`: approve a request + - `reject_hitl_request(request_id, review)`: reject a request + - `get_hitl_stats()`: dashboard statistics - New models: `HITLApprovalRequest`, `HITLQueueListOptions`, `HITLQueueListResponse`, `HITLReviewInput`, `HITLStats` ### Fixed diff --git a/axonflow/__init__.py 
b/axonflow/__init__.py index f0529b6..0ce7194 100644 --- a/axonflow/__init__.py +++ b/axonflow/__init__.py @@ -148,6 +148,9 @@ ListExecutionsResponse, ListUsageRecordsOptions, ListWebhooksResponse, + MediaAnalysisResponse, + MediaAnalysisResult, + MediaContent, Mode, ModelPricing, PlanExecutionResponse, @@ -213,6 +216,10 @@ "ClientResponse", "PolicyEvaluationInfo", "CodeArtifact", + # Multimodal Image Governance types + "MediaContent", + "MediaAnalysisResult", + "MediaAnalysisResponse", # Connector types "ConnectorMetadata", "ConnectorInstallRequest", diff --git a/axonflow/client.py b/axonflow/client.py index eebb07b..76ce36b 100644 --- a/axonflow/client.py +++ b/axonflow/client.py @@ -151,6 +151,7 @@ ListExecutionsResponse, ListUsageRecordsOptions, ListWebhooksResponse, + MediaContent, Mode, PlanExecutionResponse, PlanResponse, @@ -770,6 +771,81 @@ async def proxy_llm_call( return response + async def proxy_llm_call_with_media( + self, + user_token: str, + query: str, + request_type: str, + media: list[MediaContent], + context: dict[str, Any] | None = None, + ) -> ClientResponse: + """Send a request with media content (images) for governance analysis. + + Media items are analyzed for PII, content safety, biometric data, and + document classification before being forwarded to the LLM provider. + + Args: + user_token: User authentication token. + query: The prompt/query text. + request_type: Type of request (e.g., "chat", "sql"). + media: List of MediaContent items (images) to analyze. + context: Optional additional context. + + Returns: + ClientResponse with media_analysis field populated. 
+
+        Raises:
+            PolicyViolationError: If request is blocked by policy
+            AuthenticationError: If credentials are invalid
+            TimeoutError: If request times out
+        """
+        # Default to "anonymous" if user_token is empty (community mode)
+        if not user_token:
+            user_token = "anonymous"  # noqa: S105 - not a password, just a placeholder
+
+        # Media requests skip cache: analysis is non-deterministic and
+        # cache keys don't incorporate binary image data.
+        request = ClientRequest(
+            query=query,
+            user_token=user_token,
+            client_id=self._config.client_id,
+            request_type=request_type,
+            context=context or {},
+            media=media,
+        )
+
+        if self._config.debug:
+            self._logger.debug(
+                "Executing multimodal query",
+                request_type=request_type,
+                query=query[:50] if query else "",
+                media_count=len(media),
+            )
+
+        response_data = await self._request(
+            "POST",
+            "/api/request",
+            json_data=request.model_dump(),
+        )
+
+        response = ClientResponse.model_validate(response_data)
+
+        # Check for policy violation
+        if response.blocked:
+            # Extract policy name from policy_info if available
+            policy = None
+            if response.policy_info and response.policy_info.policies_evaluated:
+                policy = response.policy_info.policies_evaluated[0]
+            raise PolicyViolationError(
+                response.block_reason or "Request blocked by policy",
+                policy=policy,
+                block_reason=response.block_reason,
+            )
+
+        # Media responses are intentionally never written to the cache (see note above).
+
+        return response
+
     async def list_connectors(self) -> list[ConnectorMetadata]:
         """List all available MCP connectors.
@@ -5625,6 +5701,25 @@ def proxy_llm_call(
             self._async_client.proxy_llm_call(user_token, query, request_type, context)
         )
 
+    def proxy_llm_call_with_media(
+        self,
+        user_token: str,
+        query: str,
+        request_type: str,
+        media: list[MediaContent],
+        context: dict[str, Any] | None = None,
+    ) -> ClientResponse:
+        """Send a request with media content (images) for governance analysis.
+ + This is Proxy Mode with multimodal support - media items are analyzed + for PII, content safety, biometric data, and document classification. + """ + return self._run_sync( + self._async_client.proxy_llm_call_with_media( + user_token, query, request_type, media, context + ) + ) + def list_connectors(self) -> list[ConnectorMetadata]: """List all available MCP connectors.""" return self._run_sync(self._async_client.list_connectors()) diff --git a/axonflow/types.py b/axonflow/types.py index bd081e8..5a8717e 100644 --- a/axonflow/types.py +++ b/axonflow/types.py @@ -8,7 +8,7 @@ from datetime import datetime from enum import Enum -from typing import Any +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field, field_validator @@ -77,6 +77,54 @@ class AxonFlowConfig(BaseModel): cache: CacheConfig = Field(default_factory=CacheConfig) +class MediaContent(BaseModel): + """Media content (image) to include with a request for governance analysis.""" + + source: Literal["base64", "url"] = Field( + ..., description="How media is provided: 'base64' or 'url'" + ) + base64_data: str | None = Field(default=None, description="Base64-encoded image data") + url: str | None = Field(default=None, description="Image URL") + mime_type: str = Field(..., description="Media content type (e.g., 'image/jpeg')") + + +class MediaAnalysisResult(BaseModel): + """Analysis results for a single media item.""" + + media_index: int = Field(default=0, description="Index in the request media array") + sha256_hash: str = Field(default="", description="SHA-256 hash of the image data") + has_faces: bool = Field(default=False, description="Whether faces were detected") + face_count: int = Field(default=0, ge=0, description="Number of faces detected") + has_biometric_data: bool = Field( + default=False, description="Biometric data detected (GDPR Art. 
9)" + ) + nsfw_score: float = Field(default=0.0, ge=0, le=1, description="NSFW content score") + violence_score: float = Field(default=0.0, ge=0, le=1, description="Violence content score") + content_safe: bool = Field(default=True, description="Aggregated content safety flag") + document_type: str | None = Field(default=None, description="Classified document type") + is_sensitive_document: bool = Field(default=False, description="Sensitive document flag") + has_pii: bool = Field(default=False, description="PII detected in image text via OCR") + pii_types: list[str] = Field(default_factory=list, description="Types of PII detected") + has_extracted_text: bool = Field( + default=False, description="Whether text was extracted from image via OCR" + ) + extracted_text_length: int = Field( + default=0, ge=0, description="Length of extracted text in characters" + ) + estimated_cost_usd: float = Field(default=0.0, ge=0, description="Analysis cost for this item") + warnings: list[str] = Field(default_factory=list, description="Governance warnings") + + +class MediaAnalysisResponse(BaseModel): + """Aggregated media analysis results in the response.""" + + results: list[MediaAnalysisResult] = Field( + default_factory=list, description="Per-item analysis results" + ) + total_cost_usd: float = Field(default=0.0, ge=0, description="Total analysis cost") + analysis_time_ms: int = Field(default=0, ge=0, description="Total analysis time (ms)") + + class ClientRequest(BaseModel): """Request to AxonFlow Agent.""" @@ -85,6 +133,9 @@ class ClientRequest(BaseModel): client_id: str | None = Field(default=None, description="Client ID (optional)") request_type: str = Field(..., description="Request type") context: dict[str, Any] = Field(default_factory=dict, description="Additional context") + media: list[MediaContent] | None = Field( + default=None, description="Optional media for multimodal governance" + ) class CodeArtifact(BaseModel): @@ -148,6 +199,9 @@ class 
ClientResponse(BaseModel): block_reason: str | None = Field(default=None, description="Block reason") policy_info: PolicyEvaluationInfo | None = Field(default=None) budget_info: BudgetInfo | None = Field(default=None, description="Budget status (Issue #1082)") + media_analysis: MediaAnalysisResponse | None = Field( + default=None, description="Media governance results" + ) def model_post_init(self, __context: Any) -> None: """Detect nested data.success=false and surface error."""