From a17aeeef1da7d806b4e429686085d46f199780e9 Mon Sep 17 00:00:00 2001 From: HC-ONLINE Date: Tue, 3 Mar 2026 13:38:58 -0500 Subject: [PATCH 1/4] =?UTF-8?q?feat(config):=20a=C3=B1adir=20claves=20y=20?= =?UTF-8?q?l=C3=ADmites=20de=20tasa=20para=20OpenAI=20en=20la=20configurac?= =?UTF-8?q?i=C3=B3n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 4 ++++ api/config.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/.env.example b/.env.example index 9170bd3..872aa02 100644 --- a/.env.example +++ b/.env.example @@ -14,16 +14,19 @@ REDIS_URL=redis://localhost:6379/0 # Claves API de proveedores GROQ_API_KEY=tu_clave_groq_aqui OPENROUTER_API_KEY=tu_clave_openrouter_aqui +OPENAI_API_KEY=tu_clave_openai_aqui # OLLAMA_API_KEY= # Opcional, Ollama local no requiere autenticación # Configuración de proveedores GROQ_BASE_URL=https://api.groq.com/openai/v1 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +OPENAI_BASE_URL=https://api.openai.com/v1 OLLAMA_BASE_URL=http://localhost:11434 # Modelos por defecto por proveedor GROQ_DEFAULT_MODEL=llama-3.3-70b-versatile OPENROUTER_DEFAULT_MODEL=openai/gpt-3.5-turbo +OPENAI_DEFAULT_MODEL=gpt-4o-mini OLLAMA_DEFAULT_MODEL=llama3.2:1b # Timeouts (en segundos) @@ -46,6 +49,7 @@ MAX_CONCURRENT_STREAMS=10 # Útil para ajustar planes o límites específicos del proveedor # GROQ_RATE_LIMIT=30 # OPENROUTER_RATE_LIMIT=20 +# OPENAI_RATE_LIMIT=30 # OLLAMA_RATE_LIMIT=100 # Autenticación diff --git a/api/config.py b/api/config.py index b85b836..6159746 100644 --- a/api/config.py +++ b/api/config.py @@ -26,16 +26,19 @@ class Settings(BaseSettings): # Claves API groq_api_key: Optional[str] = None openrouter_api_key: Optional[str] = None + openai_api_key: Optional[str] = None ollama_api_key: Optional[str] = None # Opcional, Ollama local no lo requiere # URLs base de proveedores groq_base_url: str = "https://api.groq.com/openai/v1" openrouter_base_url: str = 
"https://openrouter.ai/api/v1" + openai_base_url: str = "https://api.openai.com/v1" ollama_base_url: str = "http://localhost:11434" # Modelos por defecto por proveedor groq_default_model: str = "llama-3.3-70b-versatile" openrouter_default_model: str = "openai/gpt-3.5-turbo" + openai_default_model: str = "gpt-4o-mini" ollama_default_model: str = "llama3.2:1b" # Timeouts (segundos) @@ -57,6 +60,7 @@ class Settings(BaseSettings): # El límite global solo se usa si se elimina explícitamente este campo. groq_rate_limit: int = 30 openrouter_rate_limit: int = 20 + openai_rate_limit: int = 30 ollama_rate_limit: int = 100 # Autenticación @@ -80,6 +84,7 @@ def get_provider_rate_limit(self, provider_name: str) -> int: provider_limits = { "groq": self.groq_rate_limit, "openrouter": self.openrouter_rate_limit, + "openai": self.openai_rate_limit, "ollama": self.ollama_rate_limit, } return provider_limits.get(provider_name) or self.rate_limit_requests_per_minute From 66508f9f2f963d283db1d78f029e3e1dd2215c7a Mon Sep 17 00:00:00 2001 From: HC-ONLINE Date: Tue, 3 Mar 2026 14:47:48 -0500 Subject: [PATCH 2/4] =?UTF-8?q?feat(adapter):=20implementar=20OpenAIAdapte?= =?UTF-8?q?r=20con=20m=C3=A9todos=20de=20streaming=20y=20generaci=C3=B3n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/providers/openai_adapter.py | 184 +++++++++++++++++++++++++ tests/test_providers_openai_adapter.py | 56 ++++++++ 2 files changed, 240 insertions(+) create mode 100644 api/providers/openai_adapter.py create mode 100644 tests/test_providers_openai_adapter.py diff --git a/api/providers/openai_adapter.py b/api/providers/openai_adapter.py new file mode 100644 index 0000000..d686c5a --- /dev/null +++ b/api/providers/openai_adapter.py @@ -0,0 +1,184 @@ +""" +Adapter para OpenAI API. +Implementa el contrato ProviderAdapter para interactuar con OpenAI. 
+""" + +from collections.abc import AsyncGenerator +import json +import logging +from typing import Optional +import httpx + +from api.providers.base import ProviderAdapter +from api.schemas import ChatRequest, ChatResponse, ProviderError +from api.infra.http_client import HTTPClient + +logger = logging.getLogger(__name__) + + +class OpenAIAdapter(ProviderAdapter): + """Adapter para OpenAI API.""" + + name = "openai" + + DEFAULT_MODEL = "gpt-4o-mini" + + def __init__( + self, + http_client: HTTPClient, + api_key: str, + base_url: str = "https://api.openai.com/v1", + timeout: float = 30.0, + default_model: Optional[str] = None, + ): + super().__init__(http_client, api_key, base_url, timeout) + self.default_model = default_model or self.DEFAULT_MODEL + + def _build_payload(self, request: ChatRequest) -> dict: + messages = [ + {"role": msg.role, "content": msg.content} for msg in request.messages + ] + + payload = { + "model": request.model or self.default_model, + "messages": messages, + "max_tokens": request.max_tokens, + "temperature": request.temperature, + "stream": request.stream, + } + + return payload + + async def stream(self, request: ChatRequest) -> AsyncGenerator[str, None]: + request.stream = True + payload = self._build_payload(request) + url = f"{self.base_url}/chat/completions" + headers = self._get_headers() + + try: + async for chunk_bytes in self.http_client.stream_post( + url=url, + json=payload, + headers=headers, + timeout=self.timeout, + ): + chunk_text = chunk_bytes.decode("utf-8") + + for line in chunk_text.split("\n"): + line = line.strip() + + if not line or not line.startswith("data: "): + continue + + data_str = line[6:] + + if data_str == "[DONE]": + break + + try: + data = json.loads(data_str) + delta = data["choices"][0].get("delta", {}) + content = delta.get("content") + + if content: + yield content + + except Exception: + continue + + except httpx.HTTPStatusError as e: + raise self._handle_http_error(e.response.status_code, str(e)) + 
except httpx.TimeoutException as e: + raise ProviderError( + provider=self.name, + code="TIMEOUT", + message=f"Timeout al conectar con OpenAI: {str(e)}", + retriable=True, + original_error=e, + ) + except Exception as e: + from api.utils import log_provider_error + + log_provider_error( + logger, + provider=self.name, + error_code="UNKNOWN_ERROR", + request_id=getattr(request, "request_id", None), + exc=e, + ) + raise ProviderError( + provider=self.name, + code="UNKNOWN_ERROR", + message=f"Error inesperado: {str(e)}", + retriable=False, + original_error=e, + ) + + async def generate(self, request: ChatRequest) -> ChatResponse: + request.stream = False + payload = self._build_payload(request) + url = f"{self.base_url}/chat/completions" + headers = self._get_headers() + + try: + response = await self.http_client.post( + url=url, + json=payload, + headers=headers, + timeout=self.timeout, + ) + + response.raise_for_status() + data = response.json() + + if "choices" in data and len(data["choices"]) > 0: + text = data["choices"][0]["message"]["content"] + usage = data.get("usage", {}) + provider_meta = { + "tokens_prompt": usage.get("prompt_tokens"), + "tokens_completion": usage.get("completion_tokens"), + "tokens_total": usage.get("total_tokens"), + } + + return ChatResponse( + text=text, + provider=self.name, + model=data.get("model", self.default_model), + provider_meta=provider_meta, + ) + else: + raise ProviderError( + provider=self.name, + code="INVALID_RESPONSE", + message="Respuesta de OpenAI no contiene choices", + retriable=False, + ) + + except httpx.HTTPStatusError as e: + raise self._handle_http_error(e.response.status_code, str(e)) + except httpx.TimeoutException as e: + raise ProviderError( + provider=self.name, + code="TIMEOUT", + message=f"Timeout al conectar con OpenAI: {str(e)}", + retriable=True, + original_error=e, + ) + + except Exception as e: + from api.utils import log_provider_error + + log_provider_error( + logger, + provider=self.name, + 
error_code="UNKNOWN_ERROR", + request_id=getattr(request, "request_id", None), + exc=e, + ) + raise ProviderError( + provider=self.name, + code="UNKNOWN_ERROR", + message=f"Error inesperado: {str(e)}", + retriable=False, + original_error=e, + ) diff --git a/tests/test_providers_openai_adapter.py b/tests/test_providers_openai_adapter.py new file mode 100644 index 0000000..57d7368 --- /dev/null +++ b/tests/test_providers_openai_adapter.py @@ -0,0 +1,56 @@ +import pytest +from unittest.mock import AsyncMock, MagicMock + +from api.providers.openai_adapter import OpenAIAdapter +from api.schemas import ChatRequest, Message +from api.infra.http_client import HTTPClient + + +@pytest.mark.asyncio +async def test_openai_stream(monkeypatch): + # Mock HTTPClient + mock_http_client = AsyncMock(spec=HTTPClient) + + async def mock_stream_post(*args, **kwargs): + yield b'data: {"choices": [{"delta": {"content": "hi"}}]}' + + mock_http_client.stream_post.side_effect = mock_stream_post + adapter = OpenAIAdapter(http_client=mock_http_client, api_key="test_key") + req = ChatRequest( + messages=[Message(role="user", content="hi")], model="gpt-4o-mini" + ) + monkeypatch.setattr( + adapter, "_get_headers", lambda: {"Authorization": "Bearer test_key"} + ) + + gen = adapter.stream(req) + chunk = await anext(gen) + assert chunk == "hi" + assert isinstance(chunk, str) + + +@pytest.mark.asyncio +async def test_openai_generate(monkeypatch): + # Mock HTTPClient + mock_http_client = AsyncMock(spec=HTTPClient) + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.raise_for_status = MagicMock() + mock_response.json.return_value = { + "choices": [{"message": {"content": "hi"}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + mock_http_client.post.return_value = mock_response + + adapter = OpenAIAdapter(http_client=mock_http_client, api_key="test_key") + req = ChatRequest( + messages=[Message(role="user", content="hi")], 
model="gpt-4o-mini" + ) + monkeypatch.setattr( + adapter, "_get_headers", lambda: {"Authorization": "Bearer test_key"} + ) + + resp = await adapter.generate(req) + assert resp.text == "hi" + assert resp.provider == "openai" + assert resp.provider_meta["tokens_total"] == 15 From 8b027d9551306249be4ea4a0aa0eaaef198d1bb4 Mon Sep 17 00:00:00 2001 From: HC-ONLINE Date: Tue, 3 Mar 2026 14:49:27 -0500 Subject: [PATCH 3/4] =?UTF-8?q?feat(adapter):=20a=C3=B1adir=20configuraci?= =?UTF-8?q?=C3=B3n=20del=20proveedor=20OpenAI=20en=20el=20lifespan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/main.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/api/main.py b/api/main.py index 1515167..3310ff8 100644 --- a/api/main.py +++ b/api/main.py @@ -19,6 +19,7 @@ from api.providers.base import ProviderAdapter from api.providers.groq_adapter import GroqAdapter from api.providers.openrouter_adapter import OpenRouterAdapter +from api.providers.openai_adapter import OpenAIAdapter from api.providers.ollama_adapter import OllamaAdapter from api.router import Router from api.orchestrator import Orchestrator @@ -77,6 +78,19 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: "OPENROUTER_API_KEY no configurada, OpenRouter no estará disponible" ) + if settings.openai_api_key: + openai_adapter = OpenAIAdapter( + http_client=http_client, + api_key=settings.openai_api_key, + base_url=settings.openai_base_url, + timeout=settings.provider_timeout, + default_model=settings.openai_default_model, + ) + providers.append(openai_adapter) + logger.info("Proveedor OpenAI configurado") + else: + logger.warning("OPENAI_API_KEY no configurada, OpenAI no estará disponible") + # Ollama: siempre intentar configurar (no requiere API key obligatoria) try: ollama_adapter = OllamaAdapter( From 928c2028c42b4fc09da3e4603c738840d824340c Mon Sep 17 00:00:00 2001 From: HC-ONLINE Date: Tue, 3 Mar 2026 14:57:46 -0500 Subject: [PATCH 4/4] 
feat(docs): actualizar README, ROADMAP y configuration para incluir OpenAI --- README.md | 34 +++++++++++++++++--------------- ROADMAP.md | 25 ++++++++++++------------ docs/configuration.md | 45 +++++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 42506ac..0a3eeb4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ModelRouter -**API HTTP asíncrona con streaming** que orquesta múltiples proveedores de LLM (Groq, OpenRouter y Ollama) con fallback automático y observabilidad. +**API HTTP asíncrona con streaming** que orquesta múltiples proveedores de LLM con fallback automático y observabilidad. [![CI/CD](https://github.com/HC-ONLINE/ModelRouter/workflows/CI%2FCD%20Pipeline/badge.svg)](https://github.com/HC-ONLINE/ModelRouter/actions) [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) @@ -11,10 +11,14 @@ ## Características Principales -* **Orquestación Multi-proveedor:** Fallback automático entre Groq, OpenRouter y Ollama. -* **Streaming Nativo:** Soporte para Server-Sent Events (SSE). -* **Resiliencia:** Rate limiting, blacklist temporal y backoff exponencial. -* **Production Ready:** Métricas Prometheus, logs estructurados y Dockerizado. +- **Orquestación Multi-proveedor:** Fallback automático entre: + - Groq + - OpenRouter + - OpenAI + - Ollama +- **Streaming Nativo:** Soporte para Server-Sent Events (SSE). +- **Resiliencia:** Rate limiting, blacklist temporal y backoff exponencial. +- **Production Ready:** Métricas Prometheus, logs estructurados y Dockerizado. --- @@ -67,12 +71,12 @@ curl -N -X POST http://localhost:8000/stream \ ### Documentación Detallada -* [Arquitectura](docs/architecture.md) - Cómo funciona internamente. -* [Configuración](docs/configuration.md) - Variables de entorno y rate limits. -* [Ejemplos de Uso](docs/examples.md) - Ejemplos con `curl` y `fetch`. 
-* [Desarrollo](docs/development.md) - Guía para contribuir, tests y linting. -* [Observabilidad](docs/observability.md) - Métricas y Logs. -* [Seguridad](docs/security.md) - Notas de seguridad y legal. +- [Arquitectura](docs/architecture.md) - Cómo funciona internamente. +- [Configuración](docs/configuration.md) - Variables de entorno y rate limits. +- [Ejemplos de Uso](docs/examples.md) - Ejemplos con `curl` y `fetch`. +- [Desarrollo](docs/development.md) - Guía para contribuir, tests y linting. +- [Observabilidad](docs/observability.md) - Métricas y Logs. +- [Seguridad](docs/security.md) - Notas de seguridad y legal. Estos documentos están en la carpeta `docs/`. @@ -102,10 +106,10 @@ Este proyecto está bajo la Licencia Apache-2.0 (Apache License 2.0). Ver [LICEN Este proyecto es para **uso personal**. Asegúrate de: -* Leer y cumplir los **Terms of Service** de los proveedores usados -* No usar rotación de proveedores para **evadir límites** de uso -* Respetar **rate limits** y políticas de cada proveedor -* No almacenar/procesar datos sensibles sin las medidas de seguridad apropiadas +- Leer y cumplir los **Terms of Service** de los proveedores usados +- No usar rotación de proveedores para **evadir límites** de uso +- Respetar **rate limits** y políticas de cada proveedor +- No almacenar/procesar datos sensibles sin las medidas de seguridad apropiadas **El autor no se hace responsable del uso indebido de esta herramienta.** diff --git a/ROADMAP.md b/ROADMAP.md index af5d21f..917b40e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2,22 +2,23 @@ ## Completado -- [X] Scaffold proyecto + Docker -- [X] Adapters Groq, OpenRouter y Ollama -- [X] Router con fallback -- [X] Orchestrator -- [X] Endpoints /chat y /stream -- [X] Métricas y logging -- [X] Tests unitarios -- [X] CI/CD -- [X] Definir Rate Limiting por proveedor -- [X] Selección Explícita de Modelo por Proveedor -- [X] Permitir especificar de forma opcional un proveedor en la request +- [x] Scaffold proyecto + 
Docker +- [x] Adapters Groq, OpenRouter y Ollama +- [x] Router con fallback +- [x] Orchestrator +- [x] Endpoints /chat y /stream +- [x] Métricas y logging +- [x] Tests unitarios +- [x] CI/CD +- [x] Definir Rate Limiting por proveedor +- [x] Selección Explícita de Modelo por Proveedor +- [x] Permitir especificar de forma opcional un proveedor en la request +- [x] Soporte para más proveedores (OpenAI) ## Próximos pasos - [ ] Persistencia de historiales (PostgreSQL) -- [ ] Soporte para más proveedores (Anthropic, OpenAI) +- [ ] Soporte para más proveedores (Anthropic, Gemini, etc.) - [ ] Dashboard Grafana pre-configurado para mas información, ver las issues en el repositorio de GitHub. diff --git a/docs/configuration.md b/docs/configuration.md index bacb670..d96fbb1 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -12,27 +12,30 @@ Listado de variables relevantes y su propósito. Para la configuración completa ## Variables ampliadas -| Variable | Descripción | Por defecto | -|----------------------------------|--------------------------------------|------------------------------| -| `GROQ_API_KEY` | Clave API de Groq | - | -| `OPENROUTER_API_KEY` | Clave API de OpenRouter | - | -| `OLLAMA_API_KEY` | Clave API de Ollama (opcional) | - | -| `OLLAMA_BASE_URL` | URL de Ollama | `http://localhost:11434` | -| `OLLAMA_DEFAULT_MODEL` | Modelo por defecto de Ollama | `llama3.2:1b` | -| `OPENROUTER_DEFAULT_MODEL` | Modelo por defecto de OpenRouter | `openai/gpt-3.5-turbo` | -| `GROQ_DEFAULT_MODEL` | Modelo por defecto de Groq | `llama-3.3-70b-versatile` | -| `API_KEY` | Clave para autenticar clientes | - | -| `REDIS_URL` | URL de conexión Redis | `redis://localhost:6379/0` | -| `PROVIDER_TIMEOUT` | Timeout por proveedor (s) | `30.0` | -| `FIRST_CHUNK_TIMEOUT` | Timeout primer chunk streaming (s) | `3.0` | -| `MAX_OPERATION_TIMEOUT` | Timeout global operación (s) | `120.0` | -| `BACKOFF_BASE_SECONDS` | Backoff base exponencial | `5` | -| `BACKOFF_MAX_SECONDS` | 
Backoff máximo | `300` | -| `RATE_LIMIT_REQUESTS_PER_MINUTE` | Rate limit global por minuto | `60` | -| `GROQ_RATE_LIMIT` | Rate limit específico Groq (req/min) | `30` | -| `OPENROUTER_RATE_LIMIT` | Rate limit OpenRouter (req/min) | `20` | -| `OLLAMA_RATE_LIMIT` | Rate limit Ollama (req/min) | `100` | -| `MAX_CONCURRENT_STREAMS` | Streams concurrentes máx. | `10` | +| Variable | Descripción | Por defecto | +| -------------------------------- | ------------------------------------ | -------------------------- | +| `GROQ_API_KEY` | Clave API de Groq | - | +| `OPENROUTER_API_KEY` | Clave API de OpenRouter | - | +| `OPENAI_API_KEY` | Clave API de OpenAI (opcional) | - | +| `OLLAMA_API_KEY` | Clave API de Ollama (opcional) | - | +| `OLLAMA_BASE_URL` | URL de Ollama | `http://localhost:11434` | +| `OLLAMA_DEFAULT_MODEL` | Modelo por defecto de Ollama | `llama3.2:1b` | +| `OPENROUTER_DEFAULT_MODEL` | Modelo por defecto de OpenRouter | `openai/gpt-3.5-turbo` | +| `GROQ_DEFAULT_MODEL` | Modelo por defecto de Groq | `llama-3.3-70b-versatile` | +| `OPENAI_DEFAULT_MODEL` | Modelo por defecto de OpenAI | `gpt-4o-mini` | +| `API_KEY` | Clave para autenticar clientes | - | +| `REDIS_URL` | URL de conexión Redis | `redis://localhost:6379/0` | +| `PROVIDER_TIMEOUT` | Timeout por proveedor (s) | `30.0` | +| `FIRST_CHUNK_TIMEOUT` | Timeout primer chunk streaming (s) | `3.0` | +| `MAX_OPERATION_TIMEOUT` | Timeout global operación (s) | `120.0` | +| `BACKOFF_BASE_SECONDS` | Backoff base exponencial | `5` | +| `BACKOFF_MAX_SECONDS` | Backoff máximo | `300` | +| `RATE_LIMIT_REQUESTS_PER_MINUTE` | Rate limit global por minuto | `60` | +| `GROQ_RATE_LIMIT` | Rate limit específico Groq (req/min) | `30` | +| `OPENROUTER_RATE_LIMIT` | Rate limit OpenRouter (req/min) | `20` | +| `OPENAI_RATE_LIMIT` | Rate limit OpenAI (req/min) | `30` | +| `OLLAMA_RATE_LIMIT` | Rate limit Ollama (req/min) | `100` | +| `MAX_CONCURRENT_STREAMS` | Streams concurrentes máx. | `10` | ## Notas operativas