From 2fb6bbb01b4f592b4ededce75d032ec301420a02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?=
Date: Wed, 21 Jan 2026 16:42:13 +0100
Subject: [PATCH] fix: working URL with both http and https

---
 docker/compose/docker-compose.deepseek-14b-gpu.yml    |  3 +--
 docker/compose/docker-compose.gemma-27b-gpu.yml       |  3 +--
 docker/compose/docker-compose.gemma-4b-gpu.ci.yml     |  3 +--
 docker/compose/docker-compose.gpt-120b-gpu.yml        |  3 +--
 docker/compose/docker-compose.gpt-20b-gpu.ci.yml      |  3 +--
 docker/compose/docker-compose.gpt-20b-gpu.yml         |  3 +--
 docker/compose/docker-compose.llama-1b-cpu.yml        |  3 +--
 docker/compose/docker-compose.llama-1b-gpu.ci.yml     |  3 +--
 docker/compose/docker-compose.llama-1b-gpu.yml        |  3 +--
 docker/compose/docker-compose.llama-3b-gpu.yml        |  3 +--
 docker/compose/docker-compose.llama-70b-gpu.yml       |  3 +--
 docker/compose/docker-compose.llama-8b-gpu.yml        |  3 +--
 docker/compose/docker-compose.lmstudio.yml            |  3 +--
 docker/compose/docker-compose.nilai-prod-1.yml        |  3 +--
 docker/compose/docker-compose.nilai-prod-2.yml        |  6 ++----
 docker/compose/docker-compose.qwen-2b-gpu.ci.yml      |  3 +--
 nilai-api/src/nilai_api/app.py                        | 10 ++++------
 nilai-models/src/nilai_models/daemon.py               |  6 ++----
 nilai-models/src/nilai_models/lmstudio_announcer.py   |  6 ++----
 packages/nilai-common/src/nilai_common/config/host.py |  8 ++++----
 packages/nilai-common/src/nilai_common/discovery.py   |  2 --
 21 files changed, 29 insertions(+), 54 deletions(-)

diff --git a/docker/compose/docker-compose.deepseek-14b-gpu.yml b/docker/compose/docker-compose.deepseek-14b-gpu.yml
index d259715c..d0dd3824 100644
--- a/docker/compose/docker-compose.deepseek-14b-gpu.yml
+++ b/docker/compose/docker-compose.deepseek-14b-gpu.yml
@@ -22,8 +22,7 @@ services:
       --tensor-parallel-size 1
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=deepseek_14b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://deepseek_14b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=false
     volumes:
diff --git a/docker/compose/docker-compose.gemma-27b-gpu.yml b/docker/compose/docker-compose.gemma-27b-gpu.yml
index 78e6e8c3..bcd5f5ae 100644
--- a/docker/compose/docker-compose.gemma-27b-gpu.yml
+++ b/docker/compose/docker-compose.gemma-27b-gpu.yml
@@ -27,8 +27,7 @@ services:
       --kv-cache-dtype fp8
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=gemma_27b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gemma_27b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=false
       - MULTIMODAL_SUPPORT=true
diff --git a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
index 6c7a266f..8374c599 100644
--- a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
+++ b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
@@ -26,8 +26,7 @@ services:
       --uvicorn-log-level warning
 
     environment:
-      - SVC_HOST=gemma_4b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gemma_4b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=false
       - MULTIMODAL_SUPPORT=true
diff --git a/docker/compose/docker-compose.gpt-120b-gpu.yml b/docker/compose/docker-compose.gpt-120b-gpu.yml
index f7898cf7..0cf99584 100644
--- a/docker/compose/docker-compose.gpt-120b-gpu.yml
+++ b/docker/compose/docker-compose.gpt-120b-gpu.yml
@@ -26,8 +26,7 @@ services:
       --tensor-parallel-size 1
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=gpt_120b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gpt_120b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
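Note: every compose service in this patch now passes a single SVC_URL
instead of the SVC_HOST/SVC_PORT pair, so the scheme (http or https)
travels with the address. Any consumer that still needs the host and
port separately can recover them from the URL; a minimal sketch using
only the standard library (the helper name split_svc_url is
hypothetical, not part of this patch):

    import os
    from urllib.parse import urlsplit

    def split_svc_url(default: str = "http://localhost:8000") -> tuple[str, str, int]:
        """Parse SVC_URL into (scheme, host, port)."""
        parts = urlsplit(os.getenv("SVC_URL", default))
        # urlsplit reports port as None when absent; fall back to the scheme default
        port = parts.port or (443 if parts.scheme == "https" else 80)
        return parts.scheme, parts.hostname or "localhost", port

    # e.g. SVC_URL=https://gemma_27b_gpu:8000 -> ("https", "gemma_27b_gpu", 8000)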
diff --git a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
index 73988af0..05622d47 100644
--- a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
+++ b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
@@ -21,8 +21,7 @@ services:
     command: >
       --model openai/gpt-oss-20b --gpu-memory-utilization 0.95 --max-model-len 10000 --max-num-batched-tokens 10000 --max-num-seqs 2 --tensor-parallel-size 1 --uvicorn-log-level warning --async-scheduling
    environment:
-      - SVC_HOST=gpt_20b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gpt_20b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.gpt-20b-gpu.yml b/docker/compose/docker-compose.gpt-20b-gpu.yml
index c84ba762..7fa188d9 100644
--- a/docker/compose/docker-compose.gpt-20b-gpu.yml
+++ b/docker/compose/docker-compose.gpt-20b-gpu.yml
@@ -26,8 +26,7 @@ services:
       --tensor-parallel-size 1
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=gpt_20b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gpt_20b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.llama-1b-cpu.yml b/docker/compose/docker-compose.llama-1b-cpu.yml
index 4e97a31a..c1f74e9a 100644
--- a/docker/compose/docker-compose.llama-1b-cpu.yml
+++ b/docker/compose/docker-compose.llama-1b-cpu.yml
@@ -17,8 +17,7 @@ services:
       --tool-call-parser llama3_json
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=llama_1b_cpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_1b_cpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.llama-1b-gpu.ci.yml b/docker/compose/docker-compose.llama-1b-gpu.ci.yml
index 1b8b242f..20a57190 100644
--- a/docker/compose/docker-compose.llama-1b-gpu.ci.yml
+++ b/docker/compose/docker-compose.llama-1b-gpu.ci.yml
@@ -30,8 +30,7 @@ services:
       --uvicorn-log-level warning
       --dtype half
     environment:
-      - SVC_HOST=llama_1b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_1b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
       - CUDA_LAUNCH_BLOCKING=1
diff --git a/docker/compose/docker-compose.llama-1b-gpu.yml b/docker/compose/docker-compose.llama-1b-gpu.yml
index 9da1a810..dc7f27f1 100644
--- a/docker/compose/docker-compose.llama-1b-gpu.yml
+++ b/docker/compose/docker-compose.llama-1b-gpu.yml
@@ -28,8 +28,7 @@ services:
       --tool-call-parser llama3_json
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=llama_1b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_1b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.llama-3b-gpu.yml b/docker/compose/docker-compose.llama-3b-gpu.yml
index 14e85594..0a059d10 100644
--- a/docker/compose/docker-compose.llama-3b-gpu.yml
+++ b/docker/compose/docker-compose.llama-3b-gpu.yml
@@ -28,8 +28,7 @@ services:
       --tool-call-parser llama3_json
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=llama_3b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_3b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.llama-70b-gpu.yml b/docker/compose/docker-compose.llama-70b-gpu.yml
index b08a733e..cecb2f95 100644
--- a/docker/compose/docker-compose.llama-70b-gpu.yml
+++ b/docker/compose/docker-compose.llama-70b-gpu.yml
@@ -28,8 +28,7 @@ services:
       --tool-call-parser llama3_json
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=llama_70b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_70b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.llama-8b-gpu.yml b/docker/compose/docker-compose.llama-8b-gpu.yml
index f2284c12..75c54928 100644
--- a/docker/compose/docker-compose.llama-8b-gpu.yml
+++ b/docker/compose/docker-compose.llama-8b-gpu.yml
@@ -29,8 +29,7 @@ services:
       --enable-auto-tool-choice
       --chat-template /daemon/nilai-models/templates/llama3.1_tool_json.jinja
     environment:
-      - SVC_HOST=llama_8b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_8b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.lmstudio.yml b/docker/compose/docker-compose.lmstudio.yml
index 222736d7..d26de4db 100644
--- a/docker/compose/docker-compose.lmstudio.yml
+++ b/docker/compose/docker-compose.lmstudio.yml
@@ -7,8 +7,7 @@ services:
       redis:
         condition: service_healthy
     environment:
-      - SVC_HOST=host.docker.internal
-      - SVC_PORT=1234
+      - SVC_URL=http://host.docker.internal:1234
       - DISCOVERY_URL=redis://redis:6379
       - LMSTUDIO_SUPPORTED_FEATURES=chat_completion
     extra_hosts:
diff --git a/docker/compose/docker-compose.nilai-prod-1.yml b/docker/compose/docker-compose.nilai-prod-1.yml
index 416efa53..52fdc2a2 100644
--- a/docker/compose/docker-compose.nilai-prod-1.yml
+++ b/docker/compose/docker-compose.nilai-prod-1.yml
@@ -27,8 +27,7 @@ services:
       --kv-cache-dtype fp8
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=gemma_27b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gemma_27b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=false
       - MULTIMODAL_SUPPORT=true
diff --git a/docker/compose/docker-compose.nilai-prod-2.yml b/docker/compose/docker-compose.nilai-prod-2.yml
index ac3e2eb8..a0973b67 100644
--- a/docker/compose/docker-compose.nilai-prod-2.yml
+++ b/docker/compose/docker-compose.nilai-prod-2.yml
@@ -35,8 +35,7 @@ services:
       --enable-auto-tool-choice
       --chat-template /daemon/nilai-models/templates/llama3.1_tool_json.jinja
     environment:
-      - SVC_HOST=llama_8b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://llama_8b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
@@ -74,8 +73,7 @@ services:
       --tensor-parallel-size 1
       --uvicorn-log-level warning
     environment:
-      - SVC_HOST=gpt_20b_gpu
-      - SVC_PORT=8000
+      - SVC_URL=http://gpt_20b_gpu:8000
       - DISCOVERY_URL=redis://redis:6379
       - TOOL_SUPPORT=true
     volumes:
diff --git a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
index 9573729d..021544c6 100644
--- a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
+++ b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
@@ -42,8 +42,7 @@ services:
       ]
 
     environment:
-      SVC_HOST: qwen2vl_2b_gpu
-      SVC_PORT: "8000"
+      SVC_URL: http://qwen2vl_2b_gpu:8000
       DISCOVERY_URL: redis://redis:6379
       TOOL_SUPPORT: "true"
       MULTIMODAL_SUPPORT: "true"
diff --git a/nilai-api/src/nilai_api/app.py b/nilai-api/src/nilai_api/app.py
index a01cc593..adf5ec18 100644
--- a/nilai-api/src/nilai_api/app.py
+++ b/nilai-api/src/nilai_api/app.py
@@ -1,6 +1,3 @@
-# Fast API and serving
-
-
 from prometheus_fastapi_instrumentator import Instrumentator
 from fastapi import Depends, FastAPI
 from nilai_api.auth import get_auth_info
@@ -25,7 +22,8 @@ async def lifespan(app: FastAPI):
     yield {"redis": client, "redis_rate_limit_command": rate_limit_command}
 
 
-host = SETTINGS.host
+api_base = SETTINGS.url.rstrip("/")
+openapi_url = f"{api_base}/openapi.json"
 
 description = f"""
 An AI model serving platform powered by secure, confidential computing.
@@ -39,7 +37,7 @@ async def lifespan(app: FastAPI):
 pip install openapi-generator-cli
 
 # Generate your Python client
-openapi-generator-cli generate -i https://{host}/openapi.json -g python -o ./python-client
+openapi-generator-cli generate -i {openapi_url} -g python -o ./python-client
 ```
 
 ### For JavaScript/TypeScript Developers
@@ -48,7 +46,7 @@ async def lifespan(app: FastAPI):
 npm install @openapitools/openapi-generator-cli -g
 
 # Generate your TypeScript client
-openapi-generator-cli generate -i https://{host}/openapi.json -o ./typescript-client
+openapi-generator-cli generate -i {openapi_url} -o ./typescript-client
 ```
 
 After generating, you'll have a fully functional client library that makes it easy to interact with our AI services. No more manual API request handling!
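Note: app.py now derives the OpenAPI document location from SETTINGS.url,
so the scheme is no longer hard-coded to https in the generated docs. A
minimal standalone sketch of the normalization the new code performs
(the example base URL is an assumption for illustration, not taken from
a real deployment):

    url = "https://nilai.example.com/"    # assumed example; any http(s) base works
    api_base = url.rstrip("/")            # avoid "//openapi.json" on trailing-slash URLs
    openapi_url = f"{api_base}/openapi.json"
    assert openapi_url == "https://nilai.example.com/openapi.json"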
diff --git a/nilai-models/src/nilai_models/daemon.py b/nilai-models/src/nilai_models/daemon.py
index 998856a5..106377e8 100644
--- a/nilai-models/src/nilai_models/daemon.py
+++ b/nilai-models/src/nilai_models/daemon.py
@@ -23,7 +23,7 @@ async def get_metadata():
     while True:
         url = None
         try:
-            url = f"http://{SETTINGS.host}:{SETTINGS.port}/v1/models"
+            url = f"{SETTINGS.url}/v1/models"
             async with httpx.AsyncClient() as client:
                 response = await client.get(url)
                 response.raise_for_status()
@@ -94,9 +94,7 @@ async def main():
 
     # Fetch metadata and create endpoint
     metadata = await get_metadata()
-    model_endpoint = ModelEndpoint(
-        url=f"http://{SETTINGS.host}:{SETTINGS.port}", metadata=metadata
-    )
+    model_endpoint = ModelEndpoint(url=SETTINGS.url.rstrip("/"), metadata=metadata)
 
     # Create service task
     service_task = asyncio.create_task(run_service(discovery_service, model_endpoint))
diff --git a/nilai-models/src/nilai_models/lmstudio_announcer.py b/nilai-models/src/nilai_models/lmstudio_announcer.py
index ce58fd6e..1b73c126 100644
--- a/nilai-models/src/nilai_models/lmstudio_announcer.py
+++ b/nilai-models/src/nilai_models/lmstudio_announcer.py
@@ -144,9 +144,7 @@ async def main():
     logging.basicConfig(level=logging.INFO)
 
     # Load configuration from environment
-    api_base = os.getenv(
-        "LMSTUDIO_API_BASE", f"http://{SETTINGS.host}:{SETTINGS.port}"
-    ).rstrip("/")
+    api_base = os.getenv("LMSTUDIO_API_BASE", SETTINGS.url).rstrip("/")
     models_endpoint = os.getenv("LMSTUDIO_MODELS_ENDPOINT", "/v1/models")
     registration_url = os.getenv("LMSTUDIO_REGISTRATION_URL", api_base).rstrip("/")
     lease_ttl = int(os.getenv("LMSTUDIO_LEASE_TTL", "60"))
@@ -192,7 +190,7 @@
     )
 
     logger.info(
-        "Announcing LMStudio models %s via %s with Redis at %s:%s",
+        "Announcing LMStudio models %s via %s with Redis at %s",
         ", ".join(model_ids),
         registration_url,
         SETTINGS.discovery_url,
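Note: daemon.py appends "/v1/models" directly to SETTINGS.url, so the
metadata probe assumes the configured value carries no trailing slash,
while ModelEndpoint receives the rstrip'd form. A slash-safe join, as a
sketch only (the helper name join_url is hypothetical, not part of this
patch):

    def join_url(base: str, path: str) -> str:
        """Join a base URL and a path without doubling or losing slashes."""
        return f"{base.rstrip('/')}/{path.lstrip('/')}"

    assert join_url("http://llama_8b_gpu:8000/", "/v1/models") == "http://llama_8b_gpu:8000/v1/models"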
diff --git a/packages/nilai-common/src/nilai_common/config/host.py b/packages/nilai-common/src/nilai_common/config/host.py
index 3855824e..c62f743a 100644
--- a/packages/nilai-common/src/nilai_common/config/host.py
+++ b/packages/nilai-common/src/nilai_common/config/host.py
@@ -12,8 +12,9 @@ def to_bool(value: str) -> bool:
 class HostSettings(BaseModel):
     """Infrastructure and service host configuration."""
 
-    host: str = Field(default="localhost", description="Host of the service")
-    port: int = Field(default=8000, description="Port of the service")
+    url: str = Field(
+        default="http://localhost:8000", description="Base URL of the service"
+    )
     discovery_url: str = Field(
         default="redis://redis:6379",
         description="Redis URL of the discovery service (preferred)",
@@ -23,8 +24,7 @@
 
 # Global host settings instance
 SETTINGS: HostSettings = HostSettings(
-    host=str(os.getenv("SVC_HOST", "localhost")),
-    port=int(os.getenv("SVC_PORT", 8000)),
+    url=str(os.getenv("SVC_URL", "http://localhost:8000")),
     discovery_url=str(os.getenv("DISCOVERY_URL", "redis://redis:6379")),
     gunicorn_workers=int(os.getenv("NILAI_GUNICORN_WORKERS", 10)),
 )
diff --git a/packages/nilai-common/src/nilai_common/discovery.py b/packages/nilai-common/src/nilai_common/discovery.py
index 66a6dcea..acff9504 100644
--- a/packages/nilai-common/src/nilai_common/discovery.py
+++ b/packages/nilai-common/src/nilai_common/discovery.py
@@ -23,8 +23,6 @@ def __init__(
         Initialize Redis client for model service discovery.
 
         :param url: Redis URL (e.g., redis:// or rediss://). Preferred default.
-        :param host: Redis server host
-        :param port: Redis server port
         :param lease_ttl: TTL time for endpoint registration (in seconds)
         """
         self.lease_ttl = lease_ttl
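Note: HostSettings keeps url as a plain str, so a value without a scheme
(e.g. "localhost:8000") would only surface as a failure at request time.
Eager validation could catch it at startup instead; a sketch assuming
pydantic v2 (this validator is not part of the patch):

    from urllib.parse import urlsplit

    from pydantic import BaseModel, Field, field_validator

    class HostSettings(BaseModel):
        """Infrastructure and service host configuration."""

        url: str = Field(
            default="http://localhost:8000", description="Base URL of the service"
        )

        @field_validator("url")
        @classmethod
        def _check_scheme(cls, value: str) -> str:
            # Accept both plain and TLS endpoints; reject anything else early.
            parts = urlsplit(value)
            if parts.scheme not in ("http", "https") or not parts.netloc:
                raise ValueError(f"SVC_URL must be http(s)://host[:port], got {value!r}")
            return value.rstrip("/")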