Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .env.ci
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ POSTGRES_PORT = 5432
REDIS_URL = "redis://redis:6379"

# Model Discovery Redis Docker Compose Config
DISCOVERY_HOST = "redis"
DISCOVERY_PORT = 6379
DISCOVERY_URL = "redis://redis:6379"

# Grafana Docker Compose Config
GF_SECURITY_ADMIN_USER = "admin"
Expand Down
3 changes: 3 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ POSTGRES_PORT = 5432
# Redis Docker Compose Config
REDIS_URL = "redis://redis:6379"

# Model Discovery Redis Docker Compose Config
DISCOVERY_URL = "redis://redis:6379"

# Etcd Docker Compose Config
ETCD_HOST = "etcd"
ETCD_PORT = 2379
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.deepseek-14b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ services:
environment:
- SVC_HOST=deepseek_14b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=false
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.gemma-27b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ services:
environment:
- SVC_HOST=gemma_27b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=false
- MULTIMODAL_SUPPORT=true
- MODEL_NUM_RETRIES=60
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.gemma-4b-gpu.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ services:
environment:
- SVC_HOST=gemma_4b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=false
- MULTIMODAL_SUPPORT=true
- CUDA_LAUNCH_BLOCKING=1
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.gpt-120b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ services:
environment:
- SVC_HOST=gpt_120b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.gpt-20b-gpu.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ services:
environment:
- SVC_HOST=gpt_20b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.gpt-20b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ services:
environment:
- SVC_HOST=gpt_20b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-1b-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ services:
environment:
- SVC_HOST=llama_1b_cpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-1b-gpu.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ services:
environment:
- SVC_HOST=llama_1b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
- CUDA_LAUNCH_BLOCKING=1
volumes:
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-1b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ services:
environment:
- SVC_HOST=llama_1b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-3b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ services:
environment:
- SVC_HOST=llama_3b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-70b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ services:
environment:
- SVC_HOST=llama_70b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.llama-8b-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ services:
environment:
- SVC_HOST=llama_8b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.lmstudio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ services:
environment:
- SVC_HOST=host.docker.internal
- SVC_PORT=1234
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- LMSTUDIO_SUPPORTED_FEATURES=chat_completion
extra_hosts:
- "host.docker.internal:host-gateway"
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.nilai-prod-1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ services:
environment:
- SVC_HOST=gemma_27b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=false
- MULTIMODAL_SUPPORT=true
- MODEL_NUM_RETRIES=60
Expand Down
6 changes: 2 additions & 4 deletions docker/compose/docker-compose.nilai-prod-2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ services:
environment:
- SVC_HOST=llama_8b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface
Expand Down Expand Up @@ -77,8 +76,7 @@ services:
environment:
- SVC_HOST=gpt_20b_gpu
- SVC_PORT=8000
- DISCOVERY_HOST=redis
- DISCOVERY_PORT=6379
- DISCOVERY_URL=redis://redis:6379
- TOOL_SUPPORT=true
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
Expand Down
3 changes: 1 addition & 2 deletions docker/compose/docker-compose.qwen-2b-gpu.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ services:
environment:
SVC_HOST: qwen2vl_2b_gpu
SVC_PORT: "8000"
DISCOVERY_HOST: redis
DISCOVERY_PORT: "6379"
DISCOVERY_URL: redis://redis:6379
TOOL_SUPPORT: "true"
MULTIMODAL_SUPPORT: "true"
CUDA_LAUNCH_BLOCKING: "1"
Expand Down
6 changes: 4 additions & 2 deletions nilai-api/src/nilai_api/config/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ class DatabaseConfig(BaseModel):


class DiscoveryConfig(BaseModel):
host: str = Field(default="localhost", description="Redis host for discovery")
port: int = Field(default=6379, description="Redis port for discovery")
url: str = Field(
default="redis://localhost:6379",
description="Redis URL for discovery (preferred default)",
)


class RedisConfig(BaseModel):
Expand Down
4 changes: 1 addition & 3 deletions nilai-api/src/nilai_api/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ def __init__(self):
self.private_key, self.public_key, self.b64_public_key = generate_key_pair()
self.sem = Semaphore(2)

self.discovery_service = ModelServiceDiscovery(
host=CONFIG.discovery.host, port=CONFIG.discovery.port
)
self.discovery_service = ModelServiceDiscovery(url=CONFIG.discovery.url)
self._discovery_initialized = False
self._uptime = time.time()

Expand Down
4 changes: 1 addition & 3 deletions nilai-models/src/nilai_models/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ async def main():
logging.basicConfig(level=logging.INFO)

# Initialize discovery service
discovery_service = ModelServiceDiscovery(
host=SETTINGS.discovery_host, port=SETTINGS.discovery_port
)
discovery_service = ModelServiceDiscovery(url=SETTINGS.discovery_url)
await discovery_service.initialize()

# Fetch metadata and create endpoint
Expand Down
13 changes: 4 additions & 9 deletions nilai-models/src/nilai_models/lmstudio_announcer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,12 @@ async def _fetch_model_ids(
async def _announce_model(
metadata: ModelMetadata,
base_url: str,
discovery_host: str,
discovery_port: int,
discovery_url: str,
lease_ttl: int,
prefix: str,
):
"""Register and maintain a model announcement in Redis."""
discovery = ModelServiceDiscovery(
host=discovery_host, port=discovery_port, lease_ttl=lease_ttl
)
discovery = ModelServiceDiscovery(url=discovery_url, lease_ttl=lease_ttl)
await discovery.initialize()

endpoint = ModelEndpoint(url=base_url.rstrip("/"), metadata=metadata)
Expand Down Expand Up @@ -198,8 +195,7 @@ async def main():
"Announcing LMStudio models %s via %s with Redis at %s",
", ".join(model_ids),
registration_url,
SETTINGS.discovery_host,
SETTINGS.discovery_port,
SETTINGS.discovery_url,
)

# Create announcement tasks for all models
Expand All @@ -220,8 +216,7 @@ async def main():
multimodal_default,
),
base_url=registration_url,
discovery_host=SETTINGS.discovery_host,
discovery_port=SETTINGS.discovery_port,
discovery_url=SETTINGS.discovery_url,
lease_ttl=lease_ttl,
prefix=discovery_prefix,
)
Expand Down
11 changes: 4 additions & 7 deletions packages/nilai-common/src/nilai_common/config/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ class HostSettings(BaseModel):

host: str = Field(default="localhost", description="Host of the service")
port: int = Field(default=8000, description="Port of the service")
discovery_host: str = Field(
default="localhost", description="Host of the discovery service"
)
discovery_port: int = Field(
default=6379, description="Port of the discovery service"
discovery_url: str = Field(
default="redis://redis:6379",
description="Redis URL of the discovery service (preferred)",
)
gunicorn_workers: int = Field(default=10, description="Number of gunicorn workers")

Expand All @@ -27,7 +25,6 @@ class HostSettings(BaseModel):
SETTINGS: HostSettings = HostSettings(
host=str(os.getenv("SVC_HOST", "localhost")),
port=int(os.getenv("SVC_PORT", 8000)),
discovery_host=str(os.getenv("DISCOVERY_HOST", "redis")),
discovery_port=int(os.getenv("DISCOVERY_PORT", 6379)),
discovery_url=str(os.getenv("DISCOVERY_URL", "redis://redis:6379")),
gunicorn_workers=int(os.getenv("NILAI_GUNICORN_WORKERS", 10)),
)
17 changes: 11 additions & 6 deletions packages/nilai-common/src/nilai_common/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@


class ModelServiceDiscovery:
def __init__(self, host: str = "localhost", port: int = 6379, lease_ttl: int = 60):
def __init__(
self,
url: str = "redis://localhost:6379",
lease_ttl: int = 60,
):
"""
Initialize Redis client for model service discovery.

:param url: Redis connection URL (e.g., redis://localhost:6379; redis:// or rediss:// schemes).
This single URL replaces the former separate host and port parameters.
:param lease_ttl: TTL time for endpoint registration (in seconds)
"""
self.host = host
self.port = port
self.lease_ttl = lease_ttl
self.url = url
self._client: Optional[redis.Redis] = None
self._model_key: Optional[str] = None

Expand All @@ -39,9 +43,10 @@ async def initialize(self):
Initialize the Redis client.
"""
if self._client is None:
self._client = await redis.Redis(
host=self.host, port=self.port, decode_responses=True
)
if self.url:
self._client = redis.from_url(self.url, decode_responses=True)
else:
raise ValueError("A Redis URL is required for service discovery")

@property
async def client(self) -> redis.Redis:
Expand Down
8 changes: 3 additions & 5 deletions tests/unit/nilai-common/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
async def model_service_discovery(redis_host_port):
"""Create a ModelServiceDiscovery instance connected to the test Redis container."""
host, port = redis_host_port
discovery = ModelServiceDiscovery(host=host, port=port, lease_ttl=60)
discovery = ModelServiceDiscovery(url=f"redis://{host}:{port}", lease_ttl=60)
await discovery.initialize()
yield discovery
await discovery.close()
Expand Down Expand Up @@ -172,8 +172,7 @@ async def test_keep_alive(model_service_discovery, model_endpoint):
"""Test the keep_alive functionality that refreshes TTL."""
# Register a model with a short TTL
short_ttl_discovery = ModelServiceDiscovery(
host=model_service_discovery.host,
port=model_service_discovery.port,
url=model_service_discovery.url,
lease_ttl=2, # 2 second TTL
)
await short_ttl_discovery.initialize()
Expand Down Expand Up @@ -214,8 +213,7 @@ async def test_keep_alive_with_stored_key(model_service_discovery, model_endpoin
"""Test keep_alive using the stored key from registration."""
# Register a model with a short TTL
short_ttl_discovery = ModelServiceDiscovery(
host=model_service_discovery.host,
port=model_service_discovery.port,
url=model_service_discovery.url,
lease_ttl=2, # 2 second TTL
)
await short_ttl_discovery.initialize()
Expand Down
Loading