Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions extralit-server/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# ==============================================================================
# Extralit Server Configuration Example
# ==============================================================================
# Copy this file to .env and configure the values for your deployment.
# Lines starting with # are comments and will be ignored.
# ==============================================================================

# ------------------------------------------------------------------------------
# Client API Configuration
# ------------------------------------------------------------------------------
# Use these when running the Extralit client SDK to connect to this server
# EXTRALIT_API_URL=http://localhost:6900
# EXTRALIT_API_KEY=your-api-key-here

# ------------------------------------------------------------------------------
# Authentication & Security
# ------------------------------------------------------------------------------
# Secret key for JWT token signing - CHANGE THIS IN PRODUCTION!
# Generate with: python -c "import secrets; print(secrets.token_urlsafe(32))"
# EXTRALIT_AUTH_SECRET_KEY=change-this-to-a-random-secret-key

# Local users database file (for non-OAuth authentication)
EXTRALIT_LOCAL_AUTH_USERS_DB_FILE=.users.yml

# ------------------------------------------------------------------------------
# Database Configuration
# ------------------------------------------------------------------------------
# SQLite (default, good for development)
EXTRALIT_DATABASE_URL=sqlite+aiosqlite:///./extralit-dev.db?check_same_thread=False

# PostgreSQL (recommended for production)
# EXTRALIT_DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/extralit

# ------------------------------------------------------------------------------
# Redis Configuration
# ------------------------------------------------------------------------------
# Redis is used for caching and job queues (Celery/RQ)
EXTRALIT_REDIS_URL=redis://localhost:6379/0

# ------------------------------------------------------------------------------
# S3-Compatible Object Storage (Optional)
# ------------------------------------------------------------------------------
# Configure S3 or S3-compatible storage (MinIO, DigitalOcean Spaces, etc.)
# EXTRALIT_S3_ENDPOINT, EXTRALIT_S3_ACCESS_KEY and EXTRALIT_S3_SECRET_KEY must all be set to use S3 storage (region and secure are optional)
# EXTRALIT_S3_ENDPOINT=http://localhost:9000
# EXTRALIT_S3_ACCESS_KEY=minioadmin
# EXTRALIT_S3_SECRET_KEY=minioadmin
# EXTRALIT_S3_REGION=us-east-1
# EXTRALIT_S3_SECURE=false

# MinIO-specific (alternative to EXTRALIT_S3_*)
# MINIO_ACCESS_KEY=minioadmin
# MINIO_SECRET_KEY=minioadmin

# ------------------------------------------------------------------------------
# Search Engine Configuration (Optional)
# ------------------------------------------------------------------------------
# EXTRALIT_SEARCH_ENGINE=elasticsearch
# EXTRALIT_ELASTICSEARCH=http://localhost:9200

# ------------------------------------------------------------------------------
# Marker PDF Processing Configuration
# ------------------------------------------------------------------------------
# How to run Marker: "local" (in-process) or "modal" (remote API)
MARKER_RUN_MODE=local

# Required when MARKER_RUN_MODE=modal
# MARKER_MODAL_BASE_URL=https://your-modal-deployment.modal.run
# MARKER_MODAL_TIMEOUT_SECS=600

# ------------------------------------------------------------------------------
# Document Preprocessing Configuration
# ------------------------------------------------------------------------------
PREPROCESSING_ENABLED=true
PREPROCESSING_ENABLE_ANALYSIS=true
PREPROCESSING_ROTATE_PAGES=true
PREPROCESSING_ROTATE_PAGES_THRESHOLD=2.0
PREPROCESSING_CLEAN=false
PREPROCESSING_QUIET=false

# ------------------------------------------------------------------------------
# Chat & Message Validation
# ------------------------------------------------------------------------------
EXTRALIT_MIN_MESSAGE_LENGTH=1
EXTRALIT_MAX_MESSAGE_LENGTH=20000
EXTRALIT_MIN_ROLE_LENGTH=1
EXTRALIT_MAX_ROLE_LENGTH=20

# ------------------------------------------------------------------------------
# HuggingFace & Telemetry
# ------------------------------------------------------------------------------
# Disable HuggingFace Hub telemetry collection
HF_HUB_DISABLE_TELEMETRY=true

# ------------------------------------------------------------------------------
# Weaviate Cloud Services (Optional)
# ------------------------------------------------------------------------------
# WCS_HTTP_URL=https://your-cluster.weaviate.network
# WCS_GRPC_URL=grpc://your-cluster.weaviate.network:50051
# WCS_API_KEY=your-wcs-api-key
# WCS_USERNAME=your-username
# WCS_PASSWORD=your-password

# ------------------------------------------------------------------------------
# LLM Service Configuration (Optional)
# ------------------------------------------------------------------------------
# EXTRALIT_EXTRALIT_URL=http://localhost:8000

# ------------------------------------------------------------------------------
# macOS-Specific Configuration
# ------------------------------------------------------------------------------
# Disable Objective-C fork safety warnings (macOS only)
# OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES

# ------------------------------------------------------------------------------
# Alembic Database Migrations
# ------------------------------------------------------------------------------
# ALEMBIC_CONFIG=alembic.ini
12 changes: 6 additions & 6 deletions extralit-server/src/extralit_server/api/schemas/v1/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from pydantic import BaseModel, Field

from extralit_server.config import settings

# Length bounds for chat messages, taken from the centralized, validated
# settings object (single source of truth; no direct os.getenv parsing here).
MIN_MESSAGE_LENGTH = settings.EXTRALIT_MIN_MESSAGE_LENGTH
MAX_MESSAGE_LENGTH = settings.EXTRALIT_MAX_MESSAGE_LENGTH

# Length bounds for chat role names, from the same settings object.
MIN_ROLE_LENGTH = settings.EXTRALIT_MIN_ROLE_LENGTH
MAX_ROLE_LENGTH = settings.EXTRALIT_MAX_ROLE_LENGTH


class ChatFieldValue(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import os
from typing import TYPE_CHECKING, Optional

import typer
import yaml
from pydantic import BaseModel, Field, constr

from extralit_server.config import settings
from extralit_server.database import AsyncSessionLocal
from extralit_server.models import User, UserRole

Expand Down Expand Up @@ -107,7 +107,7 @@ def _user_workspace_names(self, user: dict) -> list[str]:
def migrate():
    """Migrate users defined in YAML file to database.

    The YAML path is read from ``settings.EXTRALIT_LOCAL_AUTH_USERS_DB_FILE``
    and handed to ``UsersMigrator``, whose ``migrate()`` coroutine is run to
    completion with ``asyncio.run``.
    """
    # Single lookup through the settings object; the former os.getenv read was
    # overwritten immediately and has been dropped.
    users_db_file: str = settings.EXTRALIT_LOCAL_AUTH_USERS_DB_FILE
    asyncio.run(UsersMigrator(users_db_file).migrate())


Expand Down
175 changes: 175 additions & 0 deletions extralit-server/src/extralit_server/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Copyright 2024-present, Extralit Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Centralized configuration management for Extralit Server.

This module provides a Pydantic-based settings class that loads configuration
from environment variables and .env files. All environment variable access
should go through the `settings` object to ensure type safety and validation.

Usage:
from extralit_server.config import settings

# Access settings
db_url = settings.EXTRALIT_DATABASE_URL
api_key = settings.EXTRALIT_API_KEY.get_secret_value()  # For SecretStr fields (optional ones default to None; check before calling)
"""

from typing import Optional

from pydantic import Field, HttpUrl, SecretStr, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """
    Application settings loaded from environment variables.

    Settings are loaded from:
    1. Environment variables
    2. .env file (if present)
    3. Default values defined in field declarations

    Most settings use the EXTRALIT_ prefix, but some third-party integrations
    (Marker, MinIO, etc.) use their own naming conventions.

    For sensitive values (API keys, secrets), use the .get_secret_value() method
    to access the underlying string value. Use :meth:`mask_secrets` when the
    settings need to be logged.
    """

    # extra="ignore": unrelated keys in the environment/.env file are skipped
    # instead of raising a validation error.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # Client API Configuration
    EXTRALIT_API_URL: Optional[HttpUrl] = Field(
        default=None, description="URL of the Extralit API server for client connections"
    )
    EXTRALIT_API_KEY: Optional[SecretStr] = Field(
        default=None, description="API key for authenticating with Extralit server"
    )

    # Server Configuration
    OBJC_DISABLE_INITIALIZE_FORK_SAFETY: Optional[str] = Field(
        default=None, description="macOS-specific setting to disable Objective-C fork safety warnings"
    )
    ALEMBIC_CONFIG: Optional[str] = Field(default=None, description="Path to Alembic configuration file")
    EXTRALIT_AUTH_SECRET_KEY: Optional[SecretStr] = Field(
        default=None, description="Secret key for JWT token signing and authentication"
    )
    EXTRALIT_DATABASE_URL: str = Field(
        default="sqlite+aiosqlite:///./extralit-dev.db?check_same_thread=False",
        description="Database connection URL (supports SQLite and PostgreSQL)",
    )
    HF_HUB_DISABLE_TELEMETRY: bool = Field(default=True, description="Disable HuggingFace Hub telemetry collection")

    # S3 Storage Configuration
    EXTRALIT_S3_ENDPOINT: Optional[HttpUrl] = Field(
        default=None, description="S3-compatible storage endpoint URL (e.g., MinIO, AWS S3)"
    )
    EXTRALIT_S3_ACCESS_KEY: Optional[str] = Field(default=None, description="S3 access key ID")
    EXTRALIT_S3_SECRET_KEY: Optional[SecretStr] = Field(default=None, description="S3 secret access key")
    EXTRALIT_S3_REGION: Optional[str] = Field(default=None, description="S3 bucket region")
    EXTRALIT_S3_SECURE: bool = Field(default=False, description="Use HTTPS for S3 connections")

    # Search and Cache Configuration
    EXTRALIT_EXTRALIT_URL: Optional[HttpUrl] = Field(default=None, description="URL for Extralit LLM serving endpoint")
    EXTRALIT_SEARCH_ENGINE: Optional[str] = Field(
        default=None, description="Search engine backend (elasticsearch or opensearch)"
    )
    EXTRALIT_ELASTICSEARCH: Optional[HttpUrl] = Field(default=None, description="Elasticsearch/OpenSearch endpoint URL")
    EXTRALIT_REDIS_URL: str = Field(
        default="redis://localhost:6379/0", description="Redis connection URL for caching and job queues"
    )

    # Document Preprocessing Configuration
    PREPROCESSING_ENABLED: bool = Field(default=True, description="Enable document preprocessing pipeline")
    PREPROCESSING_ENABLE_ANALYSIS: bool = Field(default=True, description="Enable document layout analysis")
    PREPROCESSING_ROTATE_PAGES: bool = Field(default=True, description="Auto-rotate pages based on text orientation")
    PREPROCESSING_ROTATE_PAGES_THRESHOLD: float = Field(
        default=2.0, description="Confidence threshold for page rotation detection"
    )
    PREPROCESSING_CLEAN: bool = Field(default=False, description="Clean up temporary files after preprocessing")
    PREPROCESSING_QUIET: bool = Field(default=False, description="Suppress preprocessing log output")

    # External Service Configuration (MinIO, Weaviate)
    MINIO_ACCESS_KEY: Optional[str] = Field(default=None, description="MinIO access key for object storage")
    MINIO_SECRET_KEY: Optional[SecretStr] = Field(default=None, description="MinIO secret key")
    WCS_HTTP_URL: Optional[str] = Field(default=None, description="Weaviate Cloud Services HTTP endpoint")
    WCS_GRPC_URL: Optional[str] = Field(default=None, description="Weaviate Cloud Services gRPC endpoint")
    WCS_API_KEY: Optional[SecretStr] = Field(default=None, description="Weaviate Cloud Services API key")
    WCS_USERNAME: Optional[str] = Field(default=None, description="Weaviate Cloud Services username")
    WCS_PASSWORD: Optional[SecretStr] = Field(default=None, description="Weaviate Cloud Services password")

    # Marker PDF Processing Configuration
    # NOTE: MARKER_RUN_MODE must stay declared before MARKER_MODAL_BASE_URL; the
    # URL validator below reads the run mode from the already-validated fields.
    MARKER_RUN_MODE: str = Field(
        default="local", description="Marker execution mode: 'local' for in-process or 'modal' for remote API"
    )
    MARKER_MODAL_BASE_URL: Optional[str] = Field(
        default=None, description="Base URL for Modal-hosted Marker service (required when MARKER_RUN_MODE=modal)"
    )
    MARKER_MODAL_TIMEOUT_SECS: int = Field(default=600, description="Timeout in seconds for Modal Marker API calls")

    # Chat and Message Validation
    EXTRALIT_MIN_MESSAGE_LENGTH: int = Field(default=1, description="Minimum chat message length")
    EXTRALIT_MAX_MESSAGE_LENGTH: int = Field(default=20000, description="Maximum chat message length")
    EXTRALIT_MIN_ROLE_LENGTH: int = Field(default=1, description="Minimum chat role name length")
    EXTRALIT_MAX_ROLE_LENGTH: int = Field(default=20, description="Maximum chat role name length")

    # Authentication Configuration
    EXTRALIT_LOCAL_AUTH_USERS_DB_FILE: str = Field(
        default=".users.yml", description="Path to local users database file for authentication"
    )

    @field_validator("MARKER_MODAL_BASE_URL")
    @classmethod
    def validate_marker_modal_url(cls, v: Optional[str], info) -> Optional[str]:
        """Validate that MARKER_MODAL_BASE_URL is set when using Modal mode.

        Args:
            v: The candidate MARKER_MODAL_BASE_URL value (may be None or empty).
            info: Pydantic validation info; ``info.data`` holds the fields
                validated so far, which is where MARKER_RUN_MODE is read from.

        Returns:
            The value unchanged.

        Raises:
            ValueError: If the run mode is 'modal' (case-insensitive) and the
                URL is missing or empty.
        """
        # NOTE(review): for an *unset* URL this check only fires because
        # BaseSettings validates default values (validate_default=True) —
        # confirm if model_config is ever changed.
        if info.data.get("MARKER_RUN_MODE", "").lower() == "modal" and not v:
            raise ValueError(
                "MARKER_MODAL_BASE_URL must be set when MARKER_RUN_MODE is 'modal'. "
                "Please provide the URL of your Modal deployment endpoint."
            )
        return v

    @model_validator(mode="after")
    def validate_s3_config(self) -> "Settings":
        """Validate that S3 configuration is complete when any S3 field is provided.

        Endpoint, access key and secret key form an all-or-nothing group;
        region and the secure flag are not part of the check.

        Returns:
            The validated settings instance.

        Raises:
            ValueError: If only some of the required S3 fields are set. The
                message lists the missing ones.
        """
        s3_fields = {
            "EXTRALIT_S3_ENDPOINT": self.EXTRALIT_S3_ENDPOINT,
            "EXTRALIT_S3_ACCESS_KEY": self.EXTRALIT_S3_ACCESS_KEY,
            "EXTRALIT_S3_SECRET_KEY": self.EXTRALIT_S3_SECRET_KEY,
        }
        missing = [name for name, value in s3_fields.items() if value is None]

        # Partially configured: at least one field is set and at least one is
        # missing. The bound comes from the mapping itself rather than a
        # hard-coded count, so adding a required field cannot desynchronize it.
        if missing and len(missing) < len(s3_fields):
            raise ValueError(
                "Incomplete S3 configuration. When using S3 storage, all required fields must be set. "
                f"Missing: {', '.join(missing)}"
            )
        return self

    @staticmethod
    def _redact_url_password(value: str) -> str:
        """Return *value* with any URL-embedded password replaced by a mask.

        Strings that are not URLs, or URLs without a password component, are
        returned unchanged.
        """
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        from urllib.parse import urlsplit

        try:
            parts = urlsplit(value)
            password = parts.password
        except ValueError:
            # Not parseable as a URL (e.g. malformed IPv6 literal); leave as is.
            return value
        if not password:
            return value
        userinfo, _, hostinfo = parts.netloc.rpartition("@")
        username = userinfo.partition(":")[0]
        return parts._replace(netloc=f"{username}:***MASKED***@{hostinfo}").geturl()

    def mask_secrets(self) -> dict:
        """Export settings with sensitive values masked for logging/debugging.

        Non-empty SecretStr fields are replaced entirely. Plain string settings
        that are URLs carrying a password (e.g. a PostgreSQL or Redis
        connection URL) have only the password component replaced.

        Returns:
            dict: Settings dictionary with sensitive values masked as '***MASKED***'
        """
        data = self.model_dump()
        for key, value in data.items():
            if isinstance(getattr(self, key), SecretStr):
                # Empty secrets are left untouched: there is nothing to leak.
                if value:
                    data[key] = "***MASKED***"
            elif isinstance(value, str):
                data[key] = self._redact_url_password(value)
        return data


# Shared module-level instance; constructing it here runs the Settings
# validators at import time, so invalid configuration fails on first import.
settings = Settings()
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from pathlib import Path
from typing import Any, Optional

Expand All @@ -21,17 +20,24 @@
from dotenv import load_dotenv

from extralit_server.api.handlers.v1.models import client
from extralit_server.config import settings

load_dotenv()  # loads variables from a .env file in the project root

# Default timeout (seconds) for Modal requests. Kept as a module constant for
# existing importers, but sourced from the validated settings object so it
# always agrees with the value used when a call passes no explicit timeout.
DEFAULT_TIMEOUT = settings.MARKER_MODAL_TIMEOUT_SECS


def get_modal_base_url() -> str:
    """Get the Modal base URL from settings.

    Returns:
        str: The Modal base URL without trailing slash

    Raises:
        RuntimeError: If MARKER_MODAL_BASE_URL is not set (or is blank / only slashes)
    """
    # Normalize *before* the emptiness check so that a value consisting only of
    # whitespace or "/" is rejected instead of being returned as an empty URL.
    base_url = (settings.MARKER_MODAL_BASE_URL or "").strip().rstrip("/")
    if not base_url:
        raise RuntimeError("MARKER_MODAL_BASE_URL is not set. Set it to your Modal endpoint URL.")
    return base_url


async def convert_document_via_modal(
Expand Down Expand Up @@ -67,7 +73,7 @@ async def convert_document_via_modal(
data = {k: v for k, v in data.items() if v not in (None, "", "none", "null")}

headers = extra_headers or {}
t = timeout if timeout is not None else DEFAULT_TIMEOUT
t = timeout if timeout is not None else settings.MARKER_MODAL_TIMEOUT_SECS
try:
resp = await client.post(url, files=files, data=data, headers=headers, timeout=t)
resp.raise_for_status()
Expand Down
Loading