8 changes: 7 additions & 1 deletion .gitignore
@@ -1,2 +1,8 @@
__pycache__/
scripts/notebooks/
scripts/notebooks/
matrix.egg-info/
build/
.pyre/
.pyre_configuration
.pyre_configuration.local
.watchmanconfig
3 changes: 2 additions & 1 deletion matrix/app_server/app_api.py
@@ -32,6 +32,7 @@
from matrix.client.endpoint_cache import EndpointCache
from matrix.common.cluster_info import ClusterInfo, get_head_http_host
from matrix.utils.basics import convert_to_json_compatible, sanitize_app_name
from matrix.utils.logging import get_logger
from matrix.utils.os import download_s3_dir, lock_file, run_and_stream, run_async
from matrix.utils.ray import (
ACTOR_NAME_SPACE,
@@ -42,7 +43,7 @@
kill_matrix_actors,
)

logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")

DEPLOYMENT_YAML = "deployment.yaml"
DEPLOYMENT_SGLANG_YAML = "deployment_sglang.yaml"
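Note on the recurring change: these files swap the module-level logging.getLogger(...) for matrix.utils.logging.get_logger. The helper's implementation is not included in this diff, so the following is only a hedged sketch of what such a wrapper commonly does (return the stdlib logger while attaching a formatter and handler once); the format string is the one already used by the basicConfig calls elsewhere in this PR.

# Hypothetical sketch -- matrix/utils/logging.py is not shown in this diff.
import logging

_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"


def get_logger(name: str) -> logging.Logger:
    """Return a stdlib logger with a shared format and a single stream handler."""
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid stacking duplicate handlers on repeated calls
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(_FORMAT))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger

Under that assumption, call sites stay drop-in one-liners, e.g. logger = get_logger("ray.serve").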
3 changes: 2 additions & 1 deletion matrix/app_server/code/code_execution_app.py
@@ -15,9 +15,10 @@
from starlette.responses import JSONResponse

from matrix.app_server.code.sandbox_runner import SandboxRunner
from matrix.utils.logging import get_logger

CODE_EXEC_TIMEOUT = 10
logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")


@serve.deployment(ray_actor_options={"num_cpus": 1, "num_gpus": 0})
6 changes: 4 additions & 2 deletions matrix/app_server/llm/azure_openai_proxy.py
@@ -23,7 +23,9 @@
ErrorResponse,
)

logger = logging.getLogger("ray.serve")
from matrix.utils.logging import get_logger

logger = get_logger("ray.serve")

app = FastAPI()

@@ -114,7 +116,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(args=arg_strings)

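The logger.info(arg_strings) -> logger.info(",".join(arg_strings)) edit, repeated across the proxy files below, changes only the log output: passing the list logs its Python repr, while joining first produces a flat comma-separated string. A small illustration with made-up argument values:

import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger("ray.serve")

arg_strings = ["--model-name", "example-model", "--enable-tools"]  # hypothetical values
logger.info(arg_strings)            # logs the repr: ['--model-name', 'example-model', '--enable-tools']
logger.info(",".join(arg_strings))  # logs a plain string: --model-name,example-model,--enable-tools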
8 changes: 5 additions & 3 deletions matrix/app_server/llm/bedrock_proxy.py
@@ -19,7 +19,9 @@
from starlette.requests import Request
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

logger = logging.getLogger("ray.serve")
from matrix.utils.logging import get_logger

logger = get_logger("ray.serve")
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai._base_client").setLevel(logging.WARNING)

@@ -124,11 +126,11 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(args=arg_strings)

logger.log(logging.INFO, f"args: {args}")
logger.info(f"args: {args}")
assert "claude" in args.model_name.lower(), "Only Claude model is supported"

return BedrockDeployment.options( # type: ignore[attr-defined]
3 changes: 2 additions & 1 deletion matrix/app_server/llm/deploy_sglang_app.py
@@ -27,10 +27,11 @@

from matrix.common.cluster_info import ClusterInfo
from matrix.utils.http import fetch_url, post_url
from matrix.utils.logging import get_logger
from matrix.utils.os import run_and_stream, stop_process
from matrix.utils.ray import ACTOR_NAME_SPACE, get_matrix_actors, get_ray_head_node

logger = logging.getLogger("ray.sglang")
logger = get_logger("ray.sglang")
handler = logging.StreamHandler()
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s")
handler.setFormatter(formatter)
6 changes: 4 additions & 2 deletions matrix/app_server/llm/gemini_proxy.py
@@ -17,7 +17,9 @@
from starlette.requests import Request
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

logger = logging.getLogger("ray.serve")
from matrix.utils.logging import get_logger

logger = get_logger("ray.serve")

app = FastAPI()

@@ -209,7 +211,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(args=arg_strings)

5 changes: 3 additions & 2 deletions matrix/app_server/llm/metagen_proxy.py
@@ -24,8 +24,9 @@
)

from matrix.utils.http import post_url
from matrix.utils.logging import get_logger

logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")

app = FastAPI()

@@ -124,7 +125,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(args=arg_strings)

6 changes: 4 additions & 2 deletions matrix/app_server/llm/ray_serve_fastgen.py
@@ -31,7 +31,9 @@
from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
from torch.distributed.device_mesh import DeviceMesh, init_device_mesh

logger = logging.getLogger("ray.serve")
from matrix.utils.logging import get_logger

logger = get_logger("ray.serve")

app = FastAPI()

@@ -350,7 +352,7 @@ def parse_args(cli_args: Dict[str, str]):
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))
parsed_args = parser.parse_args(args=arg_strings)
return parsed_args

5 changes: 3 additions & 2 deletions matrix/app_server/llm/ray_serve_vllm.py
@@ -41,6 +41,7 @@
)

from matrix.app_server.llm import openai_pb2
from matrix.utils.logging import get_logger

try:
from vllm.entrypoints.openai.serving_engine import ( # type: ignore[attr-defined]
@@ -76,7 +77,7 @@
"enable_tools",
]

logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")

app = FastAPI()

@@ -534,7 +535,7 @@ def parse_vllm_args(cli_args: Dict[str, str]):
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))
parsed_args = parser.parse_args(args=arg_strings)
return parsed_args, deploy_args

8 changes: 5 additions & 3 deletions matrix/app_server/llm/sagemaker_proxy.py
@@ -18,7 +18,9 @@
from starlette.requests import Request
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

logger = logging.getLogger("ray.serve")
from matrix.utils.logging import get_logger

logger = get_logger("ray.serve")
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai._base_client").setLevel(logging.WARNING)

@@ -122,11 +124,11 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
arg_strings.extend([f"--{key}"])
else:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(args=arg_strings)

logger.log(logging.INFO, f"args: {args}")
logger.info(f"args: {args}")

return SageMakerDeployment.options( # type: ignore[attr-defined]
placement_group_bundles=pg_resources,
5 changes: 3 additions & 2 deletions matrix/app_server/vision/optical_flow.py
@@ -21,8 +21,9 @@
from torch.utils.data import DataLoader, Dataset

from matrix.app_server.vision.utils import SamplingMode, TorchCodecVideoDataset
from matrix.utils.logging import get_logger

logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai._base_client").setLevel(logging.WARNING)

@@ -207,7 +208,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
for key, value in cli_args.items():
if value is not None:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(arg_strings)

5 changes: 3 additions & 2 deletions matrix/app_server/vision/perception_encoder.py
@@ -29,8 +29,9 @@
execute_with_retry,
get_image_transform,
)
from matrix.utils.logging import get_logger

logger = logging.getLogger("ray.serve")
logger = get_logger("ray.serve")
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai._base_client").setLevel(logging.WARNING)

@@ -289,7 +290,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
for key, value in cli_args.items():
if value is not None:
arg_strings.extend([f"--{key}", str(value)])
logger.info(arg_strings)
logger.info(",".join(arg_strings))

args = argparse.parse_args(arg_strings)

4 changes: 3 additions & 1 deletion matrix/app_server/vision/utils.py
@@ -20,7 +20,9 @@
from torch.utils.data import Dataset
from torchcodec.decoders import VideoDecoder

logger = logging.getLogger("matrix.app_server.vision.utils")
from matrix.utils.logging import get_logger

logger = get_logger("matrix.app_server.vision.utils")
logging.getLogger("httpx").setLevel(logging.WARNING)


3 changes: 2 additions & 1 deletion matrix/client/client_utils.py
@@ -13,8 +13,9 @@
import typing as tp

from matrix.client.endpoint_cache import EndpointCache
from matrix.utils.logging import get_logger

logger = logging.getLogger("matrix.client.utils")
logger = get_logger("matrix.client.utils")
logging.getLogger("httpx").setLevel(logging.WARNING)


3 changes: 2 additions & 1 deletion matrix/client/container_client.py
@@ -11,8 +11,9 @@
import aiohttp

from matrix.utils.http import post_url
from matrix.utils.logging import get_logger

logger = logging.getLogger(__name__)
logger = get_logger(__name__)


class ContainerClient:
3 changes: 2 additions & 1 deletion matrix/client/endpoint_cache.py
@@ -11,9 +11,10 @@

from matrix.common.cluster_info import ClusterInfo, get_head_http_host
from matrix.utils.http import fetch_url
from matrix.utils.logging import get_logger
from matrix.utils.ray import get_ray_dashboard_address

logger = logging.getLogger("endpoint_cache")
logger = get_logger("endpoint_cache")


class EndpointCache:
3 changes: 2 additions & 1 deletion matrix/client/execute_code.py
@@ -19,12 +19,13 @@

from matrix.client.client_utils import get_an_endpoint_url, save_to_jsonl
from matrix.client.endpoint_cache import EndpointCache
from matrix.utils.logging import get_logger

# Configure logging for execute_code.py
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("execute_code")
logger = get_logger("execute_code")
# Optionally suppress noisy logs from imported modules if not already done
logging.getLogger("httpx").setLevel(logging.WARNING)

3 changes: 2 additions & 1 deletion matrix/client/process_vision_data.py
@@ -23,11 +23,12 @@
save_to_jsonl,
)
from matrix.client.endpoint_cache import EndpointCache
from matrix.utils.logging import get_logger

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("process_vision_data")
logger = get_logger("process_vision_data")
logging.getLogger("httpx").setLevel(logging.WARNING)


3 changes: 2 additions & 1 deletion matrix/client/query_llm.py
@@ -32,13 +32,14 @@
from matrix.app_server.llm import openai_pb2, openai_pb2_grpc
from matrix.client.client_utils import get_an_endpoint_url, save_to_jsonl
from matrix.client.endpoint_cache import EndpointCache
from matrix.utils.logging import get_logger
from matrix.utils.os import batch_requests_async, run_async

CHAR_PER_TOKEN = 3.61
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("query_llm")
logger = get_logger("query_llm")
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai._base_client").setLevel(logging.WARNING)

4 changes: 3 additions & 1 deletion matrix/cluster/ray_dashboard_job.py
@@ -15,11 +15,13 @@

import ray

from matrix.utils.logging import get_logger

# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("RayDashboardJob")
logger = get_logger("RayDashboardJob")


@ray.remote(max_restarts=10)
2 changes: 1 addition & 1 deletion matrix/data_pipeline/clustering/sample.py
@@ -131,7 +131,7 @@ def select_sample_ids_from_cluster(cluster_df):
else:
embeddings = np.vstack(cluster_df["embedding"])
centroid = np.mean(embeddings, axis=0)
logger.info(embeddings.shape, centroid.shape)
logger.info(f"{embeddings.shape}, {centroid.shape}")
distances_to_centroid = np.linalg.norm(embeddings - centroid, axis=1)

# First pick the closest point to centroid
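Unlike the cosmetic join edits above, the sample.py change fixes a real bug: logging treats extra positional arguments as %-format arguments for the message, so logger.info(embeddings.shape, centroid.shape) produces a "--- Logging error ---" instead of printing both shapes. The f-string interpolates both values into one message. A minimal reproduction with made-up array sizes:

import logging

import numpy as np

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger("sample")

embeddings = np.zeros((10, 768))        # hypothetical cluster embeddings
centroid = np.mean(embeddings, axis=0)  # shape (768,)

# Buggy: centroid.shape becomes a %-format argument; the message "(10, 768)" has no
# placeholders, so logging reports an error rather than the shapes.
logger.info(embeddings.shape, centroid.shape)

# Fixed: build one message string up front.
logger.info(f"{embeddings.shape}, {centroid.shape}")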
3 changes: 2 additions & 1 deletion matrix/data_pipeline/clustering/utils.py
@@ -20,12 +20,13 @@
from sentence_transformers import SentenceTransformer

from matrix.utils.basics import get_nested_value, get_user_message_from_llama3_prompt
from matrix.utils.logging import get_logger

# Basic Logging Setup
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logger = get_logger(__name__)

# --- Model Saving/Loading ---

4 changes: 3 additions & 1 deletion matrix/data_pipeline/generate/vllm_generate.py
@@ -15,7 +15,9 @@
from fire import Fire
from vllm import LLM, SamplingParams

logger = logging.getLogger(__name__)
from matrix.utils.logging import get_logger

logger = get_logger(__name__)

USER_MESSAGE = "<user_message>"
