From b28c6a760414b7fa03e4407f67847c4d4d83dad9 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 25 Oct 2024 10:27:25 -0500
Subject: [PATCH 01/12] feat: Add support for optional Hugging Face embeddings
 in MFModel

- Modified MFModel to support both OpenAI and Hugging Face embeddings.
- Introduced parameter to toggle between embedding sources.
- Refactored embedding handling logic for better clarity and flexibility.
- Updated initialization to allow specifying custom embedding model names.
---
 .../routers/matrix_factorization/model.py | 60 +++++++++++++++----
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index 09fbb25..d7beefa 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -1,6 +1,6 @@
 import torch
 from huggingface_hub import PyTorchModelHubMixin
-
+from transformers import AutoTokenizer, AutoModel
 from routellm.routers.similarity_weighted.utils import OPENAI_CLIENT
 
 MODEL_IDS = {
@@ -79,13 +79,22 @@ def __init__(
         text_dim,
         num_classes,
         use_proj,
+        use_openai_embeddings=True,  # Parameter to choose embedding source
+        embedding_model_name=None  # Name of the embedding model
     ):
         super().__init__()
         self._name = "TextMF"
         self.use_proj = use_proj
         self.P = torch.nn.Embedding(num_models, dim)
+        self.use_openai_embeddings = use_openai_embeddings
 
-        self.embedding_model = "text-embedding-3-small"
+        # Set the embedding model name
+        if self.use_openai_embeddings:
+            # Default OpenAI embedding model
+            self.embedding_model_name = embedding_model_name or "text-embedding-ada-002"
+        else:
+            # Default Hugging Face embedding model
+            self.embedding_model_name = embedding_model_name or "sentence-transformers/all-MiniLM-L6-v2"
 
         if self.use_proj:
             self.text_proj = torch.nn.Sequential(
@@ -100,22 +109,53 @@ def __init__(
             torch.nn.Linear(dim, num_classes, bias=False)
         )
 
+        if not self.use_openai_embeddings:
+            # Initialize Hugging Face tokenizer and model
+            self.tokenizer = AutoTokenizer.from_pretrained(self.embedding_model_name)
+            self.embedding_model = AutoModel.from_pretrained(self.embedding_model_name)
+            self.embedding_model.eval()
+            self.embedding_model.to(self.get_device())
+        else:
+            self.embedding_model = None  # Not used for OpenAI embeddings
+
     def get_device(self):
         return self.P.weight.device
 
+    def get_prompt_embedding(self, prompt):
+        if self.use_openai_embeddings:
+            # Use OpenAI embeddings
+            response = OPENAI_CLIENT.embeddings.create(
+                input=[prompt],
+                model=self.embedding_model_name
+            )
+            prompt_embed = response.data[0].embedding
+            prompt_embed = torch.tensor(prompt_embed, device=self.get_device())
+        else:
+            # Use Hugging Face embeddings
+            inputs = self.tokenizer(
+                prompt,
+                padding=True,
+                truncation=True,
+                return_tensors='pt'
+            )
+            inputs = {k: v.to(self.get_device()) for k, v in inputs.items()}
+            with torch.no_grad():
+                outputs = self.embedding_model(**inputs)
+            # Mean pooling over the token embeddings
+            last_hidden_state = outputs.last_hidden_state
+            prompt_embed = last_hidden_state.mean(dim=1).squeeze()
+        return prompt_embed
+
     def forward(self, model_id, prompt):
         model_id = torch.tensor(model_id, dtype=torch.long).to(self.get_device())
 
         model_embed = self.P(model_id)
         model_embed = torch.nn.functional.normalize(model_embed, p=2, dim=1)
 
-        prompt_embed = (
-            OPENAI_CLIENT.embeddings.create(input=[prompt], model=self.embedding_model)
-            .data[0]
-            .embedding
-        )
-        prompt_embed = torch.tensor(prompt_embed, device=self.get_device())
-        prompt_embed = self.text_proj(prompt_embed)
+        prompt_embed = self.get_prompt_embedding(prompt)
+
+        if self.use_proj:
+            prompt_embed = self.text_proj(prompt_embed)
 
         return self.classifier(model_embed * prompt_embed).squeeze()
 
@@ -126,4 +166,4 @@ def pred_win_rate(self, model_a, model_b, prompt):
         return winrate
 
     def load(self, path):
-        self.load_state_dict(torch.load(path))
+        self.load_state_dict(torch.load(path))
\ No newline at end of file

From 8706f3e4269fdd966f0b795afc06cf1c41fce3e5 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 25 Oct 2024 11:08:39 -0500
Subject: [PATCH 02/12] feat: Enhance MatrixFactorizationRouter to support
 variable embedding dimensions and optional Hugging Face embeddings

- Added parameters 'use_openai_embeddings', 'embedding_model_name', and
  'text_dim' to MatrixFactorizationRouter.
- Modified initialization to dynamically set 'text_dim' based on the
  selected embedding model.
- Adjusted the router to handle both OpenAI and Hugging Face embeddings
  seamlessly.
- Ensured compatibility with different embedding models by determining
  'text_dim' at runtime.
- Updated documentation and comments for clarity.

This enhancement allows users to choose between OpenAI and Hugging Face
embeddings in the MatrixFactorizationRouter, providing greater flexibility
and eliminating the hard-coded dependency on OpenAI's embedding dimension.
---
 routellm/controller.py      |  2 +-
 routellm/routers/routers.py | 39 +++++++++++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/routellm/controller.py b/routellm/controller.py
index 8a02a05..e51bb6f 100644
--- a/routellm/controller.py
+++ b/routellm/controller.py
@@ -24,7 +24,7 @@
     },
     "causal_llm": {"checkpoint_path": "routellm/causal_llm_gpt4_augmented"},
     "bert": {"checkpoint_path": "routellm/bert_gpt4_augmented"},
-    "mf": {"checkpoint_path": "routellm/mf_gpt4_augmented"},
+    "mf": {"checkpoint_path": "madison-evans/routellm_all-MiniLM-L6-v2"},
 }
 
 
diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index 0096c0a..cca80b1 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -1,7 +1,7 @@
 import abc
 import functools
 import random
-
+from transformers import AutoTokenizer, AutoModel
 import numpy as np
 import torch
 from datasets import concatenate_datasets, load_dataset
@@ -211,17 +211,43 @@ class MatrixFactorizationRouter(Router):
     def __init__(
         self,
         checkpoint_path,
-        # This is the model pair for scoring at inference time,
-        # and can be different from the model pair used for routing.
+        # Model pair for scoring at inference time
        strong_model="gpt-4-1106-preview",
         weak_model="mixtral-8x7b-instruct-v0.1",
         hidden_size=128,
-        num_models=64,
-        text_dim=1536,
+        num_models=None,  # Updated to allow specifying num_models
+        text_dim=None,  # Updated to accept text_dim as a parameter
         num_classes=1,
         use_proj=True,
+        use_openai_embeddings=True,  # New parameter
+        embedding_model_name=None,  # New parameter
     ):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        # Set num_models to the length of MODEL_IDS if not specified
+        if num_models is None:
+            num_models = len(MODEL_IDS)
+
+        # Set text_dim based on the embedding model if not specified
+        if text_dim is None:
+            if use_openai_embeddings:
+                # Default OpenAI embedding model is 'text-embedding-ada-002'
+                if embedding_model_name is None:
+                    embedding_model_name = "text-embedding-ada-002"
+                # OpenAI embeddings have a fixed dimension
+                text_dim = 1536  # Adjust if using a different OpenAI model
+            else:
+                # For Hugging Face embeddings, determine text_dim from the model
+                if embedding_model_name is None:
+                    embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
+                # Load the model to get the embedding dimension
+                tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
+                hf_model = AutoModel.from_pretrained(embedding_model_name)
+                # Get the embedding dimension from the model's config
+                text_dim = hf_model.config.hidden_size
+                # Clean up the model from memory
+                del tokenizer
+                del hf_model
 
         self.model = MFModel.from_pretrained(
             checkpoint_path,
@@ -230,6 +256,8 @@ def __init__(
             text_dim=text_dim,
             num_classes=num_classes,
             use_proj=use_proj,
+            use_openai_embeddings=use_openai_embeddings,  # Pass the new parameter
+            embedding_model_name=embedding_model_name,  # Pass the new parameter
         )
         self.model = self.model.eval().to(device)
         self.strong_model_id = MODEL_IDS[strong_model]
@@ -241,7 +269,6 @@ def calculate_strong_win_rate(self, prompt):
         )
         return winrate
 
-
 # Parallelism makes the randomness non deterministic
 @no_parallel
 class RandomRouter(Router):

From fa66531c3fbead7ad790aff84d195eac3f865798 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 25 Oct 2024 11:57:59 -0500
Subject: [PATCH 03/12] Update Controller class to accept and pass hf_token to
 routers for improved flexibility in handling Hugging Face authentication

Ensures that the Hugging Face token is properly utilized when initializing
routers, supporting local embeddings without external API dependencies.
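
As a rough usage sketch (illustrative, not part of this diff; the token value
is a placeholder and the "mf" router name comes from the default config):

    from routellm.controller import Controller

    # hf_token is forwarded to each router and, from there, to
    # AutoTokenizer.from_pretrained / AutoModel.from_pretrained,
    # so gated or private embedding models can be loaded locally.
    controller = Controller(
        routers=["mf"],
        strong_model="gpt-4-1106-preview",
        weak_model="mixtral-8x7b-instruct-v0.1",
        hf_token="hf_...",  # placeholder token
    )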
---
 routellm/controller.py                    |  6 +++++-
 .../routers/matrix_factorization/model.py | 19 ++++++++++++-------
 routellm/routers/routers.py               |  9 ++++++---
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/routellm/controller.py b/routellm/controller.py
index e51bb6f..52259c2 100644
--- a/routellm/controller.py
+++ b/routellm/controller.py
@@ -48,6 +48,7 @@ def __init__(
         api_base: Optional[str] = None,
         api_key: Optional[str] = None,
         progress_bar: bool = False,
+        hf_token: Optional[str] = None,  # Add hf_token as a parameter
     ):
         self.model_pair = ModelPair(strong=strong_model, weak=weak_model)
         self.routers = {}
@@ -67,7 +68,10 @@ def __init__(
         for router in routers:
             if router_pbar is not None:
                 router_pbar.set_description(f"Loading {router}")
-            self.routers[router] = ROUTER_CLS[router](**config.get(router, {}))
+            self.routers[router] = ROUTER_CLS[router](
+                **config.get(router, {}),
+                hf_token=self.hf_token  # Pass the token to the router
+            )
 
         # Some Python magic to match the OpenAI Python SDK
         self.chat = SimpleNamespace(
diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index d7beefa..03fa9cc 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -80,20 +80,19 @@ def __init__(
         num_classes,
         use_proj,
         use_openai_embeddings=True,  # Parameter to choose embedding source
-        embedding_model_name=None  # Name of the embedding model
+        embedding_model_name=None,  # Name of the embedding model
+        hf_token=None,  # Add hf_token as a parameter
     ):
         super().__init__()
         self._name = "TextMF"
         self.use_proj = use_proj
         self.P = torch.nn.Embedding(num_models, dim)
         self.use_openai_embeddings = use_openai_embeddings
+        self.hf_token = hf_token
 
-        # Set the embedding model name
         if self.use_openai_embeddings:
-            # Default OpenAI embedding model
-            self.embedding_model_name = embedding_model_name or "text-embedding-ada-002"
+            self.embedding_model_name = embedding_model_name or "text-embedding-3-small"
         else:
-            # Default Hugging Face embedding model
             self.embedding_model_name = embedding_model_name or "sentence-transformers/all-MiniLM-L6-v2"
 
         if self.use_proj:
@@ -111,8 +110,14 @@ def __init__(
 
         if not self.use_openai_embeddings:
             # Initialize Hugging Face tokenizer and model
-            self.tokenizer = AutoTokenizer.from_pretrained(self.embedding_model_name)
-            self.embedding_model = AutoModel.from_pretrained(self.embedding_model_name)
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.embedding_model_name,
+                use_auth_token=self.hf_token
+            )
+            self.embedding_model = AutoModel.from_pretrained(
+                self.embedding_model_name,
+                use_auth_token=self.hf_token
+            )
             self.embedding_model.eval()
             self.embedding_model.to(self.get_device())
         else:
diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index cca80b1..7bf234e 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -221,6 +221,7 @@ def __init__(
         use_proj=True,
         use_openai_embeddings=True,  # New parameter
         embedding_model_name=None,  # New parameter
+        hf_token=None,  # Add hf_token as a parameter
     ):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -233,7 +234,7 @@ def __init__(
             if use_openai_embeddings:
                 # Default OpenAI embedding model is 'text-embedding-ada-002'
                 if embedding_model_name is None:
-                    embedding_model_name = "text-embedding-ada-002"
+                    embedding_model_name = "text-embedding-3-small"
                 # OpenAI embeddings have a fixed dimension
                 text_dim = 1536  # Adjust if using a different OpenAI model
             else:
@@ -249,6 +250,7 @@ def __init__(
                 del tokenizer
                 del hf_model
 
+        # Initialize the MFModel with the token passed in
         self.model = MFModel.from_pretrained(
             checkpoint_path,
             dim=hidden_size,
@@ -256,8 +258,9 @@ def __init__(
             text_dim=text_dim,
             num_classes=num_classes,
             use_proj=use_proj,
-            use_openai_embeddings=use_openai_embeddings,  # Pass the new parameter
-            embedding_model_name=embedding_model_name,  # Pass the new parameter
+            use_openai_embeddings=use_openai_embeddings,
+            embedding_model_name=embedding_model_name,
+            hf_token=hf_token  # Pass the token here
         )
         self.model = self.model.eval().to(device)
         self.strong_model_id = MODEL_IDS[strong_model]

From 638dc5d6043f52ef37559e9bc84fa16787a4c7b4 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 25 Oct 2024 12:02:48 -0500
Subject: [PATCH 04/12] fixed a bug where hf_token was used before being
 stored on the Controller

---
 routellm/controller.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/routellm/controller.py b/routellm/controller.py
index 52259c2..a51f82f 100644
--- a/routellm/controller.py
+++ b/routellm/controller.py
@@ -50,6 +50,7 @@ def __init__(
         progress_bar: bool = False,
         hf_token: Optional[str] = None,  # Add hf_token as a parameter
     ):
+        self.hf_token = hf_token  # Store the hf_token
         self.model_pair = ModelPair(strong=strong_model, weak=weak_model)
         self.routers = {}
         self.api_base = api_base
@@ -67,10 +69,8 @@ def __init__(
         for router in routers:
             if router_pbar is not None:
                 router_pbar.set_description(f"Loading {router}")
-            self.routers[router] = ROUTER_CLS[router](
-                **config.get(router, {}),
-                hf_token=self.hf_token  # Pass the token to the router
-            )
+            self.routers[router] = ROUTER_CLS[router](hf_token=self.hf_token, **config.get(router, {}))
+
 
         # Some Python magic to match the OpenAI Python SDK
         self.chat = SimpleNamespace(

From fd9bb5e0155f33844fa8ff49006832e3868fdb96 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 25 Oct 2024 12:50:23 -0500
Subject: [PATCH 05/12] added a public get_routed_model method

---
 routellm/controller.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/routellm/controller.py b/routellm/controller.py
index a51f82f..a4b04d8 100644
--- a/routellm/controller.py
+++ b/routellm/controller.py
@@ -104,6 +104,14 @@ def _parse_model_name(self, model: str):
                 f"Invalid model {model}. Model name must be of the format 'router-[router name]-[threshold]."
             )
         return router, threshold
+
+    def get_routed_model(self, messages: list, router: str, threshold: float) -> str:
+        """
+        Get the routed model for a given message using the specified router and threshold.
+        """
+        self._validate_router_threshold(router, threshold)
+        routed_model = self._get_routed_model_for_completion(messages, router, threshold)
+        return routed_model
 
     def _get_routed_model_for_completion(
         self, messages: list, router: str, threshold: float

From a0c9ae413052dfff63fae505299825071b71bb9c Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Thu, 20 Feb 2025 13:53:14 -0600
Subject: [PATCH 06/12] added logging for winrate

---
 routellm/routers/routers.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index 7bf234e..49ce4ef 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -21,6 +21,13 @@
     compute_tiers,
     preprocess_battles,
 )
+import logging
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
 
 
 def no_parallel(cls):
@@ -270,6 +277,7 @@ def calculate_strong_win_rate(self, prompt):
         winrate = self.model.pred_win_rate(
             self.strong_model_id, self.weak_model_id, prompt
         )
+        logger.info(f"\n\nwinrate: {winrate}\n\n")
         return winrate
 
 # Parallelism makes the randomness non deterministic

From 1653339aab81eb2a83c8126fe6005cea0bafbaa8 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Thu, 20 Feb 2025 15:58:02 -0600
Subject: [PATCH 07/12] additional logging statements added to routers.py

---
 routellm/routers/routers.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index 49ce4ef..fb83efd 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -257,6 +257,18 @@ def __init__(
                 del tokenizer
                 del hf_model
 
+        logger.info(f"Initializing MFModel with parameters:")
+        logger.info(f"  checkpoint_path: {checkpoint_path}")
+        logger.info(f"  dim: {hidden_size}")
+        logger.info(f"  num_models: {num_models}")
+        logger.info(f"  text_dim: {text_dim}")
+        logger.info(f"  num_classes: {num_classes}")
+        logger.info(f"  use_proj: {use_proj}")
+        logger.info(f"  use_openai_embeddings: {use_openai_embeddings}")
+        logger.info(f"  embedding_model_name: {embedding_model_name}")
+        logger.info(f"  hf_token: {hf_token}")
+
+
         # Initialize the MFModel with the token passed in
         self.model = MFModel.from_pretrained(
             checkpoint_path,

From 1dcb3570ae81293807a21c01219c9bf7a7aeaf78 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Thu, 20 Feb 2025 16:19:30 -0600
Subject: [PATCH 08/12] additional logging statements added

---
 routellm/routers/matrix_factorization/model.py | 12 +++++++++++-
 routellm/routers/routers.py                    |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index 03fa9cc..9b1c289 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -2,6 +2,13 @@
 from huggingface_hub import PyTorchModelHubMixin
 from transformers import AutoTokenizer, AutoModel
 from routellm.routers.similarity_weighted.utils import OPENAI_CLIENT
+import logging
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
 
 MODEL_IDS = {
     "RWKV-4-Raven-14B": 0,
@@ -93,7 +100,10 @@ def __init__(
         if self.use_openai_embeddings:
             self.embedding_model_name = embedding_model_name or "text-embedding-3-small"
         else:
-            self.embedding_model_name = embedding_model_name or "sentence-transformers/all-MiniLM-L6-v2"
+            self.embedding_model_name = embedding_model_name or "intfloat/e5-base-v2"
+
+        logger.info(f"self.embedding_model_name: {self.embedding_model_name}")
+        logger.info(f"dim: {dim}")
 
         if self.use_proj:
             self.text_proj = torch.nn.Sequential(
diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index fb83efd..4657137 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -247,7 +247,7 @@ def __init__(
             else:
                 # For Hugging Face embeddings, determine text_dim from the model
                 if embedding_model_name is None:
-                    embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
+                    embedding_model_name = 'intfloat/e5-base-v2'
                 # Load the model to get the embedding dimension
                 tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
                 hf_model = AutoModel.from_pretrained(embedding_model_name)

From 345c1e3ba1c6e51148da6a461defd9e755081c1f Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Thu, 20 Feb 2025 16:40:01 -0600
Subject: [PATCH 09/12] additional logging + update of get_prompt_embedding
 method

---
 .../routers/matrix_factorization/model.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index 9b1c289..72552a8 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -124,6 +124,7 @@ def __init__(
                 self.embedding_model_name,
                 use_auth_token=self.hf_token
             )
+
             self.embedding_model = AutoModel.from_pretrained(
                 self.embedding_model_name,
                 use_auth_token=self.hf_token
@@ -132,7 +133,7 @@ def __init__(
             self.embedding_model.to(self.get_device())
         else:
             self.embedding_model = None  # Not used for OpenAI embeddings
-
+        logger.info(f"\n\ntokenizer: {self.tokenizer}\n\n")
     def get_device(self):
         return self.P.weight.device
 
@@ -154,11 +155,20 @@ def get_prompt_embedding(self, prompt):
                 return_tensors='pt'
             )
             inputs = {k: v.to(self.get_device()) for k, v in inputs.items()}
+
             with torch.no_grad():
                 outputs = self.embedding_model(**inputs)
-            # Mean pooling over the token embeddings
-            last_hidden_state = outputs.last_hidden_state
-            prompt_embed = last_hidden_state.mean(dim=1).squeeze()
+
+            # Use CLS token instead of mean pooling
+            prompt_embed = outputs.last_hidden_state[:, 0, :].squeeze()
+
+            # Normalize embeddings to match OpenAI
+            prompt_embed = torch.nn.functional.normalize(prompt_embed, p=2, dim=-1)
+
+            # Ensure shape consistency
+            if prompt_embed.dim() == 1:
+                prompt_embed = prompt_embed.view(-1)
+
         return prompt_embed
 
     def forward(self, model_id, prompt):
@@ -177,6 +187,7 @@ def forward(self, model_id, prompt):
     @torch.no_grad()
     def pred_win_rate(self, model_a, model_b, prompt):
         logits = self.forward([model_a, model_b], prompt)
+        logger.info(f"\n\nlogits: {logits}\n\n")
         winrate = torch.sigmoid(logits[0] - logits[1]).item()
         return winrate
 

From 0d3e92080ad56e63846e23d17e4cf526d92ba814 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Thu, 20 Feb 2025 20:34:02 -0600
Subject: [PATCH 10/12] updated model.py to default to Hugging Face embeddings
 with mean pooling

---
 .../routers/matrix_factorization/model.py | 106 +++++++-----------
 1 file changed, 40 insertions(+), 66 deletions(-)

diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index 72552a8..ce1b78c 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -77,7 +77,6 @@
     "zephyr-7b-beta": 63,
 }
 
-
 class MFModel(torch.nn.Module, PyTorchModelHubMixin):
     def __init__(
         self,
@@ -86,97 +85,68 @@ def __init__(
         text_dim,
         num_classes,
         use_proj,
-        use_openai_embeddings=True,  # Parameter to choose embedding source
-        embedding_model_name=None,  # Name of the embedding model
-        hf_token=None,  # Add hf_token as a parameter
+        use_openai_embeddings=False,  # Default: Hugging Face embeddings
+        embedding_model_name="intfloat/e5-base-v2",  # Match notebook
+        hf_token=None,  # Hugging Face API token
     ):
         super().__init__()
-        self._name = "TextMF"
         self.use_proj = use_proj
-        self.P = torch.nn.Embedding(num_models, dim)
         self.use_openai_embeddings = use_openai_embeddings
-        self.hf_token = hf_token
+        self.hf_token = hf_token
+        self.embedding_model_name = embedding_model_name
 
-        if self.use_openai_embeddings:
-            self.embedding_model_name = embedding_model_name or "text-embedding-3-small"
-        else:
-            self.embedding_model_name = embedding_model_name or "intfloat/e5-base-v2"
-
-        logger.info(f"self.embedding_model_name: {self.embedding_model_name}")
-        logger.info(f"dim: {dim}")
+        # Model embedding matrix
+        self.P = torch.nn.Embedding(num_models, dim)
 
         if self.use_proj:
-            self.text_proj = torch.nn.Sequential(
-                torch.nn.Linear(text_dim, dim, bias=False)
-            )
+            self.text_proj = torch.nn.Linear(text_dim, dim, bias=False)
         else:
-            assert (
-                text_dim == dim
-            ), f"text_dim {text_dim} must be equal to dim {dim} if not using projection"
+            assert text_dim == dim, f"text_dim {text_dim} must be equal to dim {dim} if not using projection"
 
-        self.classifier = torch.nn.Sequential(
-            torch.nn.Linear(dim, num_classes, bias=False)
-        )
+        self.classifier = torch.nn.Linear(dim, num_classes, bias=False)
 
         if not self.use_openai_embeddings:
-            # Initialize Hugging Face tokenizer and model
+            logger.info(f"Loading Hugging Face tokenizer and model: {self.embedding_model_name}")
+
+            # Load tokenizer & model exactly as in the notebook
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.embedding_model_name,
-                use_auth_token=self.hf_token
+                token=hf_token  # Use `token` instead of `use_auth_token`
             )
-
             self.embedding_model = AutoModel.from_pretrained(
                 self.embedding_model_name,
-                use_auth_token=self.hf_token
+                token=hf_token  # Use `token` instead of `use_auth_token`
             )
-            self.embedding_model.eval()
+            self.embedding_model.eval()  # Set to inference mode
             self.embedding_model.to(self.get_device())
-        else:
-            self.embedding_model = None  # Not used for OpenAI embeddings
-        logger.info(f"\n\ntokenizer: {self.tokenizer}\n\n")
+
     def get_device(self):
         return self.P.weight.device
 
     def get_prompt_embedding(self, prompt):
-        if self.use_openai_embeddings:
-            # Use OpenAI embeddings
-            response = OPENAI_CLIENT.embeddings.create(
-                input=[prompt],
-                model=self.embedding_model_name
-            )
-            prompt_embed = response.data[0].embedding
-            prompt_embed = torch.tensor(prompt_embed, device=self.get_device())
-        else:
-            # Use Hugging Face embeddings
-            inputs = self.tokenizer(
-                prompt,
-                padding=True,
-                truncation=True,
-                return_tensors='pt'
-            )
-            inputs = {k: v.to(self.get_device()) for k, v in inputs.items()}
-
-            with torch.no_grad():
-                outputs = self.embedding_model(**inputs)
-
-            # Use CLS token instead of mean pooling
-            prompt_embed = outputs.last_hidden_state[:, 0, :].squeeze()
-
-            # Normalize embeddings to match OpenAI
-            prompt_embed = torch.nn.functional.normalize(prompt_embed, p=2, dim=-1)
-
-            # Ensure shape consistency
-            if prompt_embed.dim() == 1:
-                prompt_embed = prompt_embed.view(-1)
-
+        """Generate sentence embedding using mean pooling (matches notebook)."""
+        logger.info(f"Generating embedding for prompt: {prompt[:30]}...")
+
+        inputs = self.tokenizer(
+            prompt,
+            padding=True,
+            truncation=True,
+            return_tensors="pt"
+        ).to(self.get_device())
+
+        with torch.no_grad():
+            outputs = self.embedding_model(**inputs)
+            last_hidden_state = outputs.last_hidden_state
+
+        # Mean pooling over token embeddings
+        prompt_embed = last_hidden_state.mean(dim=1).squeeze()
+
         return prompt_embed
 
     def forward(self, model_id, prompt):
         model_id = torch.tensor(model_id, dtype=torch.long).to(self.get_device())
-
         model_embed = self.P(model_id)
         model_embed = torch.nn.functional.normalize(model_embed, p=2, dim=1)
-
         prompt_embed = self.get_prompt_embedding(prompt)
 
         if self.use_proj:
@@ -187,8 +157,12 @@ def forward(self, model_id, prompt):
     @torch.no_grad()
     def pred_win_rate(self, model_a, model_b, prompt):
         logits = self.forward([model_a, model_b], prompt)
-        logger.info(f"\n\nlogits: {logits}\n\n")
-        winrate = torch.sigmoid(logits[0] - logits[1]).item()
+        raw_diff = logits[0] - logits[1]
+        winrate = torch.sigmoid(raw_diff).item()
+        logger.info(
+            f"For prompt: '{prompt[:30]}...', logits: {[float(x) for x in logits]}, "
+            f"raw difference: {raw_diff:.4f}, winrate (sigmoid): {winrate:.4f}"
+        )
         return winrate
 
     def load(self, path):

From 91e59d2663b1a6127adf254e9cd391bcb7a0a897 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Fri, 21 Feb 2025 11:02:04 -0600
Subject: [PATCH 11/12] changed default embedding model to BAAI/bge-base-en

---
 routellm/routers/routers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index 4657137..f75c999 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -247,7 +247,7 @@ def __init__(
             else:
                 # For Hugging Face embeddings, determine text_dim from the model
                 if embedding_model_name is None:
-                    embedding_model_name = 'intfloat/e5-base-v2'
+                    embedding_model_name = 'BAAI/bge-base-en'
                 # Load the model to get the embedding dimension
                 tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
                 hf_model = AutoModel.from_pretrained(embedding_model_name)

From a7464f2f9aaea0d806d58cd8bc9854e601857f11 Mon Sep 17 00:00:00 2001
From: Madison Evans
Date: Sat, 22 Feb 2025 12:35:16 -0600
Subject: [PATCH 12/12] code cleanup

---
 .../routers/matrix_factorization/model.py |   6 +-
 routellm/routers/routers.py               | 103 ++++++++++--------
 2 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/routellm/routers/matrix_factorization/model.py b/routellm/routers/matrix_factorization/model.py
index ce1b78c..ad006a9 100644
--- a/routellm/routers/matrix_factorization/model.py
+++ b/routellm/routers/matrix_factorization/model.py
@@ -86,7 +86,7 @@ def __init__(
         num_classes,
         use_proj,
         use_openai_embeddings=False,  # Default: Hugging Face embeddings
-        embedding_model_name="intfloat/e5-base-v2",  # Match notebook
+        embedding_model_name="BAAI/bge-base-en",  # Match notebook
         hf_token=None,  # Hugging Face API token
     ):
         super().__init__()
@@ -111,11 +111,11 @@ def __init__(
             # Load tokenizer & model exactly as in the notebook
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.embedding_model_name,
-                token=hf_token  # Use `token` instead of `use_auth_token`
+                token=hf_token
             )
             self.embedding_model = AutoModel.from_pretrained(
                 self.embedding_model_name,
-                token=hf_token  # Use `token` instead of `use_auth_token`
+                token=hf_token
             )
             self.embedding_model.eval()  # Set to inference mode
             self.embedding_model.to(self.get_device())
diff --git a/routellm/routers/routers.py b/routellm/routers/routers.py
index f75c999..a635bfd 100644
--- a/routellm/routers/routers.py
+++ b/routellm/routers/routers.py
@@ -218,58 +218,47 @@ class MatrixFactorizationRouter(Router):
     def __init__(
         self,
         checkpoint_path,
-        # Model pair for scoring at inference time
         strong_model="gpt-4-1106-preview",
         weak_model="mixtral-8x7b-instruct-v0.1",
         hidden_size=128,
-        num_models=None,  # Updated to allow specifying num_models
-        text_dim=None,  # Updated to accept text_dim as a parameter
+        num_models=None,
+        text_dim=None,
         num_classes=1,
         use_proj=True,
-        use_openai_embeddings=True,  # New parameter
-        embedding_model_name=None,  # New parameter
-        hf_token=None,  # Add hf_token as a parameter
+        use_openai_embeddings=True,
+        embedding_model_name=None,
+        hf_token=None,
     ):
+        """
+        A simplified constructor that flattens the logic for:
+        1) Setting num_models from MODEL_IDS,
+        2) Determining embedding_model_name defaults,
+        3) Setting text_dim for OpenAI vs. HF embeddings,
+        4) Initializing the MFModel,
+        5) Setting strong/weak model IDs.
+        """
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-        # Set num_models to the length of MODEL_IDS if not specified
-        if num_models is None:
-            num_models = len(MODEL_IDS)
+        # Default num_models to the length of MODEL_IDS if not provided
+        num_models = num_models or len(MODEL_IDS)
+
+        # Decide which embedding model_name to use if none provided
+        if not embedding_model_name:
+            if use_openai_embeddings:
+                # e.g. "text-embedding-ada-002" or your default
+                embedding_model_name = "text-embedding-3-small"
+            else:
+                embedding_model_name = "BAAI/bge-base-en"
 
-        # Set text_dim based on the embedding model if not specified
+        # Decide text_dim if not provided
         if text_dim is None:
             if use_openai_embeddings:
-                # Default OpenAI embedding model is 'text-embedding-ada-002'
-                if embedding_model_name is None:
-                    embedding_model_name = "text-embedding-3-small"
-                # OpenAI embeddings have a fixed dimension
-                text_dim = 1536  # Adjust if using a different OpenAI model
+                # e.g., 1536 for text-embedding-ada-002
+                text_dim = 1536
             else:
-                # For Hugging Face embeddings, determine text_dim from the model
-                if embedding_model_name is None:
-                    embedding_model_name = 'BAAI/bge-base-en'
-                # Load the model to get the embedding dimension
-                tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
-                hf_model = AutoModel.from_pretrained(embedding_model_name)
-                # Get the embedding dimension from the model's config
-                text_dim = hf_model.config.hidden_size
-                # Clean up the model from memory
-                del tokenizer
-                del hf_model
-
-        logger.info(f"Initializing MFModel with parameters:")
-        logger.info(f"  checkpoint_path: {checkpoint_path}")
-        logger.info(f"  dim: {hidden_size}")
-        logger.info(f"  num_models: {num_models}")
-        logger.info(f"  text_dim: {text_dim}")
-        logger.info(f"  num_classes: {num_classes}")
-        logger.info(f"  use_proj: {use_proj}")
-        logger.info(f"  use_openai_embeddings: {use_openai_embeddings}")
-        logger.info(f"  embedding_model_name: {embedding_model_name}")
-        logger.info(f"  hf_token: {hf_token}")
-
-
-        # Initialize the MFModel with the token passed in
+                text_dim = self._infer_hf_text_dim(embedding_model_name)
+
+        # Initialize the MFModel
         self.model = MFModel.from_pretrained(
             checkpoint_path,
             dim=hidden_size,
@@ -279,19 +268,43 @@ def __init__(
             use_proj=use_proj,
             use_openai_embeddings=use_openai_embeddings,
             embedding_model_name=embedding_model_name,
-            hf_token=hf_token  # Pass the token here
-        )
-        self.model = self.model.eval().to(device)
+            hf_token=hf_token,
+        ).eval().to(device)
+
+        # Store strong/weak model IDs
         self.strong_model_id = MODEL_IDS[strong_model]
         self.weak_model_id = MODEL_IDS[weak_model]
 
+    @staticmethod
+    def _infer_hf_text_dim(embedding_model_name: str) -> int:
+        """
+        Helper to load a Hugging Face model and extract its hidden size.
+        Immediately frees the model from memory.
+        """
+        tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
+        hf_model = AutoModel.from_pretrained(embedding_model_name)
+        dim = hf_model.config.hidden_size
+
+        del tokenizer
+        del hf_model
+
+        return dim
+
     def calculate_strong_win_rate(self, prompt):
+        """
+        Scores the prompt using the MF model to see how
+        often the 'strong' model is predicted to win
+        over the 'weak' model.
+        """
         winrate = self.model.pred_win_rate(
-            self.strong_model_id, self.weak_model_id, prompt
+            self.strong_model_id,
+            self.weak_model_id,
+            prompt
         )
         logger.info(f"\n\nwinrate: {winrate}\n\n")
         return winrate
 
+
 # Parallelism makes the randomness non deterministic
 @no_parallel
 class RandomRouter(Router):
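
End-to-end sketch of the behavior after this series (illustrative only; the
prompt, threshold, and token are placeholders, and get_routed_model is the
method added in PATCH 05):

    from routellm.controller import Controller

    controller = Controller(
        routers=["mf"],
        strong_model="gpt-4-1106-preview",
        weak_model="mixtral-8x7b-instruct-v0.1",
        hf_token="hf_...",  # placeholder token
    )

    # Returns the name of the model the router would pick for this prompt,
    # without actually generating a completion.
    model = controller.get_routed_model(
        messages=[{"role": "user", "content": "Explain matrix factorization."}],
        router="mf",
        threshold=0.5,  # strong-model win-rate cutoff
    )
    print(model)

Internally, the MF router embeds the prompt (via OpenAI or Hugging Face,
depending on use_openai_embeddings), calls pred_win_rate(strong, weak, prompt),
and the controller roughly compares that predicted win rate against the
threshold to choose between the strong and weak model.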