From 5be28ed50af904f9a92966a7cb928029c3d30672 Mon Sep 17 00:00:00 2001 From: Michael Luo Date: Wed, 5 Feb 2025 18:36:13 -0800 Subject: [PATCH 1/3] Add video arena to fastchat leaderboard --- fastchat/serve/monitor/monitor.py | 14 ++ fastchat/serve/monitor/video_arena.py | 271 ++++++++++++++++++++++++++ 2 files changed, 285 insertions(+) create mode 100644 fastchat/serve/monitor/video_arena.py diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py index 5cbed3f6d..ba3efeb57 100644 --- a/fastchat/serve/monitor/monitor.py +++ b/fastchat/serve/monitor/monitor.py @@ -1050,6 +1050,20 @@ def build_leaderboard_tab( "Please configure it to a valid URL." ) + from fastchat.serve.monitor.video_arena import ( + build_video_arena_tab, + VIDEO_ARENA_LEADERBOARD_URL, + ) + if VIDEO_ARENA_LEADERBOARD_URL: + with gr.Tab("🎥 Text-to-Video", id=6): + build_video_arena_tab() + else: + print( + "Unable to build Video Arena's Leaderboard. " + "VIDEO_ARENA_LEADERBOARD_URL environment variable is not set. " + "Please configure it to a valid URL." + ) + if not show_plot: gr.Markdown( """ ## Visit our [HF space](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) for more analysis! diff --git a/fastchat/serve/monitor/video_arena.py b/fastchat/serve/monitor/video_arena.py new file mode 100644 index 000000000..e05ec86a3 --- /dev/null +++ b/fastchat/serve/monitor/video_arena.py @@ -0,0 +1,271 @@ +"""Video Arena module for displaying video model leaderboard and rankings. + +This module provides functionality to fetch, process and display leaderboard data +from the Video Arena API. It includes model information, score processing, and +Gradio UI components for visualization. + +Classes: + ModelVersion: Data class for storing model version information + ModelInfo: Data class for storing model class information + +Functions: + process_video_arena_leaderboard: Process raw leaderboard data into DataFrame + build_video_arena_tab: Build and display the Video Arena leaderboard UI +""" + +import os +from dataclasses import dataclass +import requests +import pandas as pd +import gradio as gr + +from fastchat.serve.monitor.monitor import model_hyperlink, recompute_final_ranking + +# URL for fetching Video Arena leaderboard data +VIDEO_ARENA_LEADERBOARD_URL = os.getenv( + "VIDEO_ARENA_LEADERBOARD_URL", + "https://www.videoarena.tv/api/v1/leaderboard" +) + + +@dataclass +class ModelVersion: + """Data class to store model version information. + + Attributes: + license: The license type of the model version + website: The website URL for the model version + """ + license: str + website: str + + +@dataclass +class ModelInfo: + """Data class to store model class information. + + Attributes: + official_name: The official display name of the model + organization: The organization that created the model + versions: Dictionary mapping version names to ModelVersion objects + """ + official_name: str + organization: str + versions: dict[str, ModelVersion] = None + + +# Mapping of model identifiers to their information +VIDEO_MODEL_INFO = { + 'veo': ModelInfo( + 'Veo', + 'Google', + versions={'2.0': ModelVersion('Proprietary', 'https://deepmind.google/technologies/veo/veo-2/')} + ), + 'minimax': ModelInfo( + 'Minimax', + 'Hailuo', + versions={'01': ModelVersion('Proprietary', 'https://hailuoai.video/')} + ), + 'kling': ModelInfo( + 'Kling', + 'Kuaishou', + versions={'1.0': ModelVersion('Proprietary', 'https://klingai.com/'), + '1.5': ModelVersion('Proprietary', 'https://klingai.com/')} + ), + 'sora': ModelInfo( + 'Sora', + 'OpenAI', + versions={'1': ModelVersion('Proprietary', 'https://openai.com/sora/')} + ), + 'luma': ModelInfo( + 'Luma', + 'LumaLabs', + versions={'1.6': ModelVersion('Proprietary', 'https://lumalabs.ai/dream-machine')} + ), + 'runway': ModelInfo( + 'Runway', + 'Runway', + versions={'default': ModelVersion('Proprietary', + 'https://runwayml.com/research/introducing-gen-3-alpha')} + ), + 'genmo': ModelInfo( + 'Genmo', + 'Genmo', + versions={ + '0.2': ModelVersion('Proprietary', 'https://www.genmo.ai/'), + 'Mochi-1': ModelVersion('Apache-2.0', 'https://www.genmo.ai/') + } + ), + 'svd': ModelInfo( + 'SVD', + 'StabilityAI', + versions={'1.0': ModelVersion('Proprietary', 'https://stability.ai/stable-video')} + ), + 'opensora': ModelInfo( + 'OpenSora', + 'OpenSora', + versions={'1.2': ModelVersion('Apache-2.0', 'https://github.com/hpcaitech/Open-Sora')} + ), + 'pika': ModelInfo( + 'Pika', + 'PikaLabs', + versions={'β': ModelVersion('Proprietary', 'pika.art'), + '1.5': ModelVersion('Proprietary', 'pika.art')} + ), +} + + +def process_video_arena_leaderboard(data): + """Process raw leaderboard data into a formatted DataFrame. + + Args: + data: Raw leaderboard data from the Video Arena API containing model scores + and metadata + + Returns: + pd.DataFrame: Processed leaderboard with rankings and confidence intervals. + Contains columns for rank, model name, scores, confidence intervals, + vote counts, organization and license information. + """ + leaderboard = [] + for item in data: + model_name = item['model'].lower() + version = item['version'] + + # Skip veo model + if model_name == 'veo': + continue + + # Get model info from mapping + model_info = VIDEO_MODEL_INFO.get( + model_name, + ModelInfo(model_name, "Unknown", {'default': ModelVersion('Proprietary', '')}) + ) + + # Determine license and website based on version + version_key = version if version in model_info.versions else 'default' + if version_key not in model_info.versions: + model_info.versions['default'] = ModelVersion('Proprietary', '') + model_version = model_info.versions[version_key] + license_type, website = model_version.license, model_version.website + + # Replace spaces with dashes in the display name + display_name = f"{model_info.official_name} {version}".replace(' ', '-') + model_data = { + "name": f"[{display_name}]({website})" if website else display_name, + "visibility": "public", + "score": round(item['scores']['elo']), + "lower": item['scores']['elo'] - item['scores']['ci_lower'], + "upper": item['scores']['elo'] + item['scores']['ci_upper'], + "votes": (item['scores']['win']['total'] + + item['scores']['loss']['total'] + + item['scores']['tie']['total']), + "organization": model_info.organization, + "license": license_type, + } + leaderboard.append(model_data) + + leaderboard = pd.DataFrame(leaderboard) + + # Calculate confidence intervals + leaderboard["rating_q975"] = leaderboard["upper"].round().astype(int) + leaderboard["rating_q025"] = leaderboard["lower"].round().astype(int) + + leaderboard["upper_diff"] = leaderboard["upper"] - leaderboard["score"] + leaderboard["lower_diff"] = leaderboard["score"] - leaderboard["lower"] + + # Round the differences to integers + leaderboard["upper_diff"] = leaderboard["upper_diff"].round().astype(int) + leaderboard["lower_diff"] = leaderboard["lower_diff"].round().astype(int) + + leaderboard["confidence_interval"] = ( + "+" + + leaderboard["upper_diff"].astype(str) + + "/-" + + leaderboard["lower_diff"].astype(str) + ) + + # Calculate rankings using the existing function + rankings_ub = recompute_final_ranking(leaderboard) + leaderboard.insert(loc=0, column="Rank* (UB)", value=rankings_ub) + + # Sort the leaderboard + leaderboard = leaderboard.sort_values( + by=["Rank* (UB)", "score"], + ascending=[True, False] + ) + + return leaderboard + + +def build_video_arena_tab(): + """Build and display the Video Arena leaderboard tab in the Gradio interface. + + Fetches data from the Video Arena API, processes it into a formatted DataFrame, + and creates a formatted display using Gradio components including: + - Summary statistics (number of models and battles) + - Interactive leaderboard table + - Explanatory text for ranking methodology + + Returns: + None. Creates and displays Gradio UI components directly. + """ + response = requests.get(VIDEO_ARENA_LEADERBOARD_URL) + if response.status_code == 200: + leaderboard = process_video_arena_leaderboard(response.json()) + leaderboard = leaderboard.rename( + columns={ + "name": "Model", + "confidence_interval": "95% CI", + "score": "Arena Score", + "organization": "Organization", + "votes": "Votes", + "license": "License", + } + ) + + column_order = [ + "Rank* (UB)", + "Model", + "Arena Score", + "95% CI", + "Votes", + "Organization", + "License", + ] + leaderboard = leaderboard[column_order] + num_models = len(leaderboard) + total_battles = int(leaderboard["Votes"].sum()) // 2 + + md = f""" + VideoArena is a free AI video service allowing users to access, compare, and rank + text-to-video capabilities of state-of-the-art generative models. This + leaderboard contains the relative performance and ranking of {num_models} + models over {total_battles} battles. + """ + + gr.Markdown(md, elem_id="leaderboard_markdown") + gr.DataFrame( + leaderboard, + datatype=["number", "markdown", "number", "str", "number", "str", "str"], + elem_id="video_arena_leaderboard", + height=600, + wrap=True, + interactive=False, + column_widths=[70, 130, 60, 80, 50, 80, 70], + ) + + gr.Markdown( + """ + ***Rank (UB)**: model's ranking (upper-bound), defined by one + the number of models + that are statistically better than the target model. + Model A is statistically better than model B when A's lower-bound score is greater + than B's upper-bound score (in 95% confidence interval). \n + **Confidence Interval**: represents the range of uncertainty around the Arena Score. + It's displayed as +X / -Y, where X is the difference between the upper bound and + the score, and Y is the difference between the score and the lower bound. + """, + elem_id="leaderboard_markdown", + ) + else: + gr.Markdown("Error with fetching Video Arena data. Check back in later.") \ No newline at end of file From 742f7a584c62036210592c1e8e64cb483cbc03da Mon Sep 17 00:00:00 2001 From: Michael Luo Date: Wed, 5 Feb 2025 18:44:21 -0800 Subject: [PATCH 2/3] add video arena hyperlink --- fastchat/serve/monitor/video_arena.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastchat/serve/monitor/video_arena.py b/fastchat/serve/monitor/video_arena.py index e05ec86a3..ff328db8a 100644 --- a/fastchat/serve/monitor/video_arena.py +++ b/fastchat/serve/monitor/video_arena.py @@ -19,7 +19,7 @@ import pandas as pd import gradio as gr -from fastchat.serve.monitor.monitor import model_hyperlink, recompute_final_ranking +from fastchat.serve.monitor.monitor import recompute_final_ranking # URL for fetching Video Arena leaderboard data VIDEO_ARENA_LEADERBOARD_URL = os.getenv( @@ -238,7 +238,7 @@ def build_video_arena_tab(): total_battles = int(leaderboard["Votes"].sum()) // 2 md = f""" - VideoArena is a free AI video service allowing users to access, compare, and rank + [VideoArena](https://www.videoarena.tv/) is a free AI video service allowing users to access, compare, and rank text-to-video capabilities of state-of-the-art generative models. This leaderboard contains the relative performance and ranking of {num_models} models over {total_battles} battles. From a65529d5a50c6652366f1b75f1c3b52ecc32fea6 Mon Sep 17 00:00:00 2001 From: Michael Luo Date: Thu, 6 Feb 2025 03:04:06 +0000 Subject: [PATCH 3/3] Fix formatter --- fastchat/serve/monitor/monitor.py | 1 + fastchat/serve/monitor/video_arena.py | 147 +++++++++++++++----------- 2 files changed, 85 insertions(+), 63 deletions(-) diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py index ba3efeb57..bbcfe6e78 100644 --- a/fastchat/serve/monitor/monitor.py +++ b/fastchat/serve/monitor/monitor.py @@ -1054,6 +1054,7 @@ def build_leaderboard_tab( build_video_arena_tab, VIDEO_ARENA_LEADERBOARD_URL, ) + if VIDEO_ARENA_LEADERBOARD_URL: with gr.Tab("🎥 Text-to-Video", id=6): build_video_arena_tab() diff --git a/fastchat/serve/monitor/video_arena.py b/fastchat/serve/monitor/video_arena.py index ff328db8a..3e3b16392 100644 --- a/fastchat/serve/monitor/video_arena.py +++ b/fastchat/serve/monitor/video_arena.py @@ -23,8 +23,7 @@ # URL for fetching Video Arena leaderboard data VIDEO_ARENA_LEADERBOARD_URL = os.getenv( - "VIDEO_ARENA_LEADERBOARD_URL", - "https://www.videoarena.tv/api/v1/leaderboard" + "VIDEO_ARENA_LEADERBOARD_URL", "https://www.videoarena.tv/api/v1/leaderboard" ) @@ -36,6 +35,7 @@ class ModelVersion: license: The license type of the model version website: The website URL for the model version """ + license: str website: str @@ -49,6 +49,7 @@ class ModelInfo: organization: The organization that created the model versions: Dictionary mapping version names to ModelVersion objects """ + official_name: str organization: str versions: dict[str, ModelVersion] = None @@ -56,61 +57,78 @@ class ModelInfo: # Mapping of model identifiers to their information VIDEO_MODEL_INFO = { - 'veo': ModelInfo( - 'Veo', - 'Google', - versions={'2.0': ModelVersion('Proprietary', 'https://deepmind.google/technologies/veo/veo-2/')} + "veo": ModelInfo( + "Veo", + "Google", + versions={ + "2.0": ModelVersion( + "Proprietary", "https://deepmind.google/technologies/veo/veo-2/" + ) + }, ), - 'minimax': ModelInfo( - 'Minimax', - 'Hailuo', - versions={'01': ModelVersion('Proprietary', 'https://hailuoai.video/')} + "minimax": ModelInfo( + "Minimax", + "Hailuo", + versions={"01": ModelVersion("Proprietary", "https://hailuoai.video/")}, ), - 'kling': ModelInfo( - 'Kling', - 'Kuaishou', - versions={'1.0': ModelVersion('Proprietary', 'https://klingai.com/'), - '1.5': ModelVersion('Proprietary', 'https://klingai.com/')} + "kling": ModelInfo( + "Kling", + "Kuaishou", + versions={ + "1.0": ModelVersion("Proprietary", "https://klingai.com/"), + "1.5": ModelVersion("Proprietary", "https://klingai.com/"), + }, ), - 'sora': ModelInfo( - 'Sora', - 'OpenAI', - versions={'1': ModelVersion('Proprietary', 'https://openai.com/sora/')} + "sora": ModelInfo( + "Sora", + "OpenAI", + versions={"1": ModelVersion("Proprietary", "https://openai.com/sora/")}, ), - 'luma': ModelInfo( - 'Luma', - 'LumaLabs', - versions={'1.6': ModelVersion('Proprietary', 'https://lumalabs.ai/dream-machine')} + "luma": ModelInfo( + "Luma", + "LumaLabs", + versions={ + "1.6": ModelVersion("Proprietary", "https://lumalabs.ai/dream-machine") + }, ), - 'runway': ModelInfo( - 'Runway', - 'Runway', - versions={'default': ModelVersion('Proprietary', - 'https://runwayml.com/research/introducing-gen-3-alpha')} + "runway": ModelInfo( + "Runway", + "Runway", + versions={ + "default": ModelVersion( + "Proprietary", "https://runwayml.com/research/introducing-gen-3-alpha" + ) + }, ), - 'genmo': ModelInfo( - 'Genmo', - 'Genmo', + "genmo": ModelInfo( + "Genmo", + "Genmo", versions={ - '0.2': ModelVersion('Proprietary', 'https://www.genmo.ai/'), - 'Mochi-1': ModelVersion('Apache-2.0', 'https://www.genmo.ai/') - } + "0.2": ModelVersion("Proprietary", "https://www.genmo.ai/"), + "Mochi-1": ModelVersion("Apache-2.0", "https://www.genmo.ai/"), + }, ), - 'svd': ModelInfo( - 'SVD', - 'StabilityAI', - versions={'1.0': ModelVersion('Proprietary', 'https://stability.ai/stable-video')} + "svd": ModelInfo( + "SVD", + "StabilityAI", + versions={ + "1.0": ModelVersion("Proprietary", "https://stability.ai/stable-video") + }, ), - 'opensora': ModelInfo( - 'OpenSora', - 'OpenSora', - versions={'1.2': ModelVersion('Apache-2.0', 'https://github.com/hpcaitech/Open-Sora')} + "opensora": ModelInfo( + "OpenSora", + "OpenSora", + versions={ + "1.2": ModelVersion("Apache-2.0", "https://github.com/hpcaitech/Open-Sora") + }, ), - 'pika': ModelInfo( - 'Pika', - 'PikaLabs', - versions={'β': ModelVersion('Proprietary', 'pika.art'), - '1.5': ModelVersion('Proprietary', 'pika.art')} + "pika": ModelInfo( + "Pika", + "PikaLabs", + versions={ + "β": ModelVersion("Proprietary", "pika.art"), + "1.5": ModelVersion("Proprietary", "pika.art"), + }, ), } @@ -129,37 +147,41 @@ def process_video_arena_leaderboard(data): """ leaderboard = [] for item in data: - model_name = item['model'].lower() - version = item['version'] + model_name = item["model"].lower() + version = item["version"] # Skip veo model - if model_name == 'veo': + if model_name == "veo": continue # Get model info from mapping model_info = VIDEO_MODEL_INFO.get( model_name, - ModelInfo(model_name, "Unknown", {'default': ModelVersion('Proprietary', '')}) + ModelInfo( + model_name, "Unknown", {"default": ModelVersion("Proprietary", "")} + ), ) # Determine license and website based on version - version_key = version if version in model_info.versions else 'default' + version_key = version if version in model_info.versions else "default" if version_key not in model_info.versions: - model_info.versions['default'] = ModelVersion('Proprietary', '') + model_info.versions["default"] = ModelVersion("Proprietary", "") model_version = model_info.versions[version_key] license_type, website = model_version.license, model_version.website # Replace spaces with dashes in the display name - display_name = f"{model_info.official_name} {version}".replace(' ', '-') + display_name = f"{model_info.official_name} {version}".replace(" ", "-") model_data = { "name": f"[{display_name}]({website})" if website else display_name, "visibility": "public", - "score": round(item['scores']['elo']), - "lower": item['scores']['elo'] - item['scores']['ci_lower'], - "upper": item['scores']['elo'] + item['scores']['ci_upper'], - "votes": (item['scores']['win']['total'] + - item['scores']['loss']['total'] + - item['scores']['tie']['total']), + "score": round(item["scores"]["elo"]), + "lower": item["scores"]["elo"] - item["scores"]["ci_lower"], + "upper": item["scores"]["elo"] + item["scores"]["ci_upper"], + "votes": ( + item["scores"]["win"]["total"] + + item["scores"]["loss"]["total"] + + item["scores"]["tie"]["total"] + ), "organization": model_info.organization, "license": license_type, } @@ -191,8 +213,7 @@ def process_video_arena_leaderboard(data): # Sort the leaderboard leaderboard = leaderboard.sort_values( - by=["Rank* (UB)", "score"], - ascending=[True, False] + by=["Rank* (UB)", "score"], ascending=[True, False] ) return leaderboard @@ -268,4 +289,4 @@ def build_video_arena_tab(): elem_id="leaderboard_markdown", ) else: - gr.Markdown("Error with fetching Video Arena data. Check back in later.") \ No newline at end of file + gr.Markdown("Error with fetching Video Arena data. Check back in later.")