diff --git a/.gitignore b/.gitignore index 39111a6..14cb0d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .venv/ .env __pycache__ +.personal/ \ No newline at end of file diff --git a/AI_BENCHMARKING_ANALYSIS.ipynb b/AI_BENCHMARKING_ANALYSIS.ipynb index 3753b54..bf4055e 100644 --- a/AI_BENCHMARKING_ANALYSIS.ipynb +++ b/AI_BENCHMARKING_ANALYSIS.ipynb @@ -34,9 +34,12 @@ "outputs": [], "source": [ "# @title Import libraries\n", + "%load_ext autoreload\n", + "%autoreload 2\n", "from functions import *\n", "from IPython.display import display, clear_output\n", - "import pandas as pd" + "import pandas as pd\n", + "from copy import deepcopy\n" ] }, { @@ -54,7 +57,350 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_17143/1846409041.py:25: DtypeWarning: Columns (18,19) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_bot_forecasts = pd.read_csv('https://data.heroku.com/dataclips/tfwiopapwgyjkawcpjmpibjlsars.csv')\n" + ] + }, + { + "data": { + "text/html": [ + "
| \n", + " | bot_question_id | \n", + "title | \n", + "resolution | \n", + "scheduled_close_time | \n", + "actual_close_time | \n", + "question_weight_x | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "pro_question_id | \n", + "question_weight_y | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "31262 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "0 | \n", + "2025-01-20 03:27:00+00 | \n", + "2025-01-20 03:27:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "31268.0 | \n", + "1.0 | \n", + "
| 1 | \n", + "31262 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "0 | \n", + "2025-01-20 03:27:00+00 | \n", + "2025-01-20 03:27:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "31268.0 | \n", + "1.0 | \n", + "
| 2 | \n", + "31262 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "0 | \n", + "2025-01-20 03:27:00+00 | \n", + "2025-01-20 03:27:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "31268.0 | \n", + "1.0 | \n", + "
| 3 | \n", + "31262 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "0 | \n", + "2025-01-20 03:27:00+00 | \n", + "2025-01-20 03:27:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "31268.0 | \n", + "1.0 | \n", + "
| 4 | \n", + "31262 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "0 | \n", + "2025-01-20 03:27:00+00 | \n", + "2025-01-20 03:27:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "31268.0 | \n", + "1.0 | \n", + "
| \n", + " | bot_question_id | \n", + "title | \n", + "resolution | \n", + "scheduled_close_time | \n", + "actual_close_time | \n", + "question_weight_x | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "pro_question_id | \n", + "question_weight_y | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 236916 | \n", + "35705 | \n", + "Which podcast will be ranked higher on Spotify... | \n", + "Candace | \n", + "2025-03-20 20:00:00+00 | \n", + "2025-03-20 20:00:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"Call Her Daddy\",\"Candace\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "NaN | \n", + "NaN | \n", + "
| 236917 | \n", + "35705 | \n", + "Which podcast will be ranked higher on Spotify... | \n", + "Candace | \n", + "2025-03-20 20:00:00+00 | \n", + "2025-03-20 20:00:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"Call Her Daddy\",\"Candace\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "NaN | \n", + "NaN | \n", + "
| 236918 | \n", + "35705 | \n", + "Which podcast will be ranked higher on Spotify... | \n", + "Candace | \n", + "2025-03-20 20:00:00+00 | \n", + "2025-03-20 20:00:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"Call Her Daddy\",\"Candace\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "NaN | \n", + "NaN | \n", + "
| 236919 | \n", + "35705 | \n", + "Which podcast will be ranked higher on Spotify... | \n", + "Candace | \n", + "2025-03-20 20:00:00+00 | \n", + "2025-03-20 20:00:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"Call Her Daddy\",\"Candace\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "NaN | \n", + "NaN | \n", + "
| 236920 | \n", + "35705 | \n", + "Which podcast will be ranked higher on Spotify... | \n", + "Candace | \n", + "2025-03-20 20:00:00+00 | \n", + "2025-03-20 20:00:00+00 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[\"Call Her Daddy\",\"Candace\"] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "NaN | \n", + "NaN | \n", + "
5 rows × 53 columns
\n", + "5 rows × 57 columns
\n", "" ], "text/plain": [ @@ -2473,39 +2714,39 @@ "3 31280 31274 5-9 1.0 \n", "4 31281 31275 119.2 1.0 \n", "\n", - " type options \\\n", - "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] \n", - "1 numeric None \n", - "2 binary None \n", - "3 multiple_choice [\"0-4\",\"5-9\",\">9\"] \n", - "4 numeric None \n", - "\n", - " pro_median 4Shadower Bot_Pepa \\\n", - "0 [0.001,0.62,0.35,0.019,0.01] NaN NaN \n", - "1 [0.0013749738,0.0014499743,0.001526641,0.00160... NaN NaN \n", - "2 0.013 NaN NaN \n", - "3 [0.16,0.44,0.4] NaN NaN \n", - "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... NaN NaN \n", - "\n", - " CatrachoCaster ... metac-o1 \\\n", - "0 NaN ... [0.45,0.3,0.15,0.05,0.05] \n", - "1 NaN ... [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 NaN ... 0.1 \n", - "3 [0.16,0.47,0.37] ... [0.25,0.6,0.15] \n", - "4 NaN ... [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", + " type options range_min range_max \\\n", + "0 multiple_choice [0, 1, 2-3, 4-6, >6] NaN NaN \n", + "1 numeric None 60.0 100.0 \n", + "2 binary None NaN NaN \n", + "3 multiple_choice [0-4, 5-9, >9] NaN NaN \n", + "4 numeric None 0.0 400.0 \n", + "\n", + " open_upper_bound open_lower_bound ... \\\n", + "0 False False ... \n", + "1 True True ... \n", + "2 False False ... \n", + "3 None None ... \n", + "4 False False ... \n", + "\n", + " metac-o1 \\\n", + "0 [0.25,0.3,0.3,0.1,0.05] \n", + "1 [0.05,0.0505882353,0.0511764706,0.0517647059,0... \n", + "2 0.1 \n", + "3 [0.45,0.45,0.1] \n", + "4 [0.0,0.0033333333,0.0066666667,0.01,0.01333333... \n", "\n", " metac-o1-preview \\\n", - "0 [0.02,0.7,0.2,0.07,0.01] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.15 \n", - "3 [0.2,0.6,0.2] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", + "0 [0.01,0.7,0.2,0.07,0.02] \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... \n", + "2 0.05 \n", + "3 [0.15,0.65,0.2] \n", + "4 [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,... \n", "\n", " metac-perplexity minefrac1 \\\n", - "0 [0.2,0.25,0.35,0.15,0.05] NaN \n", - "1 [0.05,0.0508333333,0.0516666667,0.0525,0.05333... NaN \n", + "0 [0.3,0.4,0.2,0.07,0.03] NaN \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... NaN \n", "2 0.1 NaN \n", - "3 [0.15,0.45,0.4] NaN \n", + "3 [0.15000000000000002,0.54,0.31000000000000005] NaN \n", "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... NaN \n", "\n", " mmBot \\\n", @@ -2529,7 +2770,7 @@ "3 [0.116,0.42,0.464] NaN \n", "4 [0.0,0.001311947,0.0026238939,0.0039358409,0.0... NaN \n", "\n", - "[5 rows x 53 columns]" + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -2562,10 +2803,10 @@ "5 rows × 53 columns
\n", + "5 rows × 57 columns
\n", "" ], "text/plain": [ @@ -2713,28 +2954,28 @@ "97 35386 35364 no 0.85 binary \n", "98 35387 35367 no 0.85 binary \n", "\n", - " options pro_median 4Shadower Bot_Pepa CatrachoCaster ... metac-o1 \\\n", - "94 None 0.95 0.9 NaN NaN ... 0.9 \n", - "95 None 0.05 0.95 NaN NaN ... 0.2 \n", - "96 None 0.97 0.85 NaN NaN ... 0.85 \n", - "97 None 0.666 0.8 NaN NaN ... 0.75 \n", - "98 None 0.03 0.3 NaN NaN ... 0.07 \n", - "\n", - " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai pianobot \\\n", - "94 0.9 NaN NaN 0.95 0.95 NaN \n", - "95 0.9 NaN NaN 0.15 NaN NaN \n", - "96 0.9 NaN NaN 0.9 NaN NaN \n", - "97 0.85 0.3 NaN 0.85 0.85 NaN \n", - "98 0.1 0.05 NaN 0.15 0.05 NaN \n", - "\n", - " swingswish twsummerbot wunderplumb \n", - "94 0.9 0.762 0.9 \n", - "95 0.1 0.126 0.95 \n", - "96 0.85 0.828 0.85 \n", - "97 0.7 0.132 0.3 \n", - "98 0.2 0.27 0.2 \n", - "\n", - "[5 rows x 53 columns]" + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... \n", + "\n", + " metac-o1 metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 0.9 0.9 NaN NaN 0.95 0.95 \n", + "95 0.4 0.9 NaN NaN 0.15 NaN \n", + "96 0.8 0.95 NaN NaN 0.9 NaN \n", + "97 0.8 0.85 0.3 NaN 0.85 0.85 \n", + "98 0.05 0.05 0.03 NaN 0.15 0.05 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb \n", + "94 NaN 0.9 0.762 0.9 \n", + "95 NaN 0.1 0.126 0.95 \n", + "96 NaN 0.85 0.828 0.85 \n", + "97 NaN 0.7 0.132 0.3 \n", + "98 NaN 0.2 0.27 0.2 \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -2772,7 +3013,11 @@ "df_bot_forecasts = df_bot_forecasts.reset_index()\n", "\n", "# One row per question, with pro_question_id and bot_question_id and resolution\n", - "df_pro_bot_resolved_questions_first = df_pro_bot_resolved_questions.groupby(['pro_question_id', 'bot_question_id']).first().reset_index()[['pro_question_id', 'bot_question_id', 'resolution', 'question_weight', 'type', 'options']]\n", + "df_pro_bot_resolved_questions_first = df_pro_bot_resolved_questions.groupby(\n", + " ['pro_question_id', 'bot_question_id']\n", + " ).first().reset_index()[\n", + " ['pro_question_id', 'bot_question_id', 'resolution', 'question_weight', 'type', 'options', 'range_min', 'range_max', 'open_upper_bound', 'open_lower_bound']\n", + " ]\n", "\n", "df2 = pd.merge(\n", " df_pro_bot_resolved_questions_first,\n", @@ -2793,14 +3038,15 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['pro_question_id', 'bot_question_id', 'resolution', 'question_weight',\n", - " 'type', 'options', 'pro_median', '4Shadower', 'Bot_Pepa',\n", + " 'type', 'options', 'range_min', 'range_max', 'open_upper_bound',\n", + " 'open_lower_bound', 'pro_median', '4Shadower', 'Bot_Pepa',\n", " 'CatrachoCaster', 'CumulativeBot', 'GreeneiBot2', 'Grizeu_Bot',\n", " 'InstitutPelFutur', 'KevinTestBot', 'MWG', 'NextWorldLab',\n", " 'ProfessorSP', 'RPM_bot', 'SynapseSeer', 'VeritasAI', 'X_bot',\n", @@ -2808,14 +3054,14 @@ " 'cobyj-bot', 'cookics_bot_TEST', 'jkraybill_bot', 'jonahsingerbot',\n", " 'krm-bot', 'laylaps', 'manticAI', 'metac-Gemini-Exp-1206',\n", " 'metac-Llama-3.1', 'metac-claude-3-5-sonnet-20240620',\n", - " 'metac-claude-3-5-sonnet-latest', 'metac-deepseek-r1', 'metac-exa',\n", - " 'metac-gpt-4o', 'metac-grok-2-1212', 'metac-o1', 'metac-o1-preview',\n", - " 'metac-perplexity', 'minefrac1', 'mmBot', 'pgodzinai', 'pianobot',\n", - " 'swingswish', 'twsummerbot', 'wunderplumb'],\n", + " 'metac-claude-3-5-sonnet-latest', 'metac-deepseek-r1+asknews',\n", + " 'metac-exa', 'metac-gpt-4o', 'metac-grok-2-1212', 'metac-o1',\n", + " 'metac-o1-preview', 'metac-perplexity', 'minefrac1', 'mmBot',\n", + " 'pgodzinai', 'pianobot', 'swingswish', 'twsummerbot', 'wunderplumb'],\n", " dtype='object')" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2826,7 +3072,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2836,7 +3082,7 @@ "Name: GreeneiBot2, dtype: object" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2851,7 +3097,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -2863,73 +3109,17 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "df_pro_bot_forecasts['options'] = df_pro_bot_forecasts['options'].apply(parse_options_array)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "# Simple function to parse CDF strings for numeric questions\n", - "def parse_numeric_forecasts(df):\n", - " \"\"\"\n", - " Parse CDF strings for numeric questions in-place.\n", - " \n", - " Args:\n", - " df: DataFrame with forecast data\n", - " \"\"\"\n", - " # Get numeric questions\n", - " numeric_mask = df['type'] == 'numeric'\n", - " \n", - " # List of columns to process\n", - " forecast_cols = [col for col in df.columns if col in all_bots or col in ['pro_median', 'bot_median']]\n", - " \n", - " # Process each column\n", - " for col in forecast_cols:\n", - " # Process only for numeric questions and only where the column exists\n", - " if col in df.columns:\n", - " for idx in df[numeric_mask].index:\n", - " value = df.at[idx, col]\n", - " \n", - " # Skip NaN values\n", - " if pd.isna(value):\n", - " continue\n", - " \n", - " # Process string values\n", - " if isinstance(value, str):\n", - " try:\n", - " # Parse the CDF string to an array\n", - " parsed_array = np.array([float(x) for x in value.strip('[]').split(',')])\n", - " df.at[idx, col] = parsed_array\n", - " except Exception as e:\n", - " print(f\"Warning: Could not parse {col} at index {idx}: {e}\")\n", - " \n", - " return df\n", - "\n", - "# Now parse the numeric forecasts\n", - "df_pro_bot_forecasts = parse_numeric_forecasts(df_pro_bot_forecasts)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "df_bot_vs_pro_peer = calculate_all_peer_scores(df_pro_bot_forecasts, all_bots)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_17143/199340000.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " multiple_choice_rows_with_empty_options = df_pro_bot_forecasts[df_pro_bot_forecasts['options'] == '[]'][df_pro_bot_forecasts['type'] == 'multiple_choice']\n" + ] + }, { "data": { "text/html": [ @@ -2957,11 +3147,12 @@ "5 rows × 54 columns
\n", + "5 rows × 57 columns
\n", "" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight \\\n", - "0 31268 31262 0 1.0 \n", - "3 31280 31274 5-9 1.0 \n", - "6 31292 31286 Jeff Bezos 1.0 \n", - "9 31321 31370 0 1.0 \n", - "13 31368 31366 ≥0% and <5% 1.0 \n", - "\n", - " type \\\n", - "0 multiple_choice \n", - "3 multiple_choice \n", - "6 multiple_choice \n", - "9 multiple_choice \n", - "13 multiple_choice \n", - "\n", - " options \\\n", - "0 [0, 1, 2-3, 4-6, >6] \n", - "3 [0-4, 5-9, >9] \n", - "6 [Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else] \n", - "9 [0, 1, 2, Greater than 2] \n", - "13 [Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%] \n", + " pro_question_id bot_question_id resolution question_weight \\\n", + "0 31268 31262 0 1.0 \n", + "1 31269 31263 86.82 1.0 \n", + "2 31270 31264 no 1.0 \n", + "3 31280 31274 5-9 1.0 \n", + "4 31281 31275 119.2 1.0 \n", "\n", - " pro_median 4Shadower Bot_Pepa CatrachoCaster \\\n", - "0 [0.001,0.62,0.35,0.019,0.01] NaN NaN NaN \n", - "3 [0.16,0.44,0.4] NaN NaN 6.595797 \n", - "6 [0.2,0.025,0.225,0.08,0.445,0.025] NaN NaN -70.444674 \n", - "9 [0.336,0.364,0.2,0.1] NaN NaN -87.546874 \n", - "13 [0.05,0.45,0.45,0.05] NaN NaN -16.907633 \n", - "\n", - " ... metac-o1-preview metac-perplexity minefrac1 mmBot \\\n", - "0 ... 299.573227 529.831737 NaN 229.263476 \n", - "3 ... 31.015493 2.247286 NaN 12.783337 \n", - "6 ... 29.885537 21.184400 NaN -18.457128 \n", - "9 ... -51.879379 -121.194097 NaN -80.647587 \n", - "13 ... 44.183275 33.647224 2.197891 20.067070 \n", - "\n", - " pgodzinai pianobot swingswish twsummerbot wunderplumb \\\n", - "0 270.308741 NaN NaN NaN NaN \n", - "3 15.252598 NaN NaN -4.652002 NaN \n", - "6 11.152127 NaN NaN NaN NaN \n", - "9 -49.410118 NaN NaN -62.415431 NaN \n", - "13 25.378052 NaN NaN NaN NaN \n", - "\n", - " bot_team_median \n", - "0 501.063529 \n", - "3 31.015493 \n", - "6 11.152127 \n", - "9 -69.314718 \n", - "13 -32.542240 \n", - "\n", - "[5 rows x 54 columns]" + " type options range_min range_max \\\n", + "0 multiple_choice [0, 1, 2-3, 4-6, >6] NaN NaN \n", + "1 numeric None 60.0 100.0 \n", + "2 binary None NaN NaN \n", + "3 multiple_choice [0-4, 5-9, >9] NaN NaN \n", + "4 numeric None 0.0 400.0 \n", + "\n", + " open_upper_bound open_lower_bound ... \\\n", + "0 False False ... \n", + "1 True True ... \n", + "2 False False ... \n", + "3 None None ... \n", + "4 False False ... \n", + "\n", + " metac-o1 \\\n", + "0 [0.25,0.3,0.3,0.1,0.05] \n", + "1 [0.05,0.0505882353,0.0511764706,0.0517647059,0.0523529412,0.0529411765,0.0535294118,0.0541176471,0.0547058824,0.0552941176,0.0558823529,0.0564705882,0.0570588235,0.0576470588,0.0582352941,0.0588235294,0.0594117647,0.06,0.0605882353,0.0611764706,0.0617647059,0.0623529412,0.0629411765,0.0635294118,0.0641176471,0.0647058824,0.0652941176,0.0658823529,0.0664705882,0.0670588235,0.0676470588,0.0682352941,0.0688235294,0.0694117647,0.07,0.0705882353,0.0711764706,0.0717647059,0.0723529412,0.0729411765,0.0735294118,0.0741176471,0.0747058824,0.0752941176,0.0758823529,0.0764705882,0.0770588235,0.0776470588,0.0782352941,0.0788235294,0.0794117647,0.08,0.0805882353,0.0811764706,0.0817647059,0.0823529412,0.0829411765,0.0835294118,0.0841176471,0.0847058824,0.0852941176,0.0858823529,0.0864705882,0.0870588235,0.0876470588,0.0882352941,0.0888235294,0.0894117647,0.09,0.0905882353,0.0911764706,0.0917647059,0.0923529412,0.0929411765,0.0935294118,0.0941176471,0.0947058824,0.0952941176,0.0958823529,0.0964705882,0.0970588235,0.0976470588,0.0982352941,0.0988235294,0.0994117647,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.22,0.24,0.26,0.28,0.3,0.32,0.34,0.36,0.38,0.4,0.42,0.44,0.46,0.48,0.5,0.52,0.54,0.56,0.58,0.6,0.62,0.64,0.66,0.68,0.7,0.72,0.74,0.76,0.78,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.9007692308,0.9015384615,0.9023076923,0.9030769231,0.9038461538,0.9046153846,0.9053846154,0.9061538462,0.9069230769,0.9076923077,0.9084615385,0.9092307692,0.91,0.9107692308,0.9115384615,0.9123076923,0.9130769231,0.9138461538,0.9146153846,0.9153846154,0.9161538462,0.9169230769,0.9176923077,0.9184615385,0.9192307692,0.92,0.9207692308,0.9215384615,0.9223076923,0.9230769231,0.9238461538,0.9246153846,0.9253846154,0.9261538462,0.9269230769,0.9276923077,0.9284615385,0.9292307692,0.93,0.9307692308,0.9315384615,0.9323076923,0.9330769231,0.9338461538,0.9346153846,0.9353846154,0.9361538462,0.9369230769,0.9376923077,0.9384615385,0.9392307692,0.94,0.9407692308,0.9415384615,0.9423076923,0.9430769231,0.9438461538,0.9446153846,0.9453846154,0.9461538462,0.9469230769,0.9476923077,0.9484615385,0.9492307692,0.95] \n", + "2 0.1 \n", + "3 [0.45,0.45,0.1] \n", + "4 [0.0,0.0033333333,0.0066666667,0.01,0.0133333333,0.0166666667,0.02,0.0233333333,0.0266666667,0.03,0.0333333333,0.0366666667,0.04,0.0433333333,0.0466666667,0.05,0.0533333333,0.0566666667,0.06,0.0633333333,0.0666666667,0.07,0.0733333333,0.0766666667,0.08,0.0833333333,0.0866666667,0.09,0.0933333333,0.0966666667,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.6057142857,0.6114285714,0.6171428571,0.6228571429,0.6285714286,0.6342857143,0.64,0.6457142857,0.6514285714,0.6571428571,0.6628571429,0.6685714286,0.6742857143,0.68,0.6857142857,0.6914285714,0.6971428571,0.7028571429,0.7085714286,0.7142857143,0.72,0.7257142857,0.7314285714,0.7371428571,0.7428571429,0.7485714286,0.7542857143,0.76,0.7657142857,0.7714285714,0.7771428571,0.7828571429,0.7885714286,0.7942857143,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.9025,0.905,0.9075,0.91,0.9125,0.915,0.9175,0.92,0.9225,0.925,0.9275,0.93,0.9325,0.935,0.9375,0.94,0.9425,0.945,0.9475,0.95,0.9525,0.955,0.9575,0.96,0.9625,0.965,0.9675,0.97,0.9725,0.975,0.9775,0.98,0.9825,0.985,0.9875,0.99,0.9925,0.995,0.9975,1.0] \n", + "\n", + " metac-o1-preview \\\n", + "0 [0.01,0.7,0.2,0.07,0.02] \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.8066666667,0.8133333333,0.82,0.8266666667,0.8333333333,0.84,0.8466666667,0.8533333333,0.86,0.8666666667,0.8733333333,0.88,0.8866666667,0.8933333333,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95] \n", + "2 0.05 \n", + "3 [0.15,0.65,0.2] \n", + "4 [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,0.032,0.036,0.04,0.044,0.048,0.052,0.056,0.06,0.064,0.068,0.072,0.076,0.08,0.084,0.088,0.092,0.096,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.805,0.81,0.815,0.82,0.825,0.83,0.835,0.84,0.845,0.85,0.855,0.86,0.865,0.87,0.875,0.88,0.885,0.89,0.895,0.9,0.9013333333,0.9026666667,0.904,0.9053333333,0.9066666667,0.908,0.9093333333,0.9106666667,0.912,0.9133333333,0.9146666667,0.916,0.9173333333,0.9186666667,0.92,0.9213333333,0.9226666667,0.924,0.9253333333,0.9266666667,0.928,0.9293333333,0.9306666667,0.932,0.9333333333,0.9346666667,0.936,0.9373333333,0.9386666667,0.94,0.9413333333,0.9426666667,0.944,0.9453333333,0.9466666667,0.948,0.9493333333,0.9506666667,0.952,0.9533333333,0.9546666667,0.956,0.9573333333,0.9586666667,0.96,0.9613333333,0.9626666667,0.964,0.9653333333,0.9666666667,0.968,0.9693333333,0.9706666667,0.972,0.9733333333,0.9746666667,0.976,0.9773333333,0.9786666667,0.98,0.9813333333,0.9826666667,0.984,0.9853333333,0.9866666667,0.988,0.9893333333,0.9906666667,0.992,0.9933333333,0.9946666667,0.996,0.9973333333,0.9986666667,1.0] \n", + "\n", + " metac-perplexity \\\n", + "0 [0.3,0.4,0.2,0.07,0.03] \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.6133333333,0.6266666667,0.64,0.6533333333,0.6666666667,0.68,0.6933333333,0.7066666667,0.72,0.7333333333,0.7466666667,0.76,0.7733333333,0.7866666667,0.8,0.804,0.808,0.812,0.816,0.82,0.824,0.828,0.832,0.836,0.84,0.844,0.848,0.852,0.856,0.86,0.864,0.868,0.872,0.876,0.88,0.884,0.888,0.892,0.896,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95] \n", + "2 0.1 \n", + "3 [0.15000000000000002,0.54,0.31000000000000005] \n", + "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0175,0.02,0.0225,0.025,0.0275,0.03,0.0325,0.035,0.0375,0.04,0.0425,0.045,0.0475,0.05,0.0525,0.055,0.0575,0.06,0.0625,0.065,0.0675,0.07,0.0725,0.075,0.0775,0.08,0.0825,0.085,0.0875,0.09,0.0925,0.095,0.0975,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.608,0.616,0.624,0.632,0.64,0.648,0.656,0.664,0.672,0.68,0.688,0.696,0.704,0.712,0.72,0.728,0.736,0.744,0.752,0.76,0.768,0.776,0.784,0.792,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.902,0.904,0.906,0.908,0.91,0.912,0.914,0.916,0.918,0.92,0.922,0.924,0.926,0.928,0.93,0.932,0.934,0.936,0.938,0.94,0.942,0.944,0.946,0.948,0.95,0.952,0.954,0.956,0.958,0.96,0.962,0.964,0.966,0.968,0.97,0.972,0.974,0.976,0.978,0.98,0.982,0.984,0.986,0.988,0.99,0.992,0.994,0.996,0.998,1.0] \n", + "\n", + " minefrac1 \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " mmBot \\\n", + "0 [0.009900990099009901,0.39603960396039606,0.44554455445544555,0.1188118811881188,0.0297029702970297] \n", + "1 [0.0215944348,0.0218024136,0.0220262706,0.0222657692,0.0225205234,0.0227900084,0.0230735761,0.0233704727,0.0236798595,0.0240008339,0.0243324518,0.0246737484,0.0250237592,0.0253815375,0.0257461704,0.0261167925,0.0264925953,0.0268728349,0.0272568365,0.0276439961,0.0280337803,0.0284257242,0.0288194274,0.0292145496,0.0296108048,0.0300079559,0.0304058088,0.0308042061,0.031203022,0.0316021576,0.0320015358,0.0324010988,0.0328008038,0.033200622,0.0336005361,0.0340005406,0.0344006419,0.0348008594,0.0352012288,0.0356018064,0.0360026751,0.0364039532,0.0368058059,0.0372084598,0.0376122217,0.0380175022,0.0384248443,0.0388349581,0.0392487619,0.0396674303,0.040092449,0.0405256766,0.040969412,0.0414264662,0.0419002382,0.0423947905,0.0429149226,0.0434662384,0.0440552034,0.0446891875,0.0453764888,0.0461263346,0.0469488546,0.047855024,0.0488565752,0.0499658763,0.0511957788,0.0525594355,0.0540700958,0.0557408822,0.0575845575,0.0596132911,0.061838434,0.0642703126,0.0669180506,0.0697894271,0.0728907793,0.0762269529,0.0798013046,0.0836157568,0.0876709009,0.091966147,0.096499911,0.1012698318,0.1062730078,0.1115062433,0.116966291,0.1226500836,0.1285549408,0.1346787459,0.1410200827,0.1475783286,0.1543537019,0.1613472593,0.1685608481,0.1759970129,0.1836588644,0.1915499147,0.1996738871,0.208034508,0.2166352903,0.225479315,0.2345690212,0.24390601,0.2534908708,0.2633230334,0.2734006526,0.283720526,0.2942780484,0.3050672012,0.316080577,0.3273094353,0.3387437886,0.3503725099,0.3621834602,0.3741636271,0.3862992712,0.3985760721,0.4109792702,0.4234937993,0.4361044066,0.4487957561,0.4615525185,0.4743594438,0.4872014199,0.5000635204,0.5129310433,0.5257895463,0.5386248816,0.5514232322,0.5641711536,0.5768556211,0.589464083,0.6019845173,0.6144054896,0.6267162064,0.6389065595,0.6509671563,0.6628893291,0.6746651196,0.6862872355,0.6977489765,0.7090441313,0.7201668477,0.7311114815,0.7418724312,0.7524439675,0.7628200682,0.7729942685,0.7829595382,0.7927081941,0.8022318565,0.8115214549,0.8205672863,0.8293591256,0.8378863854,0.8461383197,0.8541042651,0.8617739066,0.8691375599,0.8761864572,0.8829130238,0.8893111359,0.8953763492,0.9011060878,0.9064997881,0.9115589931,0.9162873921,0.9206908074,0.9247771276,0.9285561903,0.9320396198,0.9352406245,0.9381737618,0.9408546777,0.9432998299,0.945526202,0.9475510194,0.949391472,0.9510644542,0.9525863264,0.953972705,0.955238285,0.9563966974,0.9574604037,0.9584406278,0.9593473236,0.960189177,0.9609736386,0.9617069836,0.9623943945,0.9630400616,0.9636472966,0.9642186545,0.9647560591,0.9652609283,0.9657342945,0.9661769175,0.9665893865,0.9669722099,0.9673258911] \n", + "2 0.2 \n", + "3 [0.25,0.5,0.25] \n", + "4 [0.0,0.0006552097,0.0013605064,0.0021151815,0.0029180701,0.0037675922,0.0046618077,0.0055984833,0.0065751692,0.0075892831,0.0086381998,0.0097193446,0.0108302867,0.0119688337,0.0131331257,0.014321727,0.0155337159,0.0167687729,0.0180272663,0.0193103356,0.020619972,0.0219590952,0.0233316264,0.024742554,0.0261979914,0.0277052245,0.0292727448,0.030910267,0.0326287265,0.034440256,0.0363581376,0.0383967303,0.0405713707,0.042898249,0.0453942605,0.0480768342,0.0509637431,0.0540728987,0.0574221344,0.0610289827,0.0649104508,0.069082799,0.0735613277,0.0783601755,0.0834921337,0.0889684789,0.0947988278,0.1009910149,0.1075509944,0.1144827695,0.1217883466,0.1294677162,0.1375188601,0.1459377845,0.1547185775,0.1638534906,0.173333043,0.183146147,0.1932802518,0.2037215056,0.2144549309,0.2254646117,0.2367338883,0.2482455564,0.2599820665,0.2719257181,0.2840588463,0.2963639938,0.308824066,0.3214224646,0.3341431959,0.3469709515,0.3598911602,0.3728900098,0.3859544391,0.3990721017,0.4122313044,0.4254209242,0.4386303077,0.4518491587,0.4650674199,0.4782751541,0.4914624335,0.5046192399,0.5177353826,0.5308004395,0.5438037232,0.5567342756,0.5695808913,0.5823321691,0.5949765903,0.6075026181,0.6198988152,0.6321539735,0.6442572471,0.6561982838,0.6679673464,0.679555418,0.6909542849,0.7021565932,0.7131558737,0.7239465364,0.7345238314,0.7448837818,0.7550230879,0.7649390101,0.7746292356,0.7840917363,0.793324625,0.8023260164,0.8110939019,0.8196260428,0.8279198893,0.8359725294,0.84378067,0.8513406529,0.8586485067,0.8657000313,0.8724909149,0.8790168773,0.8852738353,0.8912580844,0.8969664881,0.9023966684,0.9075471904,0.9124177307,0.9170092252,0.9213239875,0.9253657928,0.9291399243,0.9326531773,0.9359138212,0.9389315199,0.9417172132,0.9442829632,0.9466417713,0.9488073729,0.9507940179,0.9526162437,0.9542886507,0.9558256867,0.957241447,0.9585494976,0.9597627233,0.9608932066,0.9619521358,0.9629497455,0.9638952848,0.9647970143,0.9656622247,0.9664972774,0.9673076585,0.9680980464,0.9688723855,0.9696339648,0.9703854957,0.9711291891,0.9718668279,0.9725998336,0.9733293276,0.9740561839,0.9747810757,0.9755045151,0.9762268859,0.9769484703,0.9776694709,0.9783900269,0.9791102268,0.9798301173,0.9805497088,0.9812689786,0.981987871,0.9827062964,0.9834241265,0.9841411897,0.9848572642,0.98557207,0.9862852591,0.9869964062,0.9877049976,0.9884104215,0.9891119579,0.9898087704,0.990499899,0.9911842569,0.9918606294,0.9925276775,0.9931839465,0.9938278782,0.99445783,0.9950720981,0.9956689463,0.9962466383,0.9968034747,0.9973378313,0.9978481983,0.9983332192,0.9987917276,0.9992227789,0.9996256782,1.0] \n", + "\n", + " pgodzinai \\\n", + "0 [0.014925742574257425,0.5137871287128712,0.3349009900990099,0.10168316831683169,0.03470297029702965] \n", + "1 [0.001,0.001060875,0.0011396,0.0012863125,0.0015459984,0.0019048369,0.0023147701,0.0027425688,0.0031719899,0.0035935463,0.0040047171,0.0044081612,0.0048073678,0.0052048637,0.0056023079,0.0060005117,0.0063995798,0.0067992898,0.0071993689,0.0075995902,0.007999808,0.0083999595,0.0088000381,0.0092000616,0.0096525538,0.0103347221,0.0114180238,0.0128617561,0.0144931539,0.0161909912,0.0178965175,0.0195748423,0.0212159342,0.0228289888,0.0244265464,0.0260177161,0.0276085304,0.0292020038,0.0307985773,0.0323974755,0.0339977246,0.0355985069,0.0371992898,0.0387998404,0.0404001295,0.0420002192,0.0436001942,0.0452001261,0.0468000593,0.0484758458,0.0504834257,0.0530704368,0.056178071,0.0595567722,0.0630314345,0.0665171977,0.0699636664,0.0733563529,0.0767085411,0.0800383523,0.0833589543,0.0866790344,0.0900028852,0.0933311337,0.0967326953,0.1004442449,0.1047006189,0.1094577119,0.1144907128,0.1196353715,0.1248049846,0.1299418958,0.1350232879,0.1400570021,0.1452540043,0.1513017567,0.1589133116,0.1680377058,0.1780770546,0.1885468618,0.1991553484,0.2096896812,0.2200450325,0.2302229342,0.2402681458,0.2502302229,0.2601553402,0.27007834,0.2800179047,0.2899799302,0.2999629146,0.3099614863,0.3199691186,0.3299801956,0.3403173669,0.3521487483,0.3668129253,0.3844513624,0.4041888551,0.4247935739,0.4442765262,0.4605082419,0.4728869633,0.4822309604,0.4895341295,0.4956449952,0.5013686886,0.5073076754,0.5137610388,0.5206987551,0.5276657564,0.5340334461,0.5395220756,0.5442306919,0.5484901071,0.5530599502,0.5588761244,0.5663266439,0.5752119583,0.585204242,0.5959735276,0.6071500854,0.6184053116,0.6295209059,0.6403758638,0.650921239,0.6611693012,0.671174569,0.681009388,0.6907471485,0.7004527783,0.7101763721,0.7199504677,0.7297911321,0.7397010124,0.7496729757,0.7596938994,0.7697481465,0.7798202777,0.7898968803,0.7999675731,0.8100253018,0.8200662214,0.8300893951,0.8400025166,0.8494453768,0.8579165269,0.8651653723,0.8712540566,0.8763468591,0.8806505608,0.8844338485,0.8879756773,0.8915092577,0.8952099002,0.8991948145,0.9035195392,0.9081838533,0.9131467515,0.9183416751,0.9236898731,0.9291127196,0.9345414554,0.9399230919,0.9451659123,0.9500324455,0.9542146638,0.9575690762,0.9601504006,0.9620795658,0.9635039422,0.9646063832,0.965571997,0.9665531773,0.9676621061,0.9689711529,0.9705116418,0.9722785871,0.9742409577,0.9763519694,0.9785580215,0.9808067315,0.9830531373,0.9852633275,0.987415817,0.9895011861,0.9915203598,0.9934820158,0.9953894047,0.9970771779,0.998127745,0.99846,0.99852,0.99858,0.99864,0.9987,0.99876,0.99882,0.99888,0.99894,0.99899] \n", + "2 0.07 \n", + "3 [0.27499999999999997,0.5125,0.21249999999999997] \n", + "4 [0.0,0.0001141583,0.0002446967,0.0003862688,0.0005272579,0.0006650709,0.0008243437,0.0011074433,0.0016696544,0.0025699094,0.0037138357,0.0049708626,0.0062610152,0.0075426566,0.0089765864,0.0111726822,0.0147311078,0.0195212559,0.0249547717,0.0306181288,0.0363105138,0.0419407763,0.0476011969,0.053516341,0.0598014349,0.0663689162,0.0730761187,0.0798334547,0.0865904866,0.0933196582,0.1000172031,0.1066924089,0.1133554776,0.1200140176,0.1266729489,0.1333343989,0.1399984689,0.1466644317,0.1533314439,0.1599988203,0.1666661444,0.1733332523,0.1800001372,0.1866668598,0.1933334943,0.2000000995,0.2066667101,0.2133333393,0.2199999878,0.22666665,0.2333333196,0.2399999916,0.2466666631,0.2533333329,0.2600000011,0.2666666681,0.2733333345,0.2800000007,0.286666667,0.2933333334,0.2999999999,0.3066666665,0.3133333332,0.3199999999,0.3266666666,0.3333333333,0.34,0.3466666667,0.3533333333,0.36,0.3666666667,0.3733333333,0.38,0.3866666667,0.3934628939,0.400837331,0.40925763,0.4186848364,0.428718413,0.4390353607,0.4494419812,0.4597974687,0.4700329298,0.4801500685,0.4901790777,0.500153105,0.5101028922,0.5200515519,0.5300114112,0.5398722838,0.5492279015,0.5576212737,0.5650210292,0.571743695,0.5780856137,0.5842571713,0.5904328096,0.5967209586,0.603152213,0.6097133168,0.6163738459,0.6230958146,0.6298433017,0.6365902337,0.6433215069,0.6500308134,0.656718392,0.6633885674,0.6700472479,0.6767001542,0.6833518918,0.6900055659,0.6966627826,0.7033239321,0.7099885835,0.7166558627,0.723324761,0.7299943545,0.7366639271,0.7433330133,0.7500013847,0.7566690034,0.7633359628,0.770002427,0.7766685825,0.7833346018,0.7900006228,0.7966667394,0.8033330023,0.8099994258,0.8166659972,0.8233326871,0.8299994586,0.8366662749,0.8433331037,0.8499999207,0.8566667097,0.8633334627,0.8700001785,0.8766668606,0.8833335157,0.8899751517,0.8964699017,0.9025861327,0.9081211655,0.9130226546,0.9173491712,0.921198292,0.9246959323,0.9279877368,0.9312103051,0.934472912,0.9378540969,0.9414005467,0.9450901244,0.9487670554,0.9522009139,0.9552513327,0.9578998205,0.9601715711,0.96211589,0.9638162438,0.9653702301,0.9668664828,0.9683781475,0.9699605983,0.9716476808,0.9734519305,0.9753688047,0.9773815283,0.9794657325,0.9815941718,0.9837408125,0.9858836701,0.9879773814,0.9898993305,0.9914888717,0.9926681205,0.9934599632,0.9939261174,0.9941560479,0.9942611072,0.9943265488,0.9943865488,0.9944537386,0.9945561009,0.9947328687,0.9950042368,0.9953660612,0.9958058993,0.9963078442,0.9968511117,0.9974139813,0.9979781729,0.9985251814,0.999027536,0.9994498435,0.999736686,0.9998734993,0.99994,1.0] \n", + "\n", + " pianobot swingswish \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " twsummerbot \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 [0.116,0.42,0.464] \n", + "4 [0.0,0.001311947,0.0026238939,0.0039358409,0.0052477878,0.0065597348,0.0078716817,0.0091836287,0.0104955756,0.0118075226,0.0131194695,0.0144314165,0.0157433634,0.0170553104,0.0183672573,0.0196792043,0.0209911512,0.0223030982,0.0236150451,0.0249269921,0.026238939,0.027550886,0.0288628329,0.0301747799,0.0314867268,0.0327986738,0.0341106207,0.0354225677,0.0367345146,0.0380464616,0.0393584085,0.0406703555,0.0419823024,0.0432942494,0.0446061963,0.0459181433,0.0472300902,0.0485420372,0.0498539841,0.0511659311,0.052477878,0.053789825,0.0551017719,0.0564137189,0.0577256658,0.0590376128,0.0603495597,0.0616615067,0.0629734536,0.0642854006,0.0655973475,0.0669092945,0.0682212414,0.0695331884,0.0708451353,0.0721570823,0.0734690292,0.0747809762,0.0760929231,0.0774048701,0.078716817,0.080028764,0.0813407109,0.0826526579,0.0839646048,0.0852765518,0.0865884987,0.0879004457,0.0902457862,0.0933094828,0.0978079399,0.1023063969,0.1068048539,0.111303311,0.115801768,0.120300225,0.124798682,0.1292971391,0.1338199508,0.1388055027,0.1440933779,0.1496807808,0.1571177226,0.1652387403,0.1753118263,0.1904276903,0.2058197291,0.2212117678,0.237030829,0.2551785571,0.273870758,0.2925629589,0.3115548313,0.3307464845,0.3499926649,0.3692260274,0.3884136416,0.407661417,0.4269091924,0.4457073638,0.464050886,0.4823944081,0.5007379302,0.5190814523,0.5374249745,0.5538739661,0.5696118391,0.5853388804,0.6010659216,0.6161284786,0.6273538036,0.6382421632,0.6486483242,0.6588094975,0.668725683,0.6786418685,0.688558054,0.6984742395,0.708390425,0.7183066106,0.7278808508,0.7373411092,0.7468013677,0.7561442929,0.7645842622,0.7730242316,0.7814642009,0.7899041702,0.7983441395,0.8067841088,0.8152111577,0.8229940495,0.8307769414,0.8385598332,0.8447944123,0.8509124517,0.8563824526,0.8610823306,0.8657454654,0.8704086002,0.8750717351,0.8797348699,0.8843980047,0.8890611396,0.8934873987,0.8970573375,0.9006272763,0.9041972151,0.9077671539,0.9103291006,0.9126390493,0.914948998,0.9172589467,0.9195688953,0.921878844,0.9236671785,0.9253634634,0.9270597483,0.9287560333,0.9304523182,0.9321486031,0.933844888,0.935541173,0.9372374579,0.9389337428,0.9406300277,0.9423263126,0.9440225976,0.9457188825,0.9474151674,0.9491114523,0.9508077373,0.9525040222,0.9542003071,0.955896592,0.9575928769,0.9592891619,0.9609854468,0.9626817317,0.9643780166,0.9660743016,0.9677705865,0.9694668714,0.9711631563,0.9728594412,0.9745557262,0.9762520111,0.977948296,0.9796445809,0.9813408659,0.9830371508,0.9847334357,0.9864297206,0.9881260055,0.9898222905,0.9915185754,0.9932148603,0.9949111452,0.9966074302,0.9983037151,1.0] \n", + "\n", + " wunderplumb \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -3183,11 +3408,12 @@ "5 rows × 54 columns
\n", + "5 rows × 57 columns
\n", "" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight \\\n", - "81 35169 35119 Not in top 50 1.0 \n", - "82 35170 35121 3 or more 1.0 \n", - "83 35171 35123 ≥7.5 and ≤8.5 1.0 \n", - "91 35377 35334 Jimmy Patronis 1.0 \n", - "92 35378 35336 31-49 1.0 \n", - "\n", - " type \\\n", - "81 multiple_choice \n", - "82 multiple_choice \n", - "83 multiple_choice \n", - "91 multiple_choice \n", - "92 multiple_choice \n", + " pro_question_id bot_question_id resolution question_weight type \\\n", + "94 35380 35345 yes 1.00 binary \n", + "95 35381 35354 no 1.00 binary \n", + "96 35385 35358 yes 1.00 binary \n", + "97 35386 35364 no 0.85 binary \n", + "98 35387 35367 no 0.85 binary \n", "\n", - " options \\\n", - "81 [0-10, 11-20, 21-30, 31-40, 41-50, Not in top 50] \n", - "82 [0, 1, 2, 3 or more] \n", - "83 [<7.5, ≥7.5 and ≤8.5, >8.5 and <9.0, ≥9.0 and ≤9.5, >9.5] \n", - "91 [Jimmy Patronis, Gay Valimont, Someone else] \n", - "92 [0-24, 25-30, 31-49, 50-70, >70] \n", - "\n", - " pro_median 4Shadower Bot_Pepa CatrachoCaster \\\n", - "81 [0.02,0.01,0.015,0.015,0.05,0.89] NaN -280.223742 NaN \n", - "82 [0.01,0.18,0.54,0.27] NaN -77.944110 NaN \n", - "83 [0.02,0.3,0.3,0.3,0.08] NaN -70.227966 NaN \n", - "91 [0.997,0.001,0.002] -17.134888 -15.951442 NaN \n", - "92 [0.001,0.359,0.55,0.08,0.01] -69.314718 -87.183897 NaN \n", - "\n", - " ... metac-o1-preview metac-perplexity minefrac1 mmBot \\\n", - "81 ... -448.863637 -178.058617 -300.703183 -287.919846 \n", - "82 ... -99.325177 -18.677591 -52.324814 10.536052 \n", - "83 ... -132.175584 -26.570317 NaN -18.232156 \n", - "91 ... -3.781749 -4.828879 NaN -12.482886 \n", - "92 ... -170.474809 -290.872090 NaN -170.474809 \n", - "\n", - " pgodzinai pianobot swingswish twsummerbot wunderplumb \\\n", - "81 -339.002408 NaN NaN -234.857021 -240.919483 \n", - "82 25.951120 NaN NaN 27.650877 -64.460900 \n", - "83 NaN NaN NaN -17.832954 -56.798404 \n", - "91 -8.037710 NaN -11.352931 NaN -14.781838 \n", - "92 -31.845373 NaN -48.097266 NaN -74.923665 \n", - "\n", - " bot_team_median \n", - "81 -287.919846 \n", - "82 27.650877 \n", - "83 -62.860866 \n", - "91 -12.104814 \n", - "92 -20.067070 \n", - "\n", - "[5 rows x 54 columns]" + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... \n", + "\n", + " metac-o1 metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 0.9 0.9 NaN NaN 0.95 0.95 \n", + "95 0.4 0.9 NaN NaN 0.15 NaN \n", + "96 0.8 0.95 NaN NaN 0.9 NaN \n", + "97 0.8 0.85 0.3 NaN 0.85 0.85 \n", + "98 0.05 0.05 0.03 NaN 0.15 0.05 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb \n", + "94 NaN 0.9 0.762 0.9 \n", + "95 NaN 0.1 0.126 0.95 \n", + "96 NaN 0.85 0.828 0.85 \n", + "97 NaN 0.7 0.132 0.3 \n", + "98 NaN 0.2 0.27 0.2 \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "| \n", - " | pro_question_id | \n", - "bot_question_id | \n", - "resolution | \n", - "question_weight | \n", - "type | \n", - "options | \n", - "pro_median | \n", - "4Shadower | \n", - "Bot_Pepa | \n", - "CatrachoCaster | \n", - "... | \n", - "metac-o1-preview | \n", - "metac-perplexity | \n", - "minefrac1 | \n", - "mmBot | \n", - "pgodzinai | \n", - "pianobot | \n", - "swingswish | \n", - "twsummerbot | \n", - "wunderplumb | \n", + } + ], + "source": [ + "multiple_choice_rows_with_empty_options = df_pro_bot_forecasts[df_pro_bot_forecasts['options'] == '[]'][df_pro_bot_forecasts['type'] == 'multiple_choice']\n", + "if len(multiple_choice_rows_with_empty_options) > 0:\n", + " display_head_and_tail(multiple_choice_rows_with_empty_options)\n", + " raise ValueError(\"Multiple choice questions with empty options found\")\n", + "\n", + "df_pro_bot_forecasts['options'] = df_pro_bot_forecasts['options'].apply(parse_options_array) # @Check: TODO: Refactor/move this (and other times parse_options_array is used) to one central area at beginning cell data normalization should happen together and be availabe at all times in notebook\n", + "display_head_and_tail(df_pro_bot_forecasts)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple function to parse CDF strings for numeric questions\n", + "def parse_numeric_forecasts(df):\n", + " \"\"\"\n", + " Parse CDF strings for numeric questions in-place.\n", + "\n", + " Args:\n", + " df: DataFrame with forecast data\n", + " \"\"\"\n", + " # Get numeric questions\n", + " numeric_mask = df['type'] == 'numeric'\n", + "\n", + " # List of columns to process\n", + " forecast_cols = [col for col in df.columns if col in all_bots or col in ['pro_median', 'bot_median']]\n", + "\n", + " # Process each column\n", + " for col in forecast_cols:\n", + " # Process only for numeric questions and only where the column exists\n", + " if col in df.columns:\n", + " for idx in df[numeric_mask].index:\n", + " value = df.at[idx, col]\n", + "\n", + " # Skip NaN values\n", + " if pd.isna(value):\n", + " continue\n", + "\n", + " # Process string values\n", + " if isinstance(value, str):\n", + " try:\n", + " # Parse the CDF string to an array\n", + " parsed_array = np.array([float(x) for x in value.strip('[]').split(',')])\n", + " df.at[idx, col] = parsed_array\n", + " except Exception as e:\n", + " print(f\"Warning: Could not parse {col} at index {idx}: {e}\")\n", + "\n", + " return df\n", + "\n", + "# Now parse the numeric forecasts\n", + "df_pro_bot_forecasts = parse_numeric_forecasts(df_pro_bot_forecasts)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n" + ] + } + ], + "source": [ + "df_bot_vs_pro_peer = calculate_all_peer_scores(df_pro_bot_forecasts, all_bots)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
|---|
| \n", + " | pro_question_id | \n", + "bot_question_id | \n", + "resolution | \n", + "question_weight | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "... | \n", + "metac-o1-preview | \n", + "metac-perplexity | \n", + "minefrac1 | \n", + "mmBot | \n", + "pgodzinai | \n", + "pianobot | \n", + "swingswish | \n", + "twsummerbot | \n", + "wunderplumb | \n", "bot_team_median | \n", "||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | \n", - "31270 | \n", - "31264 | \n", - "no | \n", + "0 | \n", + "31268 | \n", + "31262 | \n", + "0 | \n", "1.0 | \n", - "binary | \n", - "None | \n", - "0.013 | \n", - "NaN | \n", + "multiple_choice | \n", + "[0, 1, 2-3, 4-6, >6] | \n", "NaN | \n", "NaN | \n", + "False | \n", + "False | \n", "... | \n", - "-14.943369 | \n", - "-9.227528 | \n", + "2.302585 | \n", + "5.703782 | \n", "NaN | \n", - "-21.005831 | \n", - "-5.948545 | \n", + "2.292635 | \n", + "2.703087 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", - "-14.943369 | \n", + "4.605170 | \n", "|
| 5 | \n", - "31282 | \n", - "31276 | \n", - "yes | \n", + "3 | \n", + "31280 | \n", + "31274 | \n", + "5-9 | \n", "1.0 | \n", - "binary | \n", - "None | \n", - "0.45 | \n", + "multiple_choice | \n", + "[0-4, 5-9, >9] | \n", "NaN | \n", "NaN | \n", - "67.445505 | \n", + "None | \n", + "None | \n", "... | \n", - "-25.131443 | \n", - "44.183275 | \n", - "NaN | \n", - "51.082562 | \n", - "32.047190 | \n", + "0.390198 | \n", + "0.204794 | \n", "NaN | \n", + "0.127833 | \n", + "0.152526 | \n", "NaN | \n", "NaN | \n", + "-0.046520 | \n", "NaN | \n", - "32.047190 | \n", + "0.310155 | \n", "
| 8 | \n", - "31294 | \n", - "31288 | \n", - "yes | \n", + "6 | \n", + "31292 | \n", + "31286 | \n", + "Jeff Bezos | \n", "1.0 | \n", - "binary | \n", - "None | \n", - "0.95 | \n", + "multiple_choice | \n", + "[Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else] | \n", "NaN | \n", "NaN | \n", - "-19.645607 | \n", + "False | \n", + "False | \n", "... | \n", - "0.000000 | \n", - "0.000000 | \n", + "0.298855 | \n", + "0.211844 | \n", + "NaN | \n", + "-0.184571 | \n", + "0.112526 | \n", "NaN | \n", - "-11.122564 | \n", - "-14.715764 | \n", "NaN | \n", "NaN | \n", - "-39.812370 | \n", "NaN | \n", - "-17.185026 | \n", + "0.112526 | \n", "
| 12 | \n", - "31338 | \n", - "31334 | \n", - "yes | \n", + "9 | \n", + "31321 | \n", + "31370 | \n", + "0 | \n", "1.0 | \n", - "binary | \n", - "None | \n", - "0.9 | \n", + "multiple_choice | \n", + "[0, 1, 2, Greater than 2] | \n", "NaN | \n", "NaN | \n", - "-0.309119 | \n", + "None | \n", + "None | \n", "... | \n", - "-18.232156 | \n", - "0.000000 | \n", + "-0.518794 | \n", + "-1.211941 | \n", "NaN | \n", - "5.406722 | \n", - "-5.715841 | \n", + "-0.806476 | \n", + "-0.494101 | \n", "NaN | \n", "NaN | \n", - "-49.977579 | \n", + "-0.624154 | \n", "NaN | \n", - "-5.715841 | \n", + "-0.681313 | \n", "
| 16 | \n", - "33876 | \n", - "33751 | \n", - "no | \n", + "13 | \n", + "31368 | \n", + "31366 | \n", + "≥0% and <5% | \n", "1.0 | \n", - "binary | \n", - "None | \n", - "0.058 | \n", - "NaN | \n", + "multiple_choice | \n", + "[Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%] | \n", "NaN | \n", "NaN | \n", + "None | \n", + "None | \n", "... | \n", - "-4.561051 | \n", - "0.845671 | \n", - "NaN | \n", - "-6.808337 | \n", + "0.330943 | \n", + "0.510826 | \n", + "0.021979 | \n", + "0.200671 | \n", + "0.253781 | \n", "NaN | \n", "NaN | \n", "NaN | \n", - "-7.606972 | \n", "NaN | \n", - "-7.606972 | \n", + "0.158111 | \n", "
5 rows × 54 columns
\n", + "5 rows × 58 columns
\n", "5 rows × 54 columns
\n", + "5 rows × 58 columns
\n", "5 rows × 58 columns
\n", + "" + ], + "text/plain": [ + " pro_question_id bot_question_id resolution question_weight type \\\n", + "2 31270 31264 no 1.0 binary \n", + "5 31282 31276 yes 1.0 binary \n", + "8 31294 31288 yes 1.0 binary \n", + "12 31338 31334 yes 1.0 binary \n", + "16 33876 33751 no 1.0 binary \n", + "\n", + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "2 None NaN NaN False False ... \n", + "5 None NaN NaN None None ... \n", + "8 None NaN NaN False False ... \n", + "12 None NaN NaN False False ... \n", + "16 None NaN NaN False False ... \n", + "\n", + " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "2 -0.038208 -0.092275 NaN -0.210058 -0.059485 \n", + "5 -0.251314 0.441833 NaN 0.510826 0.320472 \n", + "8 -0.054067 0.000000 NaN -0.111226 -0.147158 \n", + "12 -0.057158 0.000000 NaN 0.054067 -0.057158 \n", + "16 0.008457 0.008457 NaN -0.068083 NaN \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", + "2 NaN NaN NaN NaN -0.149434 \n", + "5 NaN NaN NaN NaN 0.367725 \n", + "8 NaN NaN -0.398124 NaN -0.147158 \n", + "12 NaN NaN -0.499776 NaN -0.057158 \n", + "16 NaN NaN -0.076070 NaN -0.096728 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "| \n", + " | pro_question_id | \n", + "bot_question_id | \n", + "resolution | \n", + "question_weight | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "... | \n", + "metac-o1-preview | \n", + "metac-perplexity | \n", + "minefrac1 | \n", + "mmBot | \n", + "pgodzinai | \n", + "pianobot | \n", + "swingswish | \n", + "twsummerbot | \n", + "wunderplumb | \n", + "bot_team_median | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 94 | \n", + "35380 | \n", + "35345 | \n", + "yes | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.054067 | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000 | \n", + "NaN | \n", + "-0.054067 | \n", + "-0.220515 | \n", + "-0.054067 | \n", + "-0.054067 | \n", + "
| 95 | \n", + "35381 | \n", + "35354 | \n", + "no | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-2.251292 | \n", + "NaN | \n", + "NaN | \n", + "-0.111226 | \n", + "NaN | \n", + "NaN | \n", + "-0.054067 | \n", + "-0.083382 | \n", + "-2.944439 | \n", + "-0.111226 | \n", + "
| 96 | \n", + "35385 | \n", + "35358 | \n", + "yes | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.020834 | \n", + "NaN | \n", + "NaN | \n", + "-0.074901 | \n", + "NaN | \n", + "NaN | \n", + "-0.132060 | \n", + "-0.158283 | \n", + "-0.132060 | \n", + "-0.158283 | \n", + "
| 97 | \n", + "35386 | \n", + "35364 | \n", + "no | \n", + "0.85 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.680430 | \n", + "0.628948 | \n", + "NaN | \n", + "-0.680430 | \n", + "-0.680430 | \n", + "NaN | \n", + "-0.091255 | \n", + "0.811793 | \n", + "0.628948 | \n", + "-0.091255 | \n", + "
| 98 | \n", + "35387 | \n", + "35367 | \n", + "no | \n", + "0.85 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.017709 | \n", + "0.000000 | \n", + "NaN | \n", + "-0.112251 | \n", + "-0.017709 | \n", + "NaN | \n", + "-0.163782 | \n", + "-0.241614 | \n", + "-0.163782 | \n", + "-0.112251 | \n", + "
5 rows × 58 columns
\n", + "| \n", + " | bot | \n", + "Peer Score | \n", + "|||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Rank | \n", + "\n", + " | \n", + " | |||||||||||||||||||||
| 1 | \n", + "metac-o1 | \n", + "3864.168122 | \n", + "|||||||||||||||||||||
| 2 | \n", + "bot_median | \n", + "3472.028144 | \n", + "|||||||||||||||||||||
| 3 | \n", + "metac-o1-preview | \n", + "3162.155445 | \n", + "|||||||||||||||||||||
| 4 | \n", + "manticAI | \n", + "2142.538438 | \n", + "|||||||||||||||||||||
| 5 | \n", + "metac-Gemini-Exp-1206 | \n", + "2072.216227 | \n", + "|||||||||||||||||||||
| 6 | \n", + "acm_bot | \n", + "1876.466009 | \n", + "|||||||||||||||||||||
| 7 | \n", + "twsummerbot | \n", + "1763.532046 | \n", + "|||||||||||||||||||||
| 8 | \n", + "metac-perplexity | \n", + "1697.555196 | \n", + "|||||||||||||||||||||
| 9 | \n", + "GreeneiBot2 | \n", + "1603.998618 | \n", + "|||||||||||||||||||||
| 10 | \n", + "cookics_bot_TEST | \n", + "1140.390796 | \n", + "|||||||||||||||||||||
| 11 | \n", + "metac-claude-3-5-sonnet-latest | \n", + "1134.209821 | \n", + "|||||||||||||||||||||
| 12 | \n", + "SynapseSeer | \n", + "1066.533051 | \n", + "|||||||||||||||||||||
| 13 | \n", + "CumulativeBot | \n", + "1030.716475 | \n", + "|||||||||||||||||||||
| 14 | \n", + "pgodzinai | \n", + "926.081448 | \n", + "|||||||||||||||||||||
| 15 | \n", + "jkraybill_bot | \n", + "627.932509 | \n", + "|||||||||||||||||||||
| 16 | \n", + "metac-deepseek-r1+asknews | \n", + "614.572462 | \n", + "|||||||||||||||||||||
| 17 | \n", + "question_weight | \n", "378.020000 | \n", "|||||||||||||||||||||
| 3 | \n", "4 | \n", "bot_median | \n", - "2481.552010 | \n", + "2374.216338 | \n", "97 | \n", "93.10 | \n", "|||||||||||||||||
| 15 | \n", "16 | \n", - "metac-deepseek-r1 | \n", + "metac-deepseek-r1+asknews | \n", "1518.308625 | \n", "55 | \n", "52.10 | \n", @@ -4711,7 +5487,7 @@ "0 1 pro_median 4238.561607 97 \n", "1 2 metac-o1 3010.353788 96 \n", "2 3 metac-perplexity 2774.080331 94 \n", - "3 4 bot_median 2481.552010 97 \n", + "3 4 bot_median 2374.216338 97 \n", "4 5 acm_bot 2239.058675 85 \n", "5 6 metac-claude-3-5-sonnet-20240620 2018.110211 95 \n", "6 7 manticAI 1865.126260 74 \n", @@ -4723,7 +5499,7 @@ "12 13 metac-Gemini-Exp-1206 1595.682612 81 \n", "13 14 NextWorldLab 1583.026226 85 \n", "14 15 metac-o1-preview 1527.657141 96 \n", - "15 16 metac-deepseek-r1 1518.308625 55 \n", + "15 16 metac-deepseek-r1+asknews 1518.308625 55 \n", "16 17 laylaps 1500.567874 68 \n", "17 18 mmBot 1482.726445 97 \n", "18 19 Grizeu_Bot 1399.477718 55 \n", @@ -4806,7 +5582,7 @@ "46 52.10 " ] }, - "execution_count": 41, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -4875,7 +5651,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -4957,17 +5733,17 @@ "|||||||||||||||||
| bot_median | \n", - "2481.6 | \n", + "2374.2 | \n", "93.1 | \n", - "26.7 | \n", - "55.791339 | \n", - "5.782185 | \n", - "4.609796 | \n", + "25.5 | \n", + "56.712830 | \n", + "5.877687 | \n", + "4.338745 | \n", "1.985277 | \n", - "38.1 | \n", - "15.2 | \n", - "0.999994 | \n", - "0.000013 | \n", + "37.2 | \n", + "13.8 | \n", + "0.999982 | \n", + "0.000037 | \n", "|||
| acm_bot | \n", @@ -5124,7 +5900,7 @@ "0.070922 | \n", "||||||||||||||||||||||
| metac-deepseek-r1 | \n", + "metac-deepseek-r1+asknews | \n", "1518.3 | \n", "52.1 | \n", "29.1 | \n", @@ -5580,7 +6356,7 @@ "pro_median 4238.6 93.1 45.5 62.229168 \n", "metac-o1 3010.4 92.1 32.7 57.756859 \n", "metac-perplexity 2774.1 90.1 30.8 67.210383 \n", - "bot_median 2481.6 93.1 26.7 55.791339 \n", + "bot_median 2374.2 93.1 25.5 56.712830 \n", "acm_bot 2239.1 81.2 27.6 55.554054 \n", "metac-claude-3-5-sonnet-20240620 2018.1 91.5 22.1 64.219307 \n", "manticAI 1865.1 70.4 26.5 66.353059 \n", @@ -5592,7 +6368,7 @@ "metac-Gemini-Exp-1206 1595.7 77.5 20.6 67.099981 \n", "NextWorldLab 1583.0 81.2 19.5 66.411747 \n", "metac-o1-preview 1527.7 92.1 16.6 87.111568 \n", - "metac-deepseek-r1 1518.3 52.1 29.1 62.764970 \n", + "metac-deepseek-r1+asknews 1518.3 52.1 29.1 62.764970 \n", "laylaps 1500.6 65.1 23.1 74.457365 \n", "mmBot 1482.7 93.1 15.9 79.990502 \n", "Grizeu_Bot 1399.5 52.4 26.7 60.886905 \n", @@ -5629,7 +6405,7 @@ "pro_median 6.449398 7.059105 1.985277 58.3 \n", "metac-o1 6.018299 5.431054 1.985550 44.6 \n", "metac-perplexity 7.080664 4.348308 1.986114 44.9 \n", - "bot_median 5.782185 4.609796 1.985277 38.1 \n", + "bot_median 5.877687 4.338745 1.985277 37.2 \n", "acm_bot 6.163169 4.471343 1.988985 39.8 \n", "metac-claude-3-5-sonnet-20240620 6.713594 3.285252 1.985788 35.4 \n", "manticAI 7.905338 3.348936 1.993488 42.2 \n", @@ -5641,7 +6417,7 @@ "metac-Gemini-Exp-1206 7.622046 2.701303 1.990426 35.8 \n", "NextWorldLab 7.367722 2.644427 1.988985 34.1 \n", "metac-o1-preview 9.077077 1.827344 1.985550 34.6 \n", - "metac-deepseek-r1 8.695578 3.351382 2.005379 46.6 \n", + "metac-deepseek-r1+asknews 8.695578 3.351382 2.005379 46.6 \n", "laylaps 9.228204 2.497799 1.996341 41.5 \n", "mmBot 8.290173 1.921090 1.985277 32.4 \n", "Grizeu_Bot 8.415222 3.176755 2.005555 43.6 \n", @@ -5678,7 +6454,7 @@ "pro_median 32.7 1.000000 0.000000 \n", "metac-o1 20.7 1.000000 0.000000 \n", "metac-perplexity 16.7 0.999982 0.000036 \n", - "bot_median 15.2 0.999994 0.000013 \n", + "bot_median 13.8 0.999982 0.000037 \n", "acm_bot 15.3 0.999987 0.000025 \n", "metac-claude-3-5-sonnet-20240620 8.7 0.999275 0.001450 \n", "manticAI 10.7 0.999343 0.001314 \n", @@ -5690,7 +6466,7 @@ "metac-Gemini-Exp-1206 5.4 0.995749 0.008502 \n", "NextWorldLab 4.8 0.995080 0.009840 \n", "metac-o1-preview -1.4 0.964539 0.070922 \n", - "metac-deepseek-r1 11.7 0.999241 0.001519 \n", + "metac-deepseek-r1+asknews 11.7 0.999241 0.001519 \n", "laylaps 4.6 0.992463 0.015074 \n", "mmBot -0.5 0.971093 0.057813 \n", "Grizeu_Bot 9.9 0.998740 0.002521 \n", @@ -5724,7 +6500,7 @@ "minefrac1 -25.4 0.279560 0.559119 " ] }, - "execution_count": 42, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -5740,7 +6516,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 42, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -5785,62 +6561,6 @@ " \n", "|||||||||||||||||||
| Grizeu_Bot | \n", - "487.9 | \n", - "40.0 | \n", - "12.2 | \n", - "123.498523 | \n", - "19.539047 | \n", - "0.625100 | \n", - "2.020314 | \n", - "51.7 | \n", - "-27.3 | \n", - "0.732225 | \n", - "0.535551 | \n", - "||||||||||||
| acm_bot | \n", - "149.7 | \n", - "63.8 | \n", - "2.3 | \n", - "123.167219 | \n", - "15.413976 | \n", - "0.152116 | \n", - "1.997018 | \n", - "33.1 | \n", - "-28.4 | \n", - "0.560209 | \n", - "0.879583 | \n", - "||||||||||||
| RPM_bot | \n", - "145.0 | \n", - "6.0 | \n", - "24.2 | \n", - "31.468907 | \n", - "12.847127 | \n", - "1.880996 | \n", - "2.570582 | \n", - "57.2 | \n", - "-8.9 | \n", - "0.940638 | \n", - "0.118725 | \n", - "||||||||||||
| X_bot | \n", - "20.7 | \n", - "5.0 | \n", - "4.1 | \n", - "19.756237 | \n", - "8.835258 | \n", - "0.468897 | \n", - "2.776445 | \n", - "28.7 | \n", - "-20.4 | \n", - "0.668221 | \n", - "0.663558 | \n", - "||||||||||||
| cobyj-bot | \n", "0.0 | \n", "0.0 | \n", @@ -5869,716 +6589,772 @@ "NA | \n", "||||||||||||||||||||
| jonahsingerbot | \n", - "-61.3 | \n", + "bean_bot | \n", + "-0.6 | \n", "4.7 | \n", - "-13.0 | \n", - "5.485369 | \n", - "2.530212 | \n", - "-5.154842 | \n", + "-0.1 | \n", + "0.069849 | \n", + "0.032219 | \n", + "-4.265106 | \n", "2.784843 | \n", - "-6.0 | \n", - "-20.1 | \n", - "0.004141 | \n", - "0.008283 | \n", + "-0.0 | \n", + "-0.2 | \n", + "0.007674 | \n", + "0.015349 | \n", "||
| bean_bot | \n", - "-70.7 | \n", + "jonahsingerbot | \n", + "-0.6 | \n", "4.7 | \n", - "-15.1 | \n", - "8.813137 | \n", - "4.065197 | \n", - "-3.702222 | \n", + "-0.1 | \n", + "0.050272 | \n", + "0.023189 | \n", + "-5.273630 | \n", "2.784843 | \n", - "-3.7 | \n", - "-26.4 | \n", - "0.011925 | \n", - "0.023851 | \n", + "-0.1 | \n", + "-0.2 | \n", + "0.003839 | \n", + "0.007677 | \n", "||
| jkraybill_bot | \n", - "-76.1 | \n", - "38.2 | \n", - "-2.0 | \n", - "67.065479 | \n", - "10.858048 | \n", - "-0.183706 | \n", - "2.023360 | \n", - "20.0 | \n", - "-24.0 | \n", - "0.427622 | \n", - "0.855243 | \n", + "X_bot | \n", + "-0.7 | \n", + "7.0 | \n", + "-0.1 | \n", + "0.354068 | \n", + "0.133825 | \n", + "-0.747195 | \n", + "2.446912 | \n", + "0.2 | \n", + "-0.4 | \n", + "0.241594 | \n", + "0.483189 | \n", "
| CumulativeBot | \n", - "-97.0 | \n", + "-1.1 | \n", "10.2 | \n", - "-9.5 | \n", - "30.121060 | \n", - "9.408238 | \n", - "-1.005535 | \n", + "-0.1 | \n", + "0.257798 | \n", + "0.080522 | \n", + "-1.315132 | \n", "2.231848 | \n", - "11.5 | \n", - "-30.5 | \n", - "0.170109 | \n", - "0.340218 | \n", + "0.1 | \n", + "-0.3 | \n", + "0.110066 | \n", + "0.220132 | \n", "|||
| swingswish | \n", - "-109.0 | \n", - "6.7 | \n", - "-16.3 | \n", - "15.145531 | \n", - "5.851229 | \n", - "-2.779701 | \n", - "2.450387 | \n", - "-1.9 | \n", - "-30.6 | \n", - "0.016896 | \n", - "0.033793 | \n", + "-1.2 | \n", + "7.7 | \n", + "-0.2 | \n", + "0.140275 | \n", + "0.050552 | \n", + "-3.074947 | \n", + "2.367123 | \n", + "-0.0 | \n", + "-0.3 | \n", + "0.009476 | \n", + "0.018953 | \n", + "|
| RPM_bot | \n", + "-1.3 | \n", + "7.0 | \n", + "-0.2 | \n", + "0.803163 | \n", + "0.303567 | \n", + "-0.601802 | \n", + "2.446912 | \n", + "0.6 | \n", + "-0.9 | \n", + "0.284666 | \n", + "0.569332 | \n", "||||||||||||
| SynapseSeer | \n", - "-128.5 | \n", - "27.1 | \n", - "-4.8 | \n", - "47.081045 | \n", - "9.052373 | \n", - "-0.524959 | \n", - "2.049569 | \n", - "13.8 | \n", - "-23.3 | \n", - "0.302026 | \n", - "0.604052 | \n", + "-1.3 | \n", + "26.2 | \n", + "-0.1 | \n", + "0.452555 | \n", + "0.088498 | \n", + "-0.568910 | \n", + "2.053076 | \n", + "0.1 | \n", + "-0.2 | \n", + "0.287231 | \n", + "0.574463 | \n", "|
| KevinTestBot | \n", - "-148.3 | \n", + "-1.5 | \n", "8.4 | \n", - "-17.7 | \n", - "59.369669 | \n", - "20.484482 | \n", - "-0.861938 | \n", + "-0.2 | \n", + "0.589466 | \n", + "0.203385 | \n", + "-0.897116 | \n", "2.311496 | \n", - "29.7 | \n", - "-65.0 | \n", - "0.207889 | \n", - "0.415777 | \n", + "0.3 | \n", + "-0.7 | \n", + "0.198952 | \n", + "0.397903 | \n", "|||
| twsummerbot | \n", - "-237.2 | \n", - "47.0 | \n", - "-5.0 | \n", - "79.502690 | \n", - "11.596659 | \n", - "-0.435134 | \n", - "2.011215 | \n", - "18.3 | \n", - "-28.4 | \n", - "0.332750 | \n", - "0.665500 | \n", + "Grizeu_Bot | \n", + "-1.7 | \n", + "51.4 | \n", + "-0.0 | \n", + "1.173392 | \n", + "0.163747 | \n", + "-0.206616 | \n", + "2.006447 | \n", + "0.3 | \n", + "-0.4 | \n", + "0.418571 | \n", + "0.837143 | \n", "
| pianobot | \n", - "-272.2 | \n", + "-2.7 | \n", "4.7 | \n", - "-57.9 | \n", - "92.187165 | \n", - "42.522768 | \n", - "-1.361786 | \n", + "-0.6 | \n", + "0.916204 | \n", + "0.422613 | \n", + "-1.384327 | \n", "2.798986 | \n", - "61.1 | \n", - "-176.9 | \n", - "0.125137 | \n", - "0.250274 | \n", - "|||||||
| annabot | \n", - "-316.0 | \n", - "24.8 | \n", - "-12.7 | \n", - "43.737410 | \n", - "8.782683 | \n", - "-1.450614 | \n", - "2.061307 | \n", - "5.4 | \n", - "-30.8 | \n", - "0.079970 | \n", - "0.159940 | \n", + "0.6 | \n", + "-1.8 | \n", + "0.121941 | \n", + "0.243882 | \n", "||||||||
| CatrachoCaster | \n", - "-331.3 | \n", + "-3.2 | \n", "19.7 | \n", - "-16.8 | \n", - "52.315059 | \n", - "11.786737 | \n", - "-1.426980 | \n", + "-0.2 | \n", + "0.520901 | \n", + "0.117361 | \n", + "-1.365532 | \n", "2.088777 | \n", - "7.8 | \n", - "-41.4 | \n", - "0.085035 | \n", - "0.170071 | \n", + "0.1 | \n", + "-0.4 | \n", + "0.094144 | \n", + "0.188288 | \n", "|||
| cookics_bot_TEST | \n", - "-413.3 | \n", - "24.6 | \n", - "-16.8 | \n", - "72.426694 | \n", - "14.602631 | \n", - "-1.150436 | \n", - "2.060845 | \n", - "13.3 | \n", - "-46.9 | \n", - "0.130744 | \n", - "0.261488 | \n", + "krm-bot | \n", + "-5.1 | \n", + "9.5 | \n", + "-0.5 | \n", + "0.511546 | \n", + "0.165967 | \n", + "-3.229846 | \n", + "2.264709 | \n", + "-0.2 | \n", + "-0.9 | \n", + "0.005563 | \n", + "0.011127 | \n", "
| GreeneiBot2 | \n", - "-446.6 | \n", - "45.8 | \n", - "-9.8 | \n", - "88.553207 | \n", - "13.092083 | \n", - "-0.745705 | \n", - "2.012340 | \n", - "16.6 | \n", - "-36.1 | \n", - "0.229872 | \n", - "0.459745 | \n", + "annabot | \n", + "-6.2 | \n", + "29.3 | \n", + "-0.2 | \n", + "0.520869 | \n", + "0.096226 | \n", + "-2.211795 | \n", + "2.044183 | \n", + "-0.0 | \n", + "-0.4 | \n", + "0.017610 | \n", + "0.035221 | \n", "
| metac-o1 | \n", - "-500.3 | \n", - "74.7 | \n", + "4Shadower | \n", + "-6.2 | \n", + "14.0 | \n", + "-0.4 | \n", + "0.767322 | \n", + "0.205075 | \n", + "-2.143194 | \n", + "2.147239 | \n", + "0.0 | \n", + "-0.9 | \n", + "0.025797 | \n", + "0.051593 | \n", + "|||||||||
| cookics_bot_TEST | \n", "-6.7 | \n", - "111.255242 | \n", - "12.872419 | \n", - "-0.520339 | \n", - "1.991597 | \n", - "18.9 | \n", - "-32.3 | \n", - "0.302194 | \n", - "0.604387 | \n", + "27.4 | \n", + "-0.2 | \n", + "0.748050 | \n", + "0.142908 | \n", + "-1.722004 | \n", + "2.049541 | \n", + "0.0 | \n", + "-0.5 | \n", + "0.048384 | \n", + "0.096767 | \n", "||||
| krm-bot | \n", - "-521.0 | \n", - "9.5 | \n", - "-54.8 | \n", - "50.627856 | \n", - "16.425846 | \n", - "-3.338962 | \n", - "2.264709 | \n", - "-17.6 | \n", - "-92.0 | \n", - "0.004700 | \n", - "0.009400 | \n", + "jkraybill_bot | \n", + "-7.5 | \n", + "44.0 | \n", + "-0.2 | \n", + "0.512853 | \n", + "0.077272 | \n", + "-2.197133 | \n", + "2.014642 | \n", + "-0.0 | \n", + "-0.3 | \n", + "0.016721 | \n", + "0.033441 | \n", "
| 4Shadower | \n", - "-527.8 | \n", - "12.2 | \n", - "-43.3 | \n", - "80.791182 | \n", - "23.130448 | \n", - "-1.870273 | \n", - "2.181695 | \n", - "7.2 | \n", - "-93.7 | \n", - "0.043896 | \n", - "0.087792 | \n", + "twsummerbot | \n", + "-8.9 | \n", + "58.4 | \n", + "-0.2 | \n", + "0.659710 | \n", + "0.086327 | \n", + "-1.758391 | \n", + "2.000855 | \n", + "0.0 | \n", + "-0.3 | \n", + "0.042006 | \n", + "0.084012 | \n", "
| MWG | \n", - "-766.4 | \n", - "29.5 | \n", - "-26.0 | \n", - "87.753338 | \n", - "16.156699 | \n", - "-1.608077 | \n", - "2.043527 | \n", - "7.0 | \n", - "-59.0 | \n", - "0.059421 | \n", - "0.118842 | \n", + "-9.6 | \n", + "28.6 | \n", + "-0.3 | \n", + "0.711160 | \n", + "0.132979 | \n", + "-2.535384 | \n", + "2.046561 | \n", + "-0.1 | \n", + "-0.6 | \n", + "0.008595 | \n", + "0.017191 | \n", "|
| bot_median | \n", - "-780.6 | \n", - "75.7 | \n", - "-10.3 | \n", - "85.113891 | \n", - "9.782560 | \n", - "-1.054147 | \n", - "1.991181 | \n", - "9.2 | \n", - "-29.8 | \n", - "0.147607 | \n", - "0.295213 | \n", + "ProfessorSP | \n", + "-10.0 | \n", + "18.6 | \n", + "-0.5 | \n", + "0.936277 | \n", + "0.217094 | \n", + "-2.484480 | \n", + "2.095243 | \n", + "-0.1 | \n", + "-1.0 | \n", + "0.011644 | \n", + "0.023289 | \n", "
| Bot_Pepa | \n", - "-814.9 | \n", - "37.2 | \n", - "-21.9 | \n", - "93.067285 | \n", - "15.269248 | \n", - "-1.436551 | \n", - "2.025098 | \n", - "9.0 | \n", - "-52.9 | \n", - "0.079722 | \n", - "0.159444 | \n", + "acm_bot | \n", + "-10.5 | \n", + "80.2 | \n", + "-0.1 | \n", + "0.914265 | \n", + "0.102059 | \n", + "-1.287717 | \n", + "1.989344 | \n", + "0.1 | \n", + "-0.3 | \n", + "0.100796 | \n", + "0.201592 | \n", + "
| metac-o1 | \n", + "-10.8 | \n", + "91.1 | \n", + "-0.1 | \n", + "0.866824 | \n", + "0.090818 | \n", + "-1.303018 | \n", + "1.985829 | \n", + "0.1 | \n", + "-0.3 | \n", + "0.097944 | \n", + "0.195889 | \n", "||||||||||||
| ajf-bot | \n", - "-843.1 | \n", - "31.4 | \n", - "-26.9 | \n", - "104.854733 | \n", - "18.727046 | \n", - "-1.436020 | \n", - "2.037667 | \n", - "11.3 | \n", - "-65.1 | \n", - "0.080612 | \n", - "0.161224 | \n", + "-10.9 | \n", + "34.2 | \n", + "-0.3 | \n", + "1.085589 | \n", + "0.185496 | \n", + "-1.722395 | \n", + "2.030778 | \n", + "0.1 | \n", + "-0.7 | \n", + "0.047145 | \n", + "0.094289 | \n", "|
| manticAI | \n", - "-861.5 | \n", - "55.0 | \n", - "-15.7 | \n", - "82.873865 | \n", - "11.169634 | \n", - "-1.401147 | \n", - "2.003064 | \n", - "6.7 | \n", - "-38.0 | \n", - "0.083443 | \n", - "0.166886 | \n", + "metac-deepseek-r1+asknews | \n", + "-11.2 | \n", + "52.1 | \n", + "-0.2 | \n", + "0.634257 | \n", + "0.087871 | \n", + "-2.445043 | \n", + "2.005379 | \n", + "-0.0 | \n", + "-0.4 | \n", + "0.008985 | \n", + "0.017970 | \n", "
| ProfessorSP | \n", - "-997.2 | \n", - "16.8 | \n", - "-59.4 | \n", - "96.919488 | \n", - "23.645934 | \n", - "-2.510293 | \n", - "2.112371 | \n", - "-9.4 | \n", - "-109.3 | \n", - "0.011672 | \n", - "0.023345 | \n", + "GreeneiBot2 | \n", + "-11.4 | \n", + "58.4 | \n", + "-0.2 | \n", + "0.846228 | \n", + "0.110781 | \n", + "-1.766811 | \n", + "2.000832 | \n", + "0.0 | \n", + "-0.4 | \n", + "0.041290 | \n", + "0.082581 | \n", "
| metac-perplexity | \n", - "-1072.9 | \n", - "72.7 | \n", - "-14.8 | \n", - "105.315607 | \n", - "12.351666 | \n", - "-1.194808 | \n", - "1.992462 | \n", - "9.9 | \n", - "-39.4 | \n", - "0.118050 | \n", - "0.236099 | \n", + "Bot_Pepa | \n", + "-11.5 | \n", + "44.0 | \n", + "-0.3 | \n", + "0.737537 | \n", + "0.111125 | \n", + "-2.343166 | \n", + "2.014642 | \n", + "-0.0 | \n", + "-0.5 | \n", + "0.011905 | \n", + "0.023810 | \n", "
| wunderplumb | \n", - "-1159.0 | \n", - "23.8 | \n", - "-48.8 | \n", - "90.740106 | \n", - "18.619477 | \n", - "-2.620990 | \n", - "2.065034 | \n", - "-10.4 | \n", - "-87.3 | \n", - "0.007677 | \n", - "0.015353 | \n", + "metac-Gemini-Exp-1206 | \n", + "-11.5 | \n", + "76.5 | \n", + "-0.2 | \n", + "0.895210 | \n", + "0.102351 | \n", + "-1.471849 | \n", + "1.990822 | \n", + "0.1 | \n", + "-0.4 | \n", + "0.072609 | \n", + "0.145218 | \n", "
| laylaps | \n", - "-1214.5 | \n", - "52.2 | \n", - "-23.3 | \n", - "48.019929 | \n", - "6.646397 | \n", - "-3.500587 | \n", - "2.005359 | \n", - "-9.9 | \n", - "-36.6 | \n", - "0.000486 | \n", - "0.000971 | \n", + "-12.9 | \n", + "64.1 | \n", + "-0.2 | \n", + "0.661905 | \n", + "0.082674 | \n", + "-2.440461 | \n", + "1.996907 | \n", + "-0.0 | \n", + "-0.4 | \n", + "0.008744 | \n", + "0.017488 | \n", "|
| NextWorldLab | \n", - "-1224.1 | \n", - "63.8 | \n", - "-19.2 | \n", - "98.662622 | \n", - "12.347306 | \n", - "-1.552699 | \n", - "1.997018 | \n", - "5.5 | \n", - "-43.8 | \n", - "0.062758 | \n", - "0.125517 | \n", + "bot_median | \n", + "-13.3 | \n", + "92.1 | \n", + "-0.1 | \n", + "0.757201 | \n", + "0.078901 | \n", + "-1.830058 | \n", + "1.985550 | \n", + "0.0 | \n", + "-0.3 | \n", + "0.035256 | \n", + "0.070512 | \n", "
| metac-Gemini-Exp-1206 | \n", - "-1250.5 | \n", - "65.1 | \n", - "-19.2 | \n", - "94.993211 | \n", - "11.773405 | \n", - "-1.631519 | \n", - "1.996377 | \n", - "4.3 | \n", - "-42.7 | \n", - "0.053842 | \n", - "0.107685 | \n", + "wunderplumb | \n", + "-13.6 | \n", + "25.6 | \n", + "-0.5 | \n", + "0.900051 | \n", + "0.178062 | \n", + "-2.984094 | \n", + "2.056603 | \n", + "-0.2 | \n", + "-0.9 | \n", + "0.003174 | \n", + "0.006348 | \n", "
| minefrac1 | \n", - "-1289.4 | \n", - "43.5 | \n", - "-29.6 | \n", - "123.199791 | \n", - "18.679504 | \n", - "-1.586858 | \n", - "2.014918 | \n", - "8.0 | \n", - "-67.3 | \n", - "0.059979 | \n", - "0.119958 | \n", + "metac-perplexity | \n", + "-14.4 | \n", + "89.1 | \n", + "-0.2 | \n", + "1.102601 | \n", + "0.116810 | \n", + "-1.384952 | \n", + "1.986405 | \n", + "0.1 | \n", + "-0.4 | \n", + "0.084782 | \n", + "0.169564 | \n", "
| pgodzinai | \n", - "-1330.4 | \n", - "62.0 | \n", - "-21.5 | \n", - "98.404053 | \n", - "12.497327 | \n", - "-1.716953 | \n", - "1.998174 | \n", - "3.5 | \n", - "-46.4 | \n", - "0.045531 | \n", - "0.091062 | \n", + "manticAI | \n", + "-14.6 | \n", + "69.4 | \n", + "-0.2 | \n", + "0.670946 | \n", + "0.080510 | \n", + "-2.613354 | \n", + "1.993968 | \n", + "-0.0 | \n", + "-0.4 | \n", + "0.005507 | \n", + "0.011014 | \n", "
| metac-deepseek-r1 | \n", - "-1360.3 | \n", - "48.2 | \n", - "-28.2 | \n", - "108.359802 | \n", - "15.607908 | \n", - "-1.808248 | \n", - "2.009112 | \n", - "3.1 | \n", - "-59.6 | \n", - "0.038471 | \n", - "0.076941 | \n", + "NextWorldLab | \n", + "-16.9 | \n", + "80.2 | \n", + "-0.2 | \n", + "0.906964 | \n", + "0.101244 | \n", + "-2.078393 | \n", + "1.989344 | \n", + "-0.0 | \n", + "-0.4 | \n", + "0.020455 | \n", + "0.040909 | \n", "
| metac-Llama-3.1 | \n", - "-1412.1 | \n", - "73.7 | \n", - "-19.2 | \n", - "97.483499 | \n", - "11.355267 | \n", - "-1.687375 | \n", - "1.992024 | \n", - "3.5 | \n", - "-41.8 | \n", - "0.047909 | \n", - "0.095818 | \n", + "minefrac1 | \n", + "-18.8 | \n", + "51.1 | \n", + "-0.4 | \n", + "0.874752 | \n", + "0.122370 | \n", + "-3.013581 | \n", + "2.006545 | \n", + "-0.1 | \n", + "-0.6 | \n", + "0.002021 | \n", + "0.004043 | \n", "
| metac-claude-3-5-sonnet-latest | \n", - "-1463.9 | \n", - "74.7 | \n", - "-19.6 | \n", - "96.855911 | \n", - "11.206393 | \n", - "-1.748737 | \n", - "1.991597 | \n", - "2.7 | \n", - "-41.9 | \n", - "0.042250 | \n", - "0.084500 | \n", + "-21.6 | \n", + "91.1 | \n", + "-0.2 | \n", + "0.784073 | \n", + "0.082148 | \n", + "-2.885581 | \n", + "1.985829 | \n", + "-0.1 | \n", + "-0.4 | \n", + "0.002444 | \n", + "0.004888 | \n", + "|
| mmBot | \n", + "-21.9 | \n", + "92.1 | \n", + "-0.2 | \n", + "0.725010 | \n", + "0.075546 | \n", + "-3.150104 | \n", + "1.985550 | \n", + "-0.1 | \n", + "-0.4 | \n", + "0.001104 | \n", + "0.002208 | \n", "||||||||||||
| metac-claude-3-5-sonnet-20240620 | \n", - "-1649.9 | \n", - "75.1 | \n", - "-22.0 | \n", - "105.324094 | \n", - "12.153679 | \n", - "-1.807616 | \n", - "1.991536 | \n", - "2.2 | \n", - "-46.2 | \n", - "0.037362 | \n", - "0.074725 | \n", + "-22.1 | \n", + "90.5 | \n", + "-0.2 | \n", + "0.992190 | \n", + "0.104297 | \n", + "-2.344713 | \n", + "1.986072 | \n", + "-0.0 | \n", + "-0.5 | \n", + "0.010627 | \n", + "0.021254 | \n", "|
| metac-o1-preview | \n", - "-1830.6 | \n", - "74.7 | \n", - "-24.5 | \n", - "107.515409 | \n", - "12.439714 | \n", - "-1.969955 | \n", - "1.991597 | \n", - "0.3 | \n", - "-49.3 | \n", - "0.026301 | \n", - "0.052601 | \n", + "metac-grok-2-1212 | \n", + "-23.2 | \n", + "91.1 | \n", + "-0.3 | \n", + "0.969180 | \n", + "0.101542 | \n", + "-2.504438 | \n", + "1.985829 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.007032 | \n", + "0.014063 | \n", "
| mmBot | \n", - "-2006.4 | \n", - "75.7 | \n", - "-26.5 | \n", - "78.532351 | \n", - "9.026111 | \n", - "-2.936446 | \n", - "1.991181 | \n", - "-8.5 | \n", - "-44.5 | \n", - "0.002205 | \n", - "0.004411 | \n", + "pgodzinai | \n", + "-23.2 | \n", + "76.4 | \n", + "-0.3 | \n", + "1.002923 | \n", + "0.114742 | \n", + "-2.649317 | \n", + "1.990849 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.004910 | \n", + "0.009821 | \n", "
| VeritasAI | \n", - "-2024.5 | \n", - "67.7 | \n", - "-29.9 | \n", - "63.282103 | \n", - "7.691066 | \n", - "-3.888187 | \n", - "1.994849 | \n", - "-14.6 | \n", - "-45.2 | \n", - "0.000118 | \n", - "0.000235 | \n", + "-24.3 | \n", + "77.1 | \n", + "-0.3 | \n", + "0.660703 | \n", + "0.075245 | \n", + "-4.185910 | \n", + "1.990482 | \n", + "-0.2 | \n", + "-0.5 | \n", + "0.000038 | \n", + "0.000076 | \n", "|
| metac-grok-2-1212 | \n", - "-2154.6 | \n", - "74.7 | \n", - "-28.8 | \n", - "106.094606 | \n", - "12.275325 | \n", - "-2.349685 | \n", - "1.991597 | \n", - "-4.4 | \n", - "-53.3 | \n", - "0.010735 | \n", - "0.021470 | \n", + "metac-o1-preview | \n", + "-24.4 | \n", + "91.1 | \n", + "-0.3 | \n", + "0.852432 | \n", + "0.089310 | \n", + "-2.999396 | \n", + "1.985829 | \n", + "-0.1 | \n", + "-0.4 | \n", + "0.001749 | \n", + "0.003497 | \n", "
| metac-gpt-4o | \n", - "-2196.6 | \n", - "74.7 | \n", - "-29.4 | \n", - "100.421684 | \n", - "11.618958 | \n", - "-2.530844 | \n", - "1.991597 | \n", - "-6.3 | \n", - "-52.5 | \n", - "0.006756 | \n", - "0.013513 | \n", + "-25.1 | \n", + "91.1 | \n", + "-0.3 | \n", + "0.873597 | \n", + "0.091528 | \n", + "-3.009707 | \n", + "1.985829 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.001696 | \n", + "0.003391 | \n", "|
| metac-exa | \n", - "-2249.1 | \n", - "72.7 | \n", - "-30.9 | \n", - "91.723290 | \n", - "10.757526 | \n", - "-2.875853 | \n", - "1.992462 | \n", - "-9.5 | \n", - "-52.4 | \n", - "0.002651 | \n", - "0.005302 | \n", + "-26.1 | \n", + "89.1 | \n", + "-0.3 | \n", + "0.791935 | \n", + "0.083898 | \n", + "-3.495695 | \n", + "1.986405 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.000371 | \n", + "0.000743 | \n", "|
| InstitutPelFutur | \n", - "-2477.3 | \n", - "72.8 | \n", - "-34.0 | \n", - "102.041454 | \n", - "11.959443 | \n", - "-2.845391 | \n", - "1.992461 | \n", - "-10.2 | \n", - "-57.9 | \n", - "0.002888 | \n", - "0.005777 | \n", + "-26.9 | \n", + "90.1 | \n", + "-0.3 | \n", + "0.973767 | \n", + "0.102587 | \n", + "-2.908524 | \n", + "1.986114 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.002292 | \n", + "0.004584 | \n", + "|
| metac-Llama-3.1 | \n", + "-28.0 | \n", + "89.1 | \n", + "-0.3 | \n", + "0.907200 | \n", + "0.096109 | \n", + "-3.270200 | \n", + "1.986405 | \n", + "-0.1 | \n", + "-0.5 | \n", + "0.000767 | \n", + "0.001534 | \n", "
| \n", + " | pro_question_id | \n", + "bot_question_id | \n", + "resolution | \n", + "question_weight | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "... | \n", + "metac-o1-preview | \n", + "metac-perplexity | \n", + "minefrac1 | \n", + "mmBot | \n", + "pgodzinai | \n", + "pianobot | \n", + "swingswish | \n", + "twsummerbot | \n", + "wunderplumb | \n", + "bot_team_median | \n", + "||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "31268 | \n", + "31262 | \n", + "0 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[0, 1, 2-3, 4-6, >6] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "2.302585 | \n", + "5.703782 | \n", + "NaN | \n", + "2.292635 | \n", + "2.703087 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "4.605170 | \n", + "||||||
| 1 | \n", + "31269 | \n", + "31263 | \n", + "86.82 | \n", + "1.0 | \n", + "numeric | \n", + "None | \n", + "60.0 | \n", + "100.0 | \n", + "True | \n", + "True | \n", + "... | \n", + "-0.158842 | \n", + "-0.616988 | \n", + "NaN | \n", + "-0.050442 | \n", + "-0.163369 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-1.512868 | \n", + "||||||
| 2 | \n", + "31270 | \n", + "31264 | \n", + "no | \n", "1.0 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.038208 | \n", + "-0.092275 | \n", + "NaN | \n", + "-0.210058 | \n", + "-0.059485 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.149434 | \n", "||||||
| NextWorldLab | \n", - "-8.3 | \n", - "-6.7 | \n", - "-3.4 | \n", - "-0.4 | \n", - "1.2 | \n", + "3 | \n", + "31280 | \n", + "31274 | \n", + "5-9 | \n", + "1.0 | \n", + "multiple_choice | \n", + "[0-4, 5-9, >9] | \n", + "NaN | \n", + "NaN | \n", + "None | \n", + "None | \n", + "... | \n", + "0.390198 | \n", + "0.204794 | \n", + "NaN | \n", + "0.127833 | \n", + "0.152526 | \n", + "NaN | \n", + "NaN | \n", + "-0.046520 | \n", + "NaN | \n", + "0.310155 | \n", "
| laylaps | \n", - "-9.9 | \n", - "-7.7 | \n", - "-3.8 | \n", - "-0.1 | \n", - "2.2 | \n", + "4 | \n", + "31281 | \n", + "31275 | \n", + "119.2 | \n", + "1.0 | \n", + "numeric | \n", + "None | \n", + "0.0 | \n", + "400.0 | \n", + "False | \n", + "False | \n", + "... | \n", + "0.243782 | \n", + "-0.102791 | \n", + "NaN | \n", + "0.265372 | \n", + "0.041050 | \n", + "NaN | \n", + "NaN | \n", + "-0.771754 | \n", + "NaN | \n", + "0.184891 | \n", "
| Bot_Pepa | \n", - "-7.0 | \n", - "-6.0 | \n", - "-3.9 | \n", - "-1.8 | \n", - "-0.9 | \n", + "
5 rows × 57 columns
\n", + "| \n", + " | pro_question_id | \n", + "bot_question_id | \n", + "resolution | \n", + "question_weight | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "... | \n", + "metac-o1-preview | \n", + "metac-perplexity | \n", + "minefrac1 | \n", + "mmBot | \n", + "pgodzinai | \n", + "pianobot | \n", + "swingswish | \n", + "twsummerbot | \n", + "wunderplumb | \n", + "bot_team_median | \n", "||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VeritasAI | \n", - "-7.8 | \n", - "-6.6 | \n", - "-4.3 | \n", - "-1.9 | \n", - "-0.4 | \n", + "94 | \n", + "35380 | \n", + "35345 | \n", + "yes | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.054067 | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000 | \n", + "NaN | \n", + "-0.054067 | \n", + "-0.220515 | \n", + "-0.054067 | \n", + "-0.054067 | \n", "
| minefrac1 | \n", - "-8.0 | \n", - "-6.7 | \n", - "-4.6 | \n", - "-2.5 | \n", - "-1.3 | \n", + "95 | \n", + "35381 | \n", + "35354 | \n", + "no | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-2.251292 | \n", + "NaN | \n", + "NaN | \n", + "-0.111226 | \n", + "NaN | \n", + "NaN | \n", + "-0.054067 | \n", + "-0.083382 | \n", + "-2.944439 | \n", + "-0.111226 | \n", "
| Grizeu_Bot | \n", - "-8.8 | \n", - "-7.6 | \n", - "-5.1 | \n", - "-2.4 | \n", - "-0.9 | \n", + "96 | \n", + "35385 | \n", + "35358 | \n", + "yes | \n", + "1.00 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.020834 | \n", + "NaN | \n", + "NaN | \n", + "-0.074901 | \n", + "NaN | \n", + "NaN | \n", + "-0.132060 | \n", + "-0.158283 | \n", + "-0.132060 | \n", + "-0.158283 | \n", "
| metac-gpt-4o | \n", - "-10.6 | \n", - "-9.0 | \n", - "-5.8 | \n", - "-2.9 | \n", - "-1.4 | \n", + "97 | \n", + "35386 | \n", + "35364 | \n", + "no | \n", + "0.85 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.680430 | \n", + "0.628948 | \n", + "NaN | \n", + "-0.680430 | \n", + "-0.680430 | \n", + "NaN | \n", + "-0.091255 | \n", + "0.811793 | \n", + "0.628948 | \n", + "-0.091255 | \n", "
| ajf-bot | \n", - "-15.0 | \n", - "-13.0 | \n", - "-8.6 | \n", - "-4.4 | \n", - "-2.0 | \n", + "98 | \n", + "35387 | \n", + "35367 | \n", + "no | \n", + "0.85 | \n", + "binary | \n", + "None | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "... | \n", + "-0.017709 | \n", + "0.000000 | \n", + "NaN | \n", + "-0.112251 | \n", + "-0.017709 | \n", + "NaN | \n", + "-0.163782 | \n", + "-0.241614 | \n", + "-0.163782 | \n", + "-0.112251 | \n", "
5 rows × 57 columns
\n", "| \n", + " | bot_question_id | \n", + "question_weight | \n", + "resolution | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_lower_bound | \n", + "open_upper_bound | \n", + "metac-o1-preview | \n", + "... | \n", + "median_forecast_1_bots | \n", + "median_forecast_2_bots | \n", + "median_forecast_3_bots | \n", + "median_forecast_4_bots | \n", + "median_forecast_5_bots | \n", + "median_forecast_6_bots | \n", + "median_forecast_7_bots | \n", + "median_forecast_8_bots | \n", + "median_forecast_9_bots | \n", + "median_forecast_10_bots | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "31262 | \n", + "1.0 | \n", + "0 | \n", + "multiple_choice | \n", + "[0, 1, 2-3, 4-6, >6] | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "[0.01,0.7,0.2,0.07,0.02] | \n", + "... | \n", + "[0.01, 0.0001, 0.0001, 0.0001, 0.0001] | \n", + "[0.13, 0.0001, 0.0001, 0.0001, 0.0001] | \n", + "[0.014925742574257425, 0.0001, 0.0001, 0.0001,... | \n", + "[0.012462871287128714, 0.0001, 0.0001, 0.0001,... | \n", + "[0.012462871287128714, 0.0001, 0.0001, 0.0001,... | \n", + "[0.014925742574257425, 0.0001, 0.0001, 0.0001,... | \n", + "[0.057462871287128715, 0.0001, 0.0001, 0.0001,... | \n", + "[0.057462871287128715, 0.0001, 0.0001, 0.0001,... | \n", + "[0.01623640201331385, 0.0001, 0.0001, 0.0001, ... | \n", + "[0.01623640201331385, 0.0001, 0.0001, 0.0001, ... | \n", + "
| 1 | \n", + "31263 | \n", + "1.0 | \n", + "86.82 | \n", + "numeric | \n", + "NaN | \n", + "60.0 | \n", + "100.0 | \n", + "True | \n", + "True | \n", + "[0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... | \n", + "... | \n", + "[0.05, 0.051, 0.052, 0.053, 0.054, 0.055, 0.05... | \n", + "[0.05, 0.05079411765, 0.0515882353, 0.05238235... | \n", + "[0.05, 0.0505882353, 0.0511764706, 0.051764705... | \n", + "[0.05, 0.0505982539, 0.0511965078, 0.051794761... | \n", + "[0.05, 0.0505982539, 0.0511965078, 0.051794761... | \n", + "[0.05, 0.0506082725, 0.051216545, 0.0518248175... | \n", + "[0.05, 0.0506082725, 0.051216545, 0.0518248175... | \n", + "[0.05, 0.0506082725, 0.051216545, 0.0518248175... | \n", + "[0.05, 0.0506374696, 0.051274939150000004, 0.0... | \n", + "[0.05, 0.0506374696, 0.051274939150000004, 0.0... | \n", + "
| 2 | \n", + "31264 | \n", + "1.0 | \n", + "no | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "0.05 | \n", + "... | \n", + "0.05 | \n", + "0.075 | \n", + "0.07 | \n", + "0.063 | \n", + "0.063 | \n", + "0.07 | \n", + "0.085 | \n", + "0.085 | \n", + "0.1 | \n", + "0.1 | \n", + "
| 3 | \n", + "31274 | \n", + "1.0 | \n", + "5-9 | \n", + "multiple_choice | \n", + "[0-4, 5-9, >9] | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "[0.15,0.65,0.2] | \n", + "... | \n", + "[0.0001, 0.65, 0.0001] | \n", + "[0.0001, 0.55, 0.0001] | \n", + "[0.0001, 0.5125, 0.0001] | \n", + "[0.0001, 0.5662499999999999, 0.0001] | \n", + "[0.0001, 0.5125, 0.0001] | \n", + "[0.0001, 0.48124999999999996, 0.0001] | \n", + "[0.0001, 0.45, 0.0001] | \n", + "[0.0001, 0.45, 0.0001] | \n", + "[0.0001, 0.48124999999999996, 0.0001] | \n", + "[0.0001, 0.45, 0.0001] | \n", + "
| 4 | \n", + "31275 | \n", + "1.0 | \n", + "119.2 | \n", + "numeric | \n", + "NaN | \n", + "0.0 | \n", + "400.0 | \n", + "False | \n", + "False | \n", + "[0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,... | \n", + "... | \n", + "[0.0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.024,... | \n", + "[0.0, 0.00366666665, 0.00733333335, 0.011, 0.0... | \n", + "[0.0, 0.0033333333, 0.0066666667, 0.01, 0.0133... | \n", + "[0.0, 0.00257575755, 0.00515151515, 0.00772727... | \n", + "[0.0, 0.0018181818, 0.0036363636, 0.0054545455... | \n", + "[0.0, 0.00183065955, 0.00366131905, 0.00549197... | \n", + "[0.0, 0.0018431373, 0.0036862745, 0.0055294118... | \n", + "[0.0, 0.0018431373, 0.0036862745, 0.0055294118... | \n", + "[0.0, 0.002254902, 0.0045098039, 0.0067647059,... | \n", + "[0.0, 0.0018431373, 0.0036862745, 0.0055294118... | \n", + "
5 rows × 29 columns
\n", + "5 rows × 27 columns
\n", "" ], "text/plain": [ - " bot_question_id question_weight resolution type \\\n", - "0 31262 1.0 0 multiple_choice \n", - "1 31263 1.0 86.82 numeric \n", - "2 31264 1.0 no binary \n", - "3 31274 1.0 5-9 multiple_choice \n", - "4 31275 1.0 119.2 numeric \n", - "\n", - " options range_min range_max \\\n", - "0 [0, 1, 2-3, 4-6, >6] NaN NaN \n", - "1 NaN 60.0 100.0 \n", - "2 NaN NaN NaN \n", - "3 [0-4, 5-9, >9] NaN NaN \n", - "4 NaN 0.0 400.0 \n", - "\n", - " metac-o1-preview \\\n", - "0 [0.02,0.7,0.2,0.07,0.01] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.15 \n", - "3 [0.2,0.6,0.2] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", - "\n", - " metac-o1 \\\n", - "0 [0.45,0.3,0.15,0.05,0.05] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.1 \n", - "3 [0.25,0.6,0.15] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", - "\n", - " pgodzinai ... \\\n", - "0 [0.014925742574257425,0.5137871287128712,0.334... ... \n", - "1 [0.001,0.001060875,0.0011396,0.0012863125,0.00... ... \n", - "2 0.07 ... \n", - "3 [0.27499999999999997,0.5125,0.21249999999999997] ... \n", - "4 [0.0,0.0001141583,0.0002446967,0.0003862688,0.... ... \n", - "\n", - " median_forecast_1_bots \\\n", - "0 0.02 \n", - "1 [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05... \n", - "2 0.15 \n", - "3 0.6 \n", - "4 [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0... \n", - "\n", - " median_forecast_2_bots \\\n", - "0 0.235 \n", - "1 [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05... \n", - "2 0.125 \n", - "3 0.6 \n", - "4 [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0... \n", - "\n", - " median_forecast_3_bots \\\n", - "0 0.02 \n", - "1 [0.03366666666666667, 0.0341314028, 0.03460208... \n", - "2 0.1 \n", - "3 0.6 \n", - "4 [0.0, 0.0017047194333333333, 0.0034148989, 0.0... \n", - "\n", - " median_forecast_4_bots \\\n", - "0 0.017463 \n", - "1 [0.037750000000000006, 0.038250620225000004, 0... \n", - "2 0.085 \n", - "3 0.6 \n", - "4 [0.0, 0.001733085025, 0.003470265075, 0.005210... \n", - "\n", - " median_forecast_5_bots \\\n", - "0 0.017463 \n", - "1 [0.037750000000000006, 0.038250620225000004, 0... \n", - "2 0.085 \n", - "3 0.6 \n", - "4 [0.0, 0.00161112178, 0.0032277004800000003, 0.... \n", - "\n", - " median_forecast_6_bots \\\n", - "0 0.02 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.1 \n", - "3 0.55625 \n", - "4 [0.0, 0.0016497910333333336, 0.003304129483333... \n", - "\n", - " median_forecast_7_bots \\\n", - "0 0.085 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.125 \n", - "3 0.5125 \n", - "4 [0.0, 0.0017712494571428573, 0.0035463967, 0.0... \n", + " bot_question_id title \\\n", + "0 31262 For Q1 2025, how many banks will be listed on ... \n", + "1 31263 What percentage of the vote will Alexander Luk... \n", + "2 31264 Will the bubble in the Magnificent Seven pop b... \n", + "3 31274 How many arms sales globally will the US State... \n", + "4 31275 How much will it rain in Brasília, Brazil in F... \n", "\n", - " median_forecast_8_bots \\\n", - "0 0.085 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.125 \n", - "3 0.5125 \n", - "4 [0.0, 0.0017712494571428573, 0.0035463967, 0.0... \n", + " resolution scheduled_close_time actual_close_time type \\\n", + "0 0 2025-01-20 03:27:00 2025-01-20 03:27:00 multiple_choice \n", + "1 86.82 2025-01-20 03:27:00 2025-01-20 03:27:00 numeric \n", + "2 no 2025-01-20 03:27:00 2025-01-20 03:27:00 binary \n", + "3 5-9 2025-01-21 11:42:00 2025-01-21 11:42:00 multiple_choice \n", + "4 119.2 2025-01-21 11:42:00 2025-01-21 11:42:00 numeric \n", + "\n", + " options range_min range_max open_upper_bound \\\n", + "0 [0, 1, 2-3, 4-6, >6] NaN NaN False \n", + "1 NaN 60.0 100.0 True \n", + "2 NaN NaN NaN False \n", + "3 [0-4, 5-9, >9] NaN NaN NaN \n", + "4 NaN 0.0 400.0 False \n", + "\n", + " open_lower_bound pro_question_id question_weight \\\n", + "0 False 31268 1.0 \n", + "1 True 31269 1.0 \n", + "2 False 31270 1.0 \n", + "3 NaN 31280 1.0 \n", + "4 False 31281 1.0 \n", + "\n", + " bot_team_median \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", + "\n", + " pro_median \n", + "0 [0.001,0.62,0.35,0.019,0.01] \n", + "1 [0.0013749738,0.0014499743,0.001526641,0.00160... \n", + "2 0.013 \n", + "3 [0.16,0.44,0.4] \n", + "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "| \n", + " | bot_question_id | \n", + "title | \n", + "resolution | \n", + "scheduled_close_time | \n", + "actual_close_time | \n", + "type | \n", + "options | \n", + "range_min | \n", + "range_max | \n", + "open_upper_bound | \n", + "open_lower_bound | \n", + "pro_question_id | \n", + "question_weight | \n", + "bot_team_median | \n", + "pro_median | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 342 | \n", + "35345 | \n", + "Will the US Citizenship and Immigration Servic... | \n", + "yes | \n", + "2025-03-12 22:00:00 | \n", + "2025-03-12 22:00:00 | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "35380 | \n", + "1.00 | \n", + "0.9 | \n", + "0.95 | \n", + "
| 351 | \n", + "35354 | \n", + "Will the United States impose any new tariffs ... | \n", + "no | \n", + "2025-03-13 03:00:00 | \n", + "2025-03-13 03:00:00 | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "35381 | \n", + "1.00 | \n", + "0.4 | \n", + "0.05 | \n", + "
| 355 | \n", + "35358 | \n", + "Will ChatGPT rank in the top 10 global website... | \n", + "yes | \n", + "2025-03-13 03:00:00 | \n", + "2025-03-13 03:00:00 | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "35385 | \n", + "1.00 | \n", + "0.8 | \n", + "0.97 | \n", + "
| 361 | \n", + "35364 | \n", + "Will Doge's Agency Efficiency Leaderboard have... | \n", + "no | \n", + "2025-03-14 23:00:00 | \n", + "2025-03-14 23:00:00 | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "35386 | \n", + "0.85 | \n", + "0.8 | \n", + "0.666 | \n", + "
| 364 | \n", + "35367 | \n", + "Will the Project 2025 Tracker spreadsheet mark... | \n", + "no | \n", + "2025-03-14 23:00:00 | \n", + "2025-03-14 23:00:00 | \n", + "binary | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "False | \n", + "35387 | \n", + "0.85 | \n", + "0.05 | \n", + "0.03 | \n", + "
| \n", + " | title | \n", + "bot_team_median | \n", + "pro_median | \n", + "resolution | \n", + "head_to_head | \n", + "
|---|---|---|---|---|---|
| 279 | \n", + "What will Kalshi's rank in the iPhone Top Free... | \n", + "[0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.05] | \n", + "[0.02,0.01,0.015,0.015,0.05,0.89] | \n", + "Not in top 50 | \n", + "-2.9 | \n", + "
| 121 | \n", + "How many movies will be new on Netflix's top 1... | \n", + "[0.0001, 0.0001, 0.0001, 0.125] | \n", + "[0.005,0.017,0.157,0.821] | \n", + "3 or more | \n", + "-1.9 | \n", + "
| 47 | \n", + "What will be Donald Trump's net worth, accordi... | \n", + "[0.16999999999999998, 0.0001, 0.0001, 0.0001, ... | \n", + "[0.6,0.2,0.1,0.075,0.025] | \n", + "0-$6 billion, inclusive | \n", + "-1.3 | \n", + "
| 232 | \n", + "How many movies will be new on Netflix's top 1... | \n", + "[0.0001, 0.0001, 0.0001, 0.2963039014373716] | \n", + "[0.002,0.008,0.09,0.9] | \n", + "3 or more | \n", + "-1.1 | \n", + "
| 247 | \n", + "Will the 500th richest person on Bloomberg's B... | \n", + "0.766667 | \n", + "0.333 | \n", + "no | \n", + "-1.1 | \n", + "
| \n", + " | title | \n", + "bot_team_median | \n", + "pro_median | \n", + "resolution | \n", + "head_to_head | \n", + "
|---|---|---|---|---|---|
| 0 | \n", + "For Q1 2025, how many banks will be listed on ... | \n", + "[0.012462871287128714, 0.0001, 0.0001, 0.0001,... | \n", + "[0.001,0.62,0.35,0.019,0.01] | \n", + "0 | \n", + "2.5 | \n", + "
| 189 | \n", + "What will the highest rank of metac-GPT4o or m... | \n", + "[0.0, 0.0369946063, 0.07475, 0.10485, 0.1198, ... | \n", + "[0.0,5.19918e-05,0.0001040776,0.0001562618,0.0... | \n", + "34.0 | \n", + "2.8 | \n", + "
| 151 | \n", + "How many earthquakes of magnitude ≥ 4 will hap... | \n", + "[0.0, 0.0035714286, 0.0071428571, 0.0107142857... | \n", + "[0.0,0.0158237002,0.0235315723,0.0279864362,0.... | \n", + "0.0 | \n", + "NaN | \n", + "
| 211 | \n", + "Will Nikola Corporation file for bankruptcy be... | \n", + "0.99 | \n", + "0.999 | \n", + "annulled | \n", + "NaN | \n", + "
| 214 | \n", + "Will the state of Rhode Island have any recrea... | \n", + "0.928 | \n", + "0.95 | \n", + "annulled | \n", + "NaN | \n", "