diff --git a/.gitignore b/.gitignore index 39111a6..14cb0d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .venv/ .env __pycache__ +.personal/ \ No newline at end of file diff --git a/AI_BENCHMARKING_ANALYSIS.ipynb b/AI_BENCHMARKING_ANALYSIS.ipynb index 3753b54..bf4055e 100644 --- a/AI_BENCHMARKING_ANALYSIS.ipynb +++ b/AI_BENCHMARKING_ANALYSIS.ipynb @@ -34,9 +34,12 @@ "outputs": [], "source": [ "# @title Import libraries\n", + "%load_ext autoreload\n", + "%autoreload 2\n", "from functions import *\n", "from IPython.display import display, clear_output\n", - "import pandas as pd" + "import pandas as pd\n", + "from copy import deepcopy\n" ] }, { @@ -54,7 +57,350 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_17143/1846409041.py:25: DtypeWarning: Columns (18,19) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_bot_forecasts = pd.read_csv('https://data.heroku.com/dataclips/tfwiopapwgyjkawcpjmpibjlsars.csv')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bot_question_idtitleresolutionscheduled_close_timeactual_close_timequestion_weight_xtypeoptionsrange_minrange_maxopen_upper_boundopen_lower_boundpro_question_idquestion_weight_y
031262For Q1 2025, how many banks will be listed on ...02025-01-20 03:27:00+002025-01-20 03:27:00+001.0multiple_choice[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]NaNNaNFalseFalse31268.01.0
131262For Q1 2025, how many banks will be listed on ...02025-01-20 03:27:00+002025-01-20 03:27:00+001.0multiple_choice[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]NaNNaNFalseFalse31268.01.0
231262For Q1 2025, how many banks will be listed on ...02025-01-20 03:27:00+002025-01-20 03:27:00+001.0multiple_choice[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]NaNNaNFalseFalse31268.01.0
331262For Q1 2025, how many banks will be listed on ...02025-01-20 03:27:00+002025-01-20 03:27:00+001.0multiple_choice[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]NaNNaNFalseFalse31268.01.0
431262For Q1 2025, how many banks will be listed on ...02025-01-20 03:27:00+002025-01-20 03:27:00+001.0multiple_choice[\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]NaNNaNFalseFalse31268.01.0
\n", + "
" + ], + "text/plain": [ + " bot_question_id title \\\n", + "0 31262 For Q1 2025, how many banks will be listed on ... \n", + "1 31262 For Q1 2025, how many banks will be listed on ... \n", + "2 31262 For Q1 2025, how many banks will be listed on ... \n", + "3 31262 For Q1 2025, how many banks will be listed on ... \n", + "4 31262 For Q1 2025, how many banks will be listed on ... \n", + "\n", + " resolution scheduled_close_time actual_close_time \\\n", + "0 0 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 \n", + "1 0 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 \n", + "2 0 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 \n", + "3 0 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 \n", + "4 0 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 \n", + "\n", + " question_weight_x type options range_min \\\n", + "0 1.0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN \n", + "1 1.0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN \n", + "2 1.0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN \n", + "3 1.0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN \n", + "4 1.0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN \n", + "\n", + " range_max open_upper_bound open_lower_bound pro_question_id \\\n", + "0 NaN False False 31268.0 \n", + "1 NaN False False 31268.0 \n", + "2 NaN False False 31268.0 \n", + "3 NaN False False 31268.0 \n", + "4 NaN False False 31268.0 \n", + "\n", + " question_weight_y \n", + "0 1.0 \n", + "1 1.0 \n", + "2 1.0 \n", + "3 1.0 \n", + "4 1.0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bot_question_idtitleresolutionscheduled_close_timeactual_close_timequestion_weight_xtypeoptionsrange_minrange_maxopen_upper_boundopen_lower_boundpro_question_idquestion_weight_y
23691635705Which podcast will be ranked higher on Spotify...Candace2025-03-20 20:00:00+002025-03-20 20:00:00+001.0multiple_choice[\"Call Her Daddy\",\"Candace\"]NaNNaNFalseFalseNaNNaN
23691735705Which podcast will be ranked higher on Spotify...Candace2025-03-20 20:00:00+002025-03-20 20:00:00+001.0multiple_choice[\"Call Her Daddy\",\"Candace\"]NaNNaNFalseFalseNaNNaN
23691835705Which podcast will be ranked higher on Spotify...Candace2025-03-20 20:00:00+002025-03-20 20:00:00+001.0multiple_choice[\"Call Her Daddy\",\"Candace\"]NaNNaNFalseFalseNaNNaN
23691935705Which podcast will be ranked higher on Spotify...Candace2025-03-20 20:00:00+002025-03-20 20:00:00+001.0multiple_choice[\"Call Her Daddy\",\"Candace\"]NaNNaNFalseFalseNaNNaN
23692035705Which podcast will be ranked higher on Spotify...Candace2025-03-20 20:00:00+002025-03-20 20:00:00+001.0multiple_choice[\"Call Her Daddy\",\"Candace\"]NaNNaNFalseFalseNaNNaN
\n", + "
" + ], + "text/plain": [ + " bot_question_id title \\\n", + "236916 35705 Which podcast will be ranked higher on Spotify... \n", + "236917 35705 Which podcast will be ranked higher on Spotify... \n", + "236918 35705 Which podcast will be ranked higher on Spotify... \n", + "236919 35705 Which podcast will be ranked higher on Spotify... \n", + "236920 35705 Which podcast will be ranked higher on Spotify... \n", + "\n", + " resolution scheduled_close_time actual_close_time \\\n", + "236916 Candace 2025-03-20 20:00:00+00 2025-03-20 20:00:00+00 \n", + "236917 Candace 2025-03-20 20:00:00+00 2025-03-20 20:00:00+00 \n", + "236918 Candace 2025-03-20 20:00:00+00 2025-03-20 20:00:00+00 \n", + "236919 Candace 2025-03-20 20:00:00+00 2025-03-20 20:00:00+00 \n", + "236920 Candace 2025-03-20 20:00:00+00 2025-03-20 20:00:00+00 \n", + "\n", + " question_weight_x type options \\\n", + "236916 1.0 multiple_choice [\"Call Her Daddy\",\"Candace\"] \n", + "236917 1.0 multiple_choice [\"Call Her Daddy\",\"Candace\"] \n", + "236918 1.0 multiple_choice [\"Call Her Daddy\",\"Candace\"] \n", + "236919 1.0 multiple_choice [\"Call Her Daddy\",\"Candace\"] \n", + "236920 1.0 multiple_choice [\"Call Her Daddy\",\"Candace\"] \n", + "\n", + " range_min range_max open_upper_bound open_lower_bound \\\n", + "236916 NaN NaN False False \n", + "236917 NaN NaN False False \n", + "236918 NaN NaN False False \n", + "236919 NaN NaN False False \n", + "236920 NaN NaN False False \n", + "\n", + " pro_question_id question_weight_y \n", + "236916 NaN NaN \n", + "236917 NaN NaN \n", + "236918 NaN NaN \n", + "236919 NaN NaN \n", + "236920 NaN NaN " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# @title Create df_bot_resolved_questions, df_pro_resolved_questions, df_pro_bot_resolved_questions, df_bot_question_weights\n", "\n", @@ -74,7 +420,7 @@ "This is done by matching the title and scheduled_close_time.\n", "\n", "We remove early closers from the analysis. 
I do this by comparing actual close time to scheduled\n", - "close time in a later cell!\n", + "close time in a later cell! @Check: Do we want to do this now that tournament is closed? Are we still doing this?\n", "\n", "df_pro_bot_resolved_questions: Has pro_question_id, bot_question_id, title, resolution, scheduled_close_time, question_weight\n", "\"\"\"\n", @@ -87,7 +433,7 @@ "df_pro_forecasts = pd.read_csv('https://data.heroku.com/dataclips/roxytxphqvznkgbygmfgzymjtfxx.csv')\n", "df_pro_questions = df_pro_forecasts.rename(columns={'question_id': 'pro_question_id', 'question_title': 'title'})\n", "\n", - "if False: # Temporary\n", + "if False: # Temporary - Only keep Binary\n", " df_bot_questions = df_bot_questions[df_bot_questions['resolution'].isin(['yes', 'no'])]\n", " df_bot_forecasts = df_bot_forecasts[df_bot_forecasts['resolution'].isin(['yes', 'no'])]\n", " df_bot_scores = df_bot_scores[df_bot_scores['resolution'].isin(['yes', 'no'])]\n", @@ -95,8 +441,8 @@ " df_pro_forecasts = df_pro_forecasts[df_pro_forecasts['resolution'].isin(['yes', 'no'])]\n", " df_pro_scores = df_pro_scores[df_pro_scores['resolution'].isin(['yes', 'no'])]\n", "\n", - "df_pro_resolved_questions = df_pro_questions[['pro_question_id', 'title', 'resolution', 'scheduled_close_time', 'actual_close_time', 'question_weight', 'type', 'options', 'range_min', 'range_max']]\n", - "df_bot_resolved_questions = df_bot_questions[['bot_question_id', 'title', 'resolution', 'scheduled_close_time', 'actual_close_time', 'question_weight', 'type', 'options', 'range_min', 'range_max']]\n", + "df_pro_resolved_questions = df_pro_questions[['pro_question_id', 'title', 'resolution', 'scheduled_close_time', 'actual_close_time', 'question_weight', 'type', 'options', 'range_min', 'range_max', 'open_upper_bound', 'open_lower_bound']]\n", + "df_bot_resolved_questions = df_bot_questions[['bot_question_id', 'title', 'resolution', 'scheduled_close_time', 'actual_close_time', 'question_weight', 'type', 'options', 
'range_min', 'range_max', 'open_upper_bound', 'open_lower_bound']]\n", "\n", "df_pro_bot_resolved_questions = pd.merge(\n", " df_bot_resolved_questions,\n", @@ -104,6 +450,7 @@ " on=['title', 'scheduled_close_time'],\n", " how='left'\n", ")\n", + "display_head_and_tail(df_pro_bot_resolved_questions)\n", "\n", "df_pro_bot_resolved_questions['question_weight'] = df_pro_bot_resolved_questions['question_weight_x'].combine_first(df_pro_bot_resolved_questions['question_weight_y'])\n", "df_pro_bot_resolved_questions.drop(['question_weight_x', 'question_weight_y'], axis=1, inplace=True)\n", @@ -114,6 +461,7 @@ "# Cast both question ids to int64\n", "df_pro_bot_resolved_questions['pro_question_id'] = df_pro_bot_resolved_questions['pro_question_id'].astype('Int64')\n", "df_pro_bot_resolved_questions['bot_question_id'] = df_pro_bot_resolved_questions['bot_question_id'].astype('Int64')\n", + "df_pro_bot_resolved_questions['options'] = df_pro_bot_resolved_questions['options'].apply(parse_options_array)\n", "\n", "# Remove df_bot_resolved_questions and df_pro_resolved_questions to make sure you only ever use df_pro_bot_resolved_questions\n", "del df_bot_resolved_questions\n", @@ -182,154 +530,13 @@ "# Weighted vs unweighted breakdown for those overlapping questions?\n", "df_pro_bot_overlap = df_pro_bot_resolved_questions[~df_pro_bot_resolved_questions['pro_question_id'].isna()]\n", "print(f'Unweighted count: {df_pro_bot_overlap.shape[0]}')\n", - "print(f'Weighted count: {df_pro_bot_overlap['question_weight'].sum()}')" + "print(f'Weighted count: {df_pro_bot_overlap[\"question_weight\"].sum()}')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [ - "# @title Relationships between Bot Questions, create df_bot_question_related_weights (FOR Q3 ONLY)\n", - "if 25871 in df_pro_bot_resolved_questions['bot_question_id'].values:\n", - " \"\"\"\n", - " Relationships between questions are entered as tuples. 
These relationships\n", - " will be used to perform logical consistency checks.\n", - "\n", - " Weights are assigned to questions based on relationships. This is a way to\n", - " deal with correlations between questions.\n", - " \"\"\"\n", - "\n", - " # Scope sensitity list of tuples where the first entry should equal the sum of the others\n", - " bot_scope_questions = [\n", - " (26019, 26017, 26018), # Starship launches\n", - " (26098, 26096, 26097), # SENSEX\n", - " (26159, 26158, 26157), # Geomagnetic storm July 28\n", - " (26194, 26195, 26196), # measles cases\n", - " (26006, 26005, 26004), # Trump lead over Biden\n", - " (26642, 26643, 26644), # spanish wikipedia\n", - " (26700, 26701, 26702), # market cap cryptocurrencies\n", - " (27261, 27262, 27263), # Geomagnetic storm Sept 11\n", - " ]\n", - "\n", - " # Sum of each tuple should logically equal 1\n", - " bot_sum_to_1_questions = [\n", - " (25952, 25953, 25954), # French PM party July 30\n", - " (25957, 25958, 25959), # Tour de France winner\n", - " (26570, 26571, 26572, 26573), # Warhammer\n", - " (26574, 26575, 26576, 26577), # H5 cases in US\n", - " (26671, 26670, 26669), # DOES NOT SUM TO EXACTLY 1 PM France Aug 31\n", - " (27748, 27747, 27746, 27749), # Speed Chess\n", - " (27488, 27489, 27490, 27491, 27492, 27493), # August CPI\n", - " (27932, 27933, 27934, 27935), # Chinese youth unemployment\n", - " (27484, 27485, 27486, 27487), # Fed rate cut Sept meeting\n", - " (28045, 28044, 28043, 28042), # Afd vote share\n", - " (28038, 28039, 28040, 28041), # Major Atlantic hurricanes\n", - " (26776, 26777, 26778, 26779), # Seattle-Tacoma-Bellevu Air Quality\n", - " ]\n", - "\n", - " # parent, child, if_yes, if_no\n", - " bot_conditional_pair = [\n", - " (26917, 26918, 26919, 26920) # israel lebanon conflict\n", - " ]\n", - "\n", - " # CDFs - Logically the probability of each successive question must not decrease\n", - " bot_increasing_questions = [\n", - " (26981, 26982, 26983, 26984, 26985, 26986), # 
aircraft ADIZ\n", - " (26977, 26978, 26979, 26980), # hurricane energy\n", - " (27548, 27547, 27546, 27545), # mpox CDC risk level\n", - " (28306, 28305, 28304, 28303, 28302), # Gas prices in US Sept 30\n", - " ]\n", - "\n", - " bot_repeated_questions = [\n", - " (26646, 26021), # mens 100m dash record\n", - " (26555, 27021), # USA gold silver\n", - " (26210, 26917), # israel invade lebanon\n", - " (26781, 26304), # ruto\n", - " (26100, 27136), # rfk drop out\n", - " (25956, 27158), # democrat brokered convention\n", - " (26102, 27022), # astronauts NOT EXACT REPEAT\n", - " (26022, 27085), # arrest warrants NOT EXACT REPEAT\n", - " (26235, 27281), # Buffett Indicator\n", - " (26390, 27789), # Bubble Magnificent 7\n", - " (26024, 27161), # QB Bo Nix starting for Broncos\n", - " (26302, 27282), # riots\n", - " (25955, 27157), # armed forces death US, China, Japan\n", - " (26958, 27640), # Youtube banned in Russia\n", - " (25936, 27141), # Crimean bridge attack\n", - " ]\n", - "\n", - " bot_similar_questions = [\n", - " (26915, 26916), # harris favorability\n", - " (26913, 26914), # trump favorability\n", - " (26193, 27733), # debate on Sept 10\n", - " (27886, 27968), # Taylor Swift awards\n", - " (27723, 27637), # Best Rock VMAs\n", - " (27583, 27582, 27584, 27602, 27603, 27604), # mpox Zambia, US, Angola, Russia, Japan, Mexico\n", - " (26306, 26838), # Richest people 250th > $10.2, 500th > 6.2\n", - " (27887, 27969), # Emmys Outstanding Limited or Anthology Series\n", - " (28206, 28207, 28208, 28209, 28210), # LMSYS leaderboard\n", - " (28154, 28336), # Nigeria Edo gubernatorial election\n", - " (26407, 27897), # Second Russian mobilization wave\n", - " (27539, 26215), # Nuclear weapons used\n", - " (27606, 27607, 27608, 27609, 27610), # Ukranian forces capture\n", - " (26387, 27788), # Will Tesla increase deliveries in Q3 2024\n", - " (26821, 26959), # VP debate\n", - " (26212, 26213, 26214), # number of dairy cow herds with H5N1\n", - " (26639, 26640, 26641) # 
Presidential debate 0, 1, or 2+\n", - " ]\n", - "\n", - " ####### CREATE QUESTION WEIGHTS #########\n", - "\n", - " # Combine both lists of tuples\n", - " all_questions = bot_scope_questions + bot_sum_to_1_questions + bot_increasing_questions + bot_similar_questions + bot_conditional_pair\n", - "\n", - " # Create an empty list to store the data\n", - " data = []\n", - "\n", - " # Process each tuple\n", - " for tuple_questions in all_questions:\n", - " # Calculate the weight for each question in the tuple\n", - " weight = np.log2(1 + len(tuple_questions))/(1 + len(tuple_questions))\n", - "\n", - " # Add each question and its weight to the data list\n", - " for question_id in tuple_questions:\n", - " data.append({'bot_question_id': question_id, 'question_weight': weight})\n", - "\n", - " # Process each tuple\n", - " for tuple_questions in bot_repeated_questions:\n", - " # 1st iteration has weight 1, 2nd has weight 1/2, 3rd weight 1/3....\n", - " count = 1\n", - "\n", - " # Add each question and its weight to the data list\n", - " for question_id in tuple_questions:\n", - " data.append({'bot_question_id': question_id, 'question_weight': 1/count})\n", - " count += 1\n", - "\n", - " # Create the DataFrame\n", - " df = pd.DataFrame(data)\n", - "\n", - " # Sort the DataFrame by bot_question_id for better readability\n", - " df_bot_question_related_weights = df.sort_values('bot_question_id').reset_index(drop=True)\n", - "\n", - "# if df_bot_question_related_weights is defined, replace the question weights in df_pro_bot_resolved_questions\n", - "if 'df_bot_question_related_weights' in locals():\n", - " df_pro_bot_resolved_questions = pd.merge(\n", - " df_pro_bot_resolved_questions,\n", - " df_bot_question_related_weights,\n", - " on='bot_question_id',\n", - " how='left'\n", - " )\n", - "\n", - " df_pro_bot_resolved_questions['question_weight'] = df_pro_bot_resolved_questions['question_weight_y'].combine_first(df_pro_bot_resolved_questions['question_weight_x'])\n", - " 
df_pro_bot_resolved_questions.drop(['question_weight_x', 'question_weight_y'], axis=1, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -346,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -354,11 +561,12 @@ "text/plain": [ "Index(['bot_question_id', 'title', 'resolution', 'scheduled_close_time',\n", " 'actual_close_time', 'type', 'options', 'range_min', 'range_max',\n", - " 'pro_question_id', 'question_weight'],\n", + " 'open_upper_bound', 'open_lower_bound', 'pro_question_id',\n", + " 'question_weight'],\n", " dtype='object')" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -404,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -419,12 +627,14 @@ "options object\n", "range_min float64\n", "range_max float64\n", + "open_upper_bound object\n", + "open_lower_bound object\n", "pro_question_id Int64\n", "question_weight float64\n", "dtype: object" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -435,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -446,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -467,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -499,11 +709,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "# Process forecasts (consolidate forecast columns; take the last forecast from 
each forecaster for each question) \n", + "# Process forecasts (consolidate forecast columns; take the last forecast from each forecaster for each question)\n", "df_bot_forecasts = process_forecasts(df_bot_forecasts)\n", "df_pro_forecasts = process_forecasts(df_pro_forecasts)\n", "\n", @@ -514,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -551,6 +761,8 @@ " options\n", " range_min\n", " range_max\n", + " open_lower_bound\n", + " open_upper_bound\n", " post_id\n", " forecast\n", " is_median\n", @@ -572,6 +784,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.568,0.366,0.041,0.024]\n", " False\n", @@ -591,6 +805,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.62,0.35,0.019,0.01]\n", " True\n", @@ -610,6 +826,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.005,0.7,0.25,0.04,0.005]\n", " False\n", @@ -629,6 +847,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.59,0.35,0.044,0.015]\n", " False\n", @@ -648,6 +868,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.623,0.336,0.03,0.01]\n", " False\n", @@ -685,22 +907,29 @@ "3 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 1.0 \n", "4 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 1.0 \n", "\n", - " type options range_min range_max post_id \\\n", - "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "1 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "2 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "3 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "4 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 
31736 \n", - "\n", - " forecast is_median \n", - "0 [0.001,0.568,0.366,0.041,0.024] False \n", - "1 [0.001,0.62,0.35,0.019,0.01] True \n", - "2 [0.005,0.7,0.25,0.04,0.005] False \n", - "3 [0.001,0.59,0.35,0.044,0.015] False \n", - "4 [0.001,0.623,0.336,0.03,0.01] False " + " type options range_min range_max \\\n", + "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "1 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "2 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "3 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "4 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "\n", + " open_lower_bound open_upper_bound post_id forecast \\\n", + "0 False False 31736 [0.001,0.568,0.366,0.041,0.024] \n", + "1 False False 31736 [0.001,0.62,0.35,0.019,0.01] \n", + "2 False False 31736 [0.005,0.7,0.25,0.04,0.005] \n", + "3 False False 31736 [0.001,0.59,0.35,0.044,0.015] \n", + "4 False False 31736 [0.001,0.623,0.336,0.03,0.01] \n", + "\n", + " is_median \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 False \n", + "4 False " ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -711,7 +940,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -734,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -747,15 +976,15 @@ " 'metac-perplexity', 'bot_median',\n", " 'metac-claude-3-5-sonnet-20240620', 'pgodzinai', 'jkraybill_bot',\n", " 'metac-exa', 'manticAI', 'MWG', 'CatrachoCaster', 'twsummerbot',\n", - " 'VeritasAI', 'X_bot', 'annabot', 'minefrac1', 'metac-deepseek-r1',\n", - " 'Bot_Pepa', 'laylaps', 'ajf-bot', 'SynapseSeer', 'RPM_bot',\n", - " 'cookics_bot_TEST', 'ProfessorSP', 'wunderplumb', 'CumulativeBot',\n", - " 'pianobot', 'krm-bot', 'KevinTestBot', '4Shadower', 
'swingswish',\n", - " 'jonahsingerbot', 'bean_bot', 'andrewsiah', 'cobyj-bot'],\n", - " dtype=object)" + " 'VeritasAI', 'X_bot', 'annabot', 'minefrac1',\n", + " 'metac-deepseek-r1+asknews', 'Bot_Pepa', 'laylaps', 'ajf-bot',\n", + " 'SynapseSeer', 'RPM_bot', 'cookics_bot_TEST', 'ProfessorSP',\n", + " 'wunderplumb', 'CumulativeBot', 'pianobot', 'krm-bot',\n", + " 'KevinTestBot', '4Shadower', 'swingswish', 'jonahsingerbot',\n", + " 'bean_bot', 'andrewsiah', 'cobyj-bot'], dtype=object)" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -767,7 +996,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -821,11 +1050,11 @@ " \n", " 14\n", " bot_median\n", - " 6.926374\n", - " 2618.307732\n", + " 8.143307\n", + " 3078.332902\n", " 409\n", - " 3.779645\n", - " 1.600741\n", + " 5.471228\n", + " 1.359286\n", " \n", " \n", " 19\n", @@ -853,14 +1082,14 @@ " forecaster weighted_mean weighted_sum n_questions ci_lower \\\n", "11 metac-o1 9.674740 3631.123492 406 6.257418 \n", "12 metac-o1-preview 8.465638 3121.449998 399 3.947903 \n", - "14 bot_median 6.926374 2618.307732 409 3.779645 \n", + "14 bot_median 8.143307 3078.332902 409 5.471228 \n", "19 manticAI 6.510835 2055.210309 337 0.552564 \n", "5 metac-Gemini-Exp-1206 5.417367 1880.476418 377 0.876988 \n", "\n", " weighted_se \n", "11 1.738353 \n", "12 2.298000 \n", - "14 1.600741 \n", + "14 1.359286 \n", "19 3.029040 \n", "5 2.309106 " ] @@ -976,7 +1205,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": { "id": "BmAFBHIhK77X" }, @@ -1025,7 +1254,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1449,7 +1678,7 @@ " np.int64(35705)}" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1470,7 +1699,7 @@ }, { "cell_type": "code", - "execution_count": 
22, + "execution_count": 21, "metadata": { "cellView": "form", "id": "XceLWcgCPNw-" @@ -1520,7 +1749,7 @@ " \n", " 3\n", " bot_median\n", - " 8152.574861\n", + " 8721.511046\n", " \n", " \n", " 4\n", @@ -1541,7 +1770,7 @@ "Rank \n", "1 metac-o1 8861.959039\n", "2 metac-o1-preview 8849.559824\n", - "3 bot_median 8152.574861\n", + "3 bot_median 8721.511046\n", "4 acm_bot 7605.922314\n", "5 manticAI 7061.660958" ] @@ -1647,7 +1876,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1666,7 +1895,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "cellView": "form", "id": "iRDMoH7hTBEq" @@ -1710,13 +1939,13 @@ " \n", " \n", " 2\n", - " metac-o1-preview\n", - " 3162.155445\n", + " bot_median\n", + " 3472.028144\n", " \n", " \n", " 3\n", - " bot_median\n", - " 2724.680171\n", + " metac-o1-preview\n", + " 3162.155445\n", " \n", " \n", " 4\n", @@ -1780,7 +2009,7 @@ " \n", " \n", " 16\n", - " metac-deepseek-r1\n", + " metac-deepseek-r1+asknews\n", " 614.572462\n", " \n", " \n", @@ -1946,8 +2175,8 @@ " bot Peer Score\n", "Rank \n", "1 metac-o1 3864.168122\n", - "2 metac-o1-preview 3162.155445\n", - "3 bot_median 2724.680171\n", + "2 bot_median 3472.028144\n", + "3 metac-o1-preview 3162.155445\n", "4 manticAI 2142.538438\n", "5 metac-Gemini-Exp-1206 2072.216227\n", "6 acm_bot 1876.466009\n", @@ -1960,7 +2189,7 @@ "13 CumulativeBot 1030.716475\n", "14 pgodzinai 926.081448\n", "15 jkraybill_bot 627.932509\n", - "16 metac-deepseek-r1 614.572462\n", + "16 metac-deepseek-r1+asknews 614.572462\n", "17 question_weight 378.020000\n", "18 metac-exa 265.384263\n", "19 MWG 215.551323\n", @@ -1994,7 +2223,7 @@ "47 ajf-bot -3239.712801" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2036,7 +2265,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ 
-2055,7 +2284,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -2064,7 +2293,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -2072,9 +2301,7 @@ "output_type": "stream", "text": [ "PRO MEDIAN\n", - "Average baseline: 44.964801909223056\n", - "pgodzinai MEDIAN\n", - "Average baseline: 16.482817250003514\n" + "Average baseline: 44.964801909223056\n" ] } ], @@ -2082,17 +2309,12 @@ "# Print WEIGHTED average for pro_median\n", "print(\"PRO MEDIAN\")\n", "pro_median_baseline = df_pro_baseline_long[df_pro_baseline_long['forecaster'] == 'pro_median']\n", - "print(f'Average baseline: {(pro_median_baseline['score'] * pro_median_baseline['question_weight']).sum() / pro_median_baseline['question_weight'].sum()}')\n", - "\n", - "# Same for pgodzinai in df_bot_scores (this differs from the bot team results later on because it's on ALL his questions)\n", - "print(\"pgodzinai MEDIAN\")\n", - "pgodzinai_baseline = df_bot_scores[df_bot_scores['forecaster'] == 'pgodzinai']\n", - "print(f'Average baseline: {(pgodzinai_baseline['score'] * pgodzinai_baseline['question_weight']).sum() / pgodzinai_baseline['question_weight'].sum()}')" + "print(f'Average baseline: {(pro_median_baseline[\"score\"] * pro_median_baseline[\"question_weight\"]).sum() / pro_median_baseline[\"question_weight\"].sum()}')" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2129,6 +2351,8 @@ " options\n", " range_min\n", " range_max\n", + " open_lower_bound\n", + " open_upper_bound\n", " post_id\n", " forecast\n", " is_median\n", @@ -2150,6 +2374,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.568,0.366,0.041,0.024]\n", " False\n", @@ -2169,6 +2395,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 
31736\n", " [0.001,0.62,0.35,0.019,0.01]\n", " True\n", @@ -2188,6 +2416,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.005,0.7,0.25,0.04,0.005]\n", " False\n", @@ -2207,6 +2437,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.59,0.35,0.044,0.015]\n", " False\n", @@ -2226,6 +2458,8 @@ " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31736\n", " [0.001,0.623,0.336,0.03,0.01]\n", " False\n", @@ -2263,22 +2497,29 @@ "3 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 1.0 \n", "4 2025-01-20 03:27:00+00 2025-01-20 03:27:00+00 1.0 \n", "\n", - " type options range_min range_max post_id \\\n", - "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "1 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "2 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "3 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "4 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31736 \n", - "\n", - " forecast is_median \n", - "0 [0.001,0.568,0.366,0.041,0.024] False \n", - "1 [0.001,0.62,0.35,0.019,0.01] True \n", - "2 [0.005,0.7,0.25,0.04,0.005] False \n", - "3 [0.001,0.59,0.35,0.044,0.015] False \n", - "4 [0.001,0.623,0.336,0.03,0.01] False " + " type options range_min range_max \\\n", + "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "1 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "2 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "3 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "4 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN \n", + "\n", + " open_lower_bound open_upper_bound post_id forecast \\\n", + "0 False False 31736 [0.001,0.568,0.366,0.041,0.024] \n", + "1 False False 31736 [0.001,0.62,0.35,0.019,0.01] 
\n", + "2 False False 31736 [0.005,0.7,0.25,0.04,0.005] \n", + "3 False False 31736 [0.001,0.59,0.35,0.044,0.015] \n", + "4 False False 31736 [0.001,0.623,0.336,0.03,0.01] \n", + "\n", + " is_median \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 False \n", + "4 False " ] }, - "execution_count": 28, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2289,7 +2530,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "metadata": { "cellView": "form", "id": "Yfq0_lDKAMl7" @@ -2322,10 +2563,10 @@ " question_weight\n", " type\n", " options\n", - " pro_median\n", - " 4Shadower\n", - " Bot_Pepa\n", - " CatrachoCaster\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " ...\n", " metac-o1\n", " metac-o1-preview\n", @@ -2347,15 +2588,15 @@ " 0\n", " 1.0\n", " multiple_choice\n", - " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", - " [0.001,0.62,0.35,0.019,0.01]\n", - " NaN\n", + " [0, 1, 2-3, 4-6, >6]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " [0.45,0.3,0.15,0.05,0.05]\n", - " [0.02,0.7,0.2,0.07,0.01]\n", - " [0.2,0.25,0.35,0.15,0.05]\n", + " [0.25,0.3,0.3,0.1,0.05]\n", + " [0.01,0.7,0.2,0.07,0.02]\n", + " [0.3,0.4,0.2,0.07,0.03]\n", " NaN\n", " [0.009900990099009901,0.39603960396039606,0.44...\n", " [0.014925742574257425,0.5137871287128712,0.334...\n", @@ -2372,14 +2613,14 @@ " 1.0\n", " numeric\n", " None\n", - " [0.0013749738,0.0014499743,0.001526641,0.00160...\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 60.0\n", + " 100.0\n", + " True\n", + " True\n", " ...\n", - " [0.05,0.0506666667,0.0513333333,0.052,0.052666...\n", - " [0.05,0.0506666667,0.0513333333,0.052,0.052666...\n", - " [0.05,0.0508333333,0.0516666667,0.0525,0.05333...\n", + " [0.05,0.0505882353,0.0511764706,0.0517647059,0...\n", + " [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05...\n", + " [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05...\n", " NaN\n", " 
[0.0215944348,0.0218024136,0.0220262706,0.0222...\n", " [0.001,0.001060875,0.0011396,0.0012863125,0.00...\n", @@ -2396,13 +2637,13 @@ " 1.0\n", " binary\n", " None\n", - " 0.013\n", - " NaN\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", " 0.1\n", - " 0.15\n", + " 0.05\n", " 0.1\n", " NaN\n", " 0.2\n", @@ -2419,15 +2660,15 @@ " 5-9\n", " 1.0\n", " multiple_choice\n", - " [\"0-4\",\"5-9\",\">9\"]\n", - " [0.16,0.44,0.4]\n", + " [0-4, 5-9, >9]\n", " NaN\n", " NaN\n", - " [0.16,0.47,0.37]\n", + " None\n", + " None\n", " ...\n", - " [0.25,0.6,0.15]\n", - " [0.2,0.6,0.2]\n", - " [0.15,0.45,0.4]\n", + " [0.45,0.45,0.1]\n", + " [0.15,0.65,0.2]\n", + " [0.15000000000000002,0.54,0.31000000000000005]\n", " NaN\n", " [0.25,0.5,0.25]\n", " [0.27499999999999997,0.5125,0.21249999999999997]\n", @@ -2444,13 +2685,13 @@ " 1.0\n", " numeric\n", " None\n", - " [0.0,0.0005044914,0.0010323506,0.0015847475,0....\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 0.0\n", + " 400.0\n", + " False\n", + " False\n", " ...\n", - " [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", - " [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", + " [0.0,0.0033333333,0.0066666667,0.01,0.01333333...\n", + " [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,...\n", " [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", " NaN\n", " [0.0,0.0006552097,0.0013605064,0.0021151815,0....\n", @@ -2462,7 +2703,7 @@ " \n", " \n", "\n", - "

5 rows × 53 columns

\n", + "

5 rows × 57 columns

\n", "" ], "text/plain": [ @@ -2473,39 +2714,39 @@ "3 31280 31274 5-9 1.0 \n", "4 31281 31275 119.2 1.0 \n", "\n", - " type options \\\n", - "0 multiple_choice [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] \n", - "1 numeric None \n", - "2 binary None \n", - "3 multiple_choice [\"0-4\",\"5-9\",\">9\"] \n", - "4 numeric None \n", - "\n", - " pro_median 4Shadower Bot_Pepa \\\n", - "0 [0.001,0.62,0.35,0.019,0.01] NaN NaN \n", - "1 [0.0013749738,0.0014499743,0.001526641,0.00160... NaN NaN \n", - "2 0.013 NaN NaN \n", - "3 [0.16,0.44,0.4] NaN NaN \n", - "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... NaN NaN \n", - "\n", - " CatrachoCaster ... metac-o1 \\\n", - "0 NaN ... [0.45,0.3,0.15,0.05,0.05] \n", - "1 NaN ... [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 NaN ... 0.1 \n", - "3 [0.16,0.47,0.37] ... [0.25,0.6,0.15] \n", - "4 NaN ... [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", + " type options range_min range_max \\\n", + "0 multiple_choice [0, 1, 2-3, 4-6, >6] NaN NaN \n", + "1 numeric None 60.0 100.0 \n", + "2 binary None NaN NaN \n", + "3 multiple_choice [0-4, 5-9, >9] NaN NaN \n", + "4 numeric None 0.0 400.0 \n", + "\n", + " open_upper_bound open_lower_bound ... \\\n", + "0 False False ... \n", + "1 True True ... \n", + "2 False False ... \n", + "3 None None ... \n", + "4 False False ... \n", + "\n", + " metac-o1 \\\n", + "0 [0.25,0.3,0.3,0.1,0.05] \n", + "1 [0.05,0.0505882353,0.0511764706,0.0517647059,0... \n", + "2 0.1 \n", + "3 [0.45,0.45,0.1] \n", + "4 [0.0,0.0033333333,0.0066666667,0.01,0.01333333... \n", "\n", " metac-o1-preview \\\n", - "0 [0.02,0.7,0.2,0.07,0.01] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.15 \n", - "3 [0.2,0.6,0.2] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", + "0 [0.01,0.7,0.2,0.07,0.02] \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... \n", + "2 0.05 \n", + "3 [0.15,0.65,0.2] \n", + "4 [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,... 
\n", "\n", " metac-perplexity minefrac1 \\\n", - "0 [0.2,0.25,0.35,0.15,0.05] NaN \n", - "1 [0.05,0.0508333333,0.0516666667,0.0525,0.05333... NaN \n", + "0 [0.3,0.4,0.2,0.07,0.03] NaN \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... NaN \n", "2 0.1 NaN \n", - "3 [0.15,0.45,0.4] NaN \n", + "3 [0.15000000000000002,0.54,0.31000000000000005] NaN \n", "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... NaN \n", "\n", " mmBot \\\n", @@ -2529,7 +2770,7 @@ "3 [0.116,0.42,0.464] NaN \n", "4 [0.0,0.001311947,0.0026238939,0.0039358409,0.0... NaN \n", "\n", - "[5 rows x 53 columns]" + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -2562,10 +2803,10 @@ " question_weight\n", " type\n", " options\n", - " pro_median\n", - " 4Shadower\n", - " Bot_Pepa\n", - " CatrachoCaster\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " ...\n", " metac-o1\n", " metac-o1-preview\n", @@ -2588,10 +2829,10 @@ " 1.00\n", " binary\n", " None\n", - " 0.95\n", - " 0.9\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", " 0.9\n", " 0.9\n", @@ -2612,12 +2853,12 @@ " 1.00\n", " binary\n", " None\n", - " 0.05\n", - " 0.95\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " 0.2\n", + " 0.4\n", " 0.9\n", " NaN\n", " NaN\n", @@ -2636,13 +2877,13 @@ " 1.00\n", " binary\n", " None\n", - " 0.97\n", - " 0.85\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " 0.85\n", - " 0.9\n", + " 0.8\n", + " 0.95\n", " NaN\n", " NaN\n", " 0.9\n", @@ -2660,12 +2901,12 @@ " 0.85\n", " binary\n", " None\n", - " 0.666\n", - " 0.8\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " 0.75\n", + " 0.8\n", " 0.85\n", " 0.3\n", " NaN\n", @@ -2684,14 +2925,14 @@ " 0.85\n", " binary\n", " None\n", - " 0.03\n", - " 0.3\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " 0.07\n", - " 0.1\n", " 0.05\n", + " 0.05\n", + " 0.03\n", " NaN\n", " 0.15\n", " 0.05\n", @@ -2702,7 +2943,7 @@ " \n", " \n", "\n", - "

5 rows × 53 columns

\n", + "

5 rows × 57 columns

\n", "" ], "text/plain": [ @@ -2713,28 +2954,28 @@ "97 35386 35364 no 0.85 binary \n", "98 35387 35367 no 0.85 binary \n", "\n", - " options pro_median 4Shadower Bot_Pepa CatrachoCaster ... metac-o1 \\\n", - "94 None 0.95 0.9 NaN NaN ... 0.9 \n", - "95 None 0.05 0.95 NaN NaN ... 0.2 \n", - "96 None 0.97 0.85 NaN NaN ... 0.85 \n", - "97 None 0.666 0.8 NaN NaN ... 0.75 \n", - "98 None 0.03 0.3 NaN NaN ... 0.07 \n", - "\n", - " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai pianobot \\\n", - "94 0.9 NaN NaN 0.95 0.95 NaN \n", - "95 0.9 NaN NaN 0.15 NaN NaN \n", - "96 0.9 NaN NaN 0.9 NaN NaN \n", - "97 0.85 0.3 NaN 0.85 0.85 NaN \n", - "98 0.1 0.05 NaN 0.15 0.05 NaN \n", - "\n", - " swingswish twsummerbot wunderplumb \n", - "94 0.9 0.762 0.9 \n", - "95 0.1 0.126 0.95 \n", - "96 0.85 0.828 0.85 \n", - "97 0.7 0.132 0.3 \n", - "98 0.2 0.27 0.2 \n", - "\n", - "[5 rows x 53 columns]" + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... 
\n", + "\n", + " metac-o1 metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 0.9 0.9 NaN NaN 0.95 0.95 \n", + "95 0.4 0.9 NaN NaN 0.15 NaN \n", + "96 0.8 0.95 NaN NaN 0.9 NaN \n", + "97 0.8 0.85 0.3 NaN 0.85 0.85 \n", + "98 0.05 0.05 0.03 NaN 0.15 0.05 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb \n", + "94 NaN 0.9 0.762 0.9 \n", + "95 NaN 0.1 0.126 0.95 \n", + "96 NaN 0.85 0.828 0.85 \n", + "97 NaN 0.7 0.132 0.3 \n", + "98 NaN 0.2 0.27 0.2 \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -2772,7 +3013,11 @@ "df_bot_forecasts = df_bot_forecasts.reset_index()\n", "\n", "# One row per question, with pro_question_id and bot_question_id and resolution\n", - "df_pro_bot_resolved_questions_first = df_pro_bot_resolved_questions.groupby(['pro_question_id', 'bot_question_id']).first().reset_index()[['pro_question_id', 'bot_question_id', 'resolution', 'question_weight', 'type', 'options']]\n", + "df_pro_bot_resolved_questions_first = df_pro_bot_resolved_questions.groupby(\n", + " ['pro_question_id', 'bot_question_id']\n", + " ).first().reset_index()[\n", + " ['pro_question_id', 'bot_question_id', 'resolution', 'question_weight', 'type', 'options', 'range_min', 'range_max', 'open_upper_bound', 'open_lower_bound']\n", + " ]\n", "\n", "df2 = pd.merge(\n", " df_pro_bot_resolved_questions_first,\n", @@ -2793,14 +3038,15 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['pro_question_id', 'bot_question_id', 'resolution', 'question_weight',\n", - " 'type', 'options', 'pro_median', '4Shadower', 'Bot_Pepa',\n", + " 'type', 'options', 'range_min', 'range_max', 'open_upper_bound',\n", + " 'open_lower_bound', 'pro_median', '4Shadower', 'Bot_Pepa',\n", " 'CatrachoCaster', 'CumulativeBot', 'GreeneiBot2', 'Grizeu_Bot',\n", " 'InstitutPelFutur', 'KevinTestBot', 'MWG', 'NextWorldLab',\n", " 'ProfessorSP', 'RPM_bot', 'SynapseSeer', 'VeritasAI', 
'X_bot',\n", @@ -2808,14 +3054,14 @@ " 'cobyj-bot', 'cookics_bot_TEST', 'jkraybill_bot', 'jonahsingerbot',\n", " 'krm-bot', 'laylaps', 'manticAI', 'metac-Gemini-Exp-1206',\n", " 'metac-Llama-3.1', 'metac-claude-3-5-sonnet-20240620',\n", - " 'metac-claude-3-5-sonnet-latest', 'metac-deepseek-r1', 'metac-exa',\n", - " 'metac-gpt-4o', 'metac-grok-2-1212', 'metac-o1', 'metac-o1-preview',\n", - " 'metac-perplexity', 'minefrac1', 'mmBot', 'pgodzinai', 'pianobot',\n", - " 'swingswish', 'twsummerbot', 'wunderplumb'],\n", + " 'metac-claude-3-5-sonnet-latest', 'metac-deepseek-r1+asknews',\n", + " 'metac-exa', 'metac-gpt-4o', 'metac-grok-2-1212', 'metac-o1',\n", + " 'metac-o1-preview', 'metac-perplexity', 'minefrac1', 'mmBot',\n", + " 'pgodzinai', 'pianobot', 'swingswish', 'twsummerbot', 'wunderplumb'],\n", " dtype='object')" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2826,7 +3072,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2836,7 +3082,7 @@ "Name: GreeneiBot2, dtype: object" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2851,7 +3097,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -2863,73 +3109,17 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "df_pro_bot_forecasts['options'] = df_pro_bot_forecasts['options'].apply(parse_options_array)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "# Simple function to parse CDF strings for numeric questions\n", - "def parse_numeric_forecasts(df):\n", - " \"\"\"\n", - " Parse CDF strings for numeric questions in-place.\n", - " \n", - " Args:\n", - " df: DataFrame with forecast data\n", - " \"\"\"\n", - " # Get numeric questions\n", - " 
numeric_mask = df['type'] == 'numeric'\n", - " \n", - " # List of columns to process\n", - " forecast_cols = [col for col in df.columns if col in all_bots or col in ['pro_median', 'bot_median']]\n", - " \n", - " # Process each column\n", - " for col in forecast_cols:\n", - " # Process only for numeric questions and only where the column exists\n", - " if col in df.columns:\n", - " for idx in df[numeric_mask].index:\n", - " value = df.at[idx, col]\n", - " \n", - " # Skip NaN values\n", - " if pd.isna(value):\n", - " continue\n", - " \n", - " # Process string values\n", - " if isinstance(value, str):\n", - " try:\n", - " # Parse the CDF string to an array\n", - " parsed_array = np.array([float(x) for x in value.strip('[]').split(',')])\n", - " df.at[idx, col] = parsed_array\n", - " except Exception as e:\n", - " print(f\"Warning: Could not parse {col} at index {idx}: {e}\")\n", - " \n", - " return df\n", - "\n", - "# Now parse the numeric forecasts\n", - "df_pro_bot_forecasts = parse_numeric_forecasts(df_pro_bot_forecasts)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "df_bot_vs_pro_peer = calculate_all_peer_scores(df_pro_bot_forecasts, all_bots)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_17143/199340000.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " multiple_choice_rows_with_empty_options = df_pro_bot_forecasts[df_pro_bot_forecasts['options'] == '[]'][df_pro_bot_forecasts['type'] == 'multiple_choice']\n" + ] + }, { "data": { "text/html": [ @@ -2957,11 +3147,12 @@ " question_weight\n", " type\n", " options\n", - " pro_median\n", - " 4Shadower\n", - " Bot_Pepa\n", - " CatrachoCaster\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " ...\n", + " metac-o1\n", " 
metac-o1-preview\n", " metac-perplexity\n", " minefrac1\n", @@ -2971,7 +3162,6 @@ " swingswish\n", " twsummerbot\n", " wunderplumb\n", - " bot_team_median\n", " \n", " \n", " \n", @@ -2983,174 +3173,209 @@ " 1.0\n", " multiple_choice\n", " [0, 1, 2-3, 4-6, >6]\n", - " [0.001,0.62,0.35,0.019,0.01]\n", - " NaN\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " 299.573227\n", - " 529.831737\n", + " [0.25,0.3,0.3,0.1,0.05]\n", + " [0.01,0.7,0.2,0.07,0.02]\n", + " [0.3,0.4,0.2,0.07,0.03]\n", " NaN\n", - " 229.263476\n", - " 270.308741\n", + " [0.009900990099009901,0.39603960396039606,0.44554455445544555,0.1188118811881188,0.0297029702970297]\n", + " [0.014925742574257425,0.5137871287128712,0.3349009900990099,0.10168316831683169,0.03470297029702965]\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 501.063529\n", " \n", " \n", - " 3\n", - " 31280\n", - " 31274\n", - " 5-9\n", + " 1\n", + " 31269\n", + " 31263\n", + " 86.82\n", " 1.0\n", - " multiple_choice\n", - " [0-4, 5-9, >9]\n", - " [0.16,0.44,0.4]\n", - " NaN\n", - " NaN\n", - " 6.595797\n", + " numeric\n", + " None\n", + " 60.0\n", + " 100.0\n", + " True\n", + " True\n", " ...\n", - " 31.015493\n", - " 2.247286\n", + " 
[0.05,0.0505882353,0.0511764706,0.0517647059,0.0523529412,0.0529411765,0.0535294118,0.0541176471,0.0547058824,0.0552941176,0.0558823529,0.0564705882,0.0570588235,0.0576470588,0.0582352941,0.0588235294,0.0594117647,0.06,0.0605882353,0.0611764706,0.0617647059,0.0623529412,0.0629411765,0.0635294118,0.0641176471,0.0647058824,0.0652941176,0.0658823529,0.0664705882,0.0670588235,0.0676470588,0.0682352941,0.0688235294,0.0694117647,0.07,0.0705882353,0.0711764706,0.0717647059,0.0723529412,0.0729411765,0.0735294118,0.0741176471,0.0747058824,0.0752941176,0.0758823529,0.0764705882,0.0770588235,0.0776470588,0.0782352941,0.0788235294,0.0794117647,0.08,0.0805882353,0.0811764706,0.0817647059,0.0823529412,0.0829411765,0.0835294118,0.0841176471,0.0847058824,0.0852941176,0.0858823529,0.0864705882,0.0870588235,0.0876470588,0.0882352941,0.0888235294,0.0894117647,0.09,0.0905882353,0.0911764706,0.0917647059,0.0923529412,0.0929411765,0.0935294118,0.0941176471,0.0947058824,0.0952941176,0.0958823529,0.0964705882,0.0970588235,0.0976470588,0.0982352941,0.0988235294,0.0994117647,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.22,0.24,0.26,0.28,0.3,0.32,0.34,0.36,0.38,0.4,0.42,0.44,0.46,0.48,0.5,0.52,0.54,0.56,0.58,0.6,0.62,0.64,0.66,0.68,0.7,0.72,0.74,0.76,0.78,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.9007692308,0.9015384615,0.9023076923,0.9030769231,0.9038461538,0.9046153846,0.9053846154,0.9061538462,0.9069230769,0.9076923077,0.9084615385,0.9092307692,0.91,0.9107692308,0.9115384615,0.9123076923,0.9130769231,0.9138461538,0.9146153846,0.9153846154,0.9161538462,0.9169230769,0.9176923077,0.9184615385,0.9192307692,0.92,0.9207692308,0.9215384615,0.9223076923,0.9230769231,0.9238461538,0.9246153846,0.9253846154,0.9261538462,0.9269230769,0.9276923077,0.9284615385,0.9292307692,0.93,0.9307692308,0.9315384615,0.9323076923,0.9330769231,0.9338461538,0.9346153846,0.9353846154,0.9361538462,0.9369230769,0.9376923077,0.9384615385,0.9392307692,0.94,0.9407692308,0.9415384615,0.9423076923,0.943
0769231,0.9438461538,0.9446153846,0.9453846154,0.9461538462,0.9469230769,0.9476923077,0.9484615385,0.9492307692,0.95]\n", + " [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.8066666667,0.8133333333,0.82,0.8266666667,0.8333333333,0.84,0.8466666667,0.8533333333,0.86,0.8666666667,0.8733333333,0.88,0.8866666667,0.8933333333,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95]\n", + " 
[0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.6133333333,0.6266666667,0.64,0.6533333333,0.6666666667,0.68,0.6933333333,0.7066666667,0.72,0.7333333333,0.7466666667,0.76,0.7733333333,0.7866666667,0.8,0.804,0.808,0.812,0.816,0.82,0.824,0.828,0.832,0.836,0.84,0.844,0.848,0.852,0.856,0.86,0.864,0.868,0.872,0.876,0.88,0.884,0.888,0.892,0.896,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95]\n", + " NaN\n", + " 
[0.0215944348,0.0218024136,0.0220262706,0.0222657692,0.0225205234,0.0227900084,0.0230735761,0.0233704727,0.0236798595,0.0240008339,0.0243324518,0.0246737484,0.0250237592,0.0253815375,0.0257461704,0.0261167925,0.0264925953,0.0268728349,0.0272568365,0.0276439961,0.0280337803,0.0284257242,0.0288194274,0.0292145496,0.0296108048,0.0300079559,0.0304058088,0.0308042061,0.031203022,0.0316021576,0.0320015358,0.0324010988,0.0328008038,0.033200622,0.0336005361,0.0340005406,0.0344006419,0.0348008594,0.0352012288,0.0356018064,0.0360026751,0.0364039532,0.0368058059,0.0372084598,0.0376122217,0.0380175022,0.0384248443,0.0388349581,0.0392487619,0.0396674303,0.040092449,0.0405256766,0.040969412,0.0414264662,0.0419002382,0.0423947905,0.0429149226,0.0434662384,0.0440552034,0.0446891875,0.0453764888,0.0461263346,0.0469488546,0.047855024,0.0488565752,0.0499658763,0.0511957788,0.0525594355,0.0540700958,0.0557408822,0.0575845575,0.0596132911,0.061838434,0.0642703126,0.0669180506,0.0697894271,0.0728907793,0.0762269529,0.0798013046,0.0836157568,0.0876709009,0.091966147,0.096499911,0.1012698318,0.1062730078,0.1115062433,0.116966291,0.1226500836,0.1285549408,0.1346787459,0.1410200827,0.1475783286,0.1543537019,0.1613472593,0.1685608481,0.1759970129,0.1836588644,0.1915499147,0.1996738871,0.208034508,0.2166352903,0.225479315,0.2345690212,0.24390601,0.2534908708,0.2633230334,0.2734006526,0.283720526,0.2942780484,0.3050672012,0.316080577,0.3273094353,0.3387437886,0.3503725099,0.3621834602,0.3741636271,0.3862992712,0.3985760721,0.4109792702,0.4234937993,0.4361044066,0.4487957561,0.4615525185,0.4743594438,0.4872014199,0.5000635204,0.5129310433,0.5257895463,0.5386248816,0.5514232322,0.5641711536,0.5768556211,0.589464083,0.6019845173,0.6144054896,0.6267162064,0.6389065595,0.6509671563,0.6628893291,0.6746651196,0.6862872355,0.6977489765,0.7090441313,0.7201668477,0.7311114815,0.7418724312,0.7524439675,0.7628200682,0.7729942685,0.7829595382,0.7927081941,0.8022318565,0.8115214549,0.8205672863,0.8293591256,
0.8378863854,0.8461383197,0.8541042651,0.8617739066,0.8691375599,0.8761864572,0.8829130238,0.8893111359,0.8953763492,0.9011060878,0.9064997881,0.9115589931,0.9162873921,0.9206908074,0.9247771276,0.9285561903,0.9320396198,0.9352406245,0.9381737618,0.9408546777,0.9432998299,0.945526202,0.9475510194,0.949391472,0.9510644542,0.9525863264,0.953972705,0.955238285,0.9563966974,0.9574604037,0.9584406278,0.9593473236,0.960189177,0.9609736386,0.9617069836,0.9623943945,0.9630400616,0.9636472966,0.9642186545,0.9647560591,0.9652609283,0.9657342945,0.9661769175,0.9665893865,0.9669722099,0.9673258911]\n", + " [0.001,0.001060875,0.0011396,0.0012863125,0.0015459984,0.0019048369,0.0023147701,0.0027425688,0.0031719899,0.0035935463,0.0040047171,0.0044081612,0.0048073678,0.0052048637,0.0056023079,0.0060005117,0.0063995798,0.0067992898,0.0071993689,0.0075995902,0.007999808,0.0083999595,0.0088000381,0.0092000616,0.0096525538,0.0103347221,0.0114180238,0.0128617561,0.0144931539,0.0161909912,0.0178965175,0.0195748423,0.0212159342,0.0228289888,0.0244265464,0.0260177161,0.0276085304,0.0292020038,0.0307985773,0.0323974755,0.0339977246,0.0355985069,0.0371992898,0.0387998404,0.0404001295,0.0420002192,0.0436001942,0.0452001261,0.0468000593,0.0484758458,0.0504834257,0.0530704368,0.056178071,0.0595567722,0.0630314345,0.0665171977,0.0699636664,0.0733563529,0.0767085411,0.0800383523,0.0833589543,0.0866790344,0.0900028852,0.0933311337,0.0967326953,0.1004442449,0.1047006189,0.1094577119,0.1144907128,0.1196353715,0.1248049846,0.1299418958,0.1350232879,0.1400570021,0.1452540043,0.1513017567,0.1589133116,0.1680377058,0.1780770546,0.1885468618,0.1991553484,0.2096896812,0.2200450325,0.2302229342,0.2402681458,0.2502302229,0.2601553402,0.27007834,0.2800179047,0.2899799302,0.2999629146,0.3099614863,0.3199691186,0.3299801956,0.3403173669,0.3521487483,0.3668129253,0.3844513624,0.4041888551,0.4247935739,0.4442765262,0.4605082419,0.4728869633,0.4822309604,0.4895341295,0.4956449952,0.5013686886,0.5073076754,0.513761
0388,0.5206987551,0.5276657564,0.5340334461,0.5395220756,0.5442306919,0.5484901071,0.5530599502,0.5588761244,0.5663266439,0.5752119583,0.585204242,0.5959735276,0.6071500854,0.6184053116,0.6295209059,0.6403758638,0.650921239,0.6611693012,0.671174569,0.681009388,0.6907471485,0.7004527783,0.7101763721,0.7199504677,0.7297911321,0.7397010124,0.7496729757,0.7596938994,0.7697481465,0.7798202777,0.7898968803,0.7999675731,0.8100253018,0.8200662214,0.8300893951,0.8400025166,0.8494453768,0.8579165269,0.8651653723,0.8712540566,0.8763468591,0.8806505608,0.8844338485,0.8879756773,0.8915092577,0.8952099002,0.8991948145,0.9035195392,0.9081838533,0.9131467515,0.9183416751,0.9236898731,0.9291127196,0.9345414554,0.9399230919,0.9451659123,0.9500324455,0.9542146638,0.9575690762,0.9601504006,0.9620795658,0.9635039422,0.9646063832,0.965571997,0.9665531773,0.9676621061,0.9689711529,0.9705116418,0.9722785871,0.9742409577,0.9763519694,0.9785580215,0.9808067315,0.9830531373,0.9852633275,0.987415817,0.9895011861,0.9915203598,0.9934820158,0.9953894047,0.9970771779,0.998127745,0.99846,0.99852,0.99858,0.99864,0.9987,0.99876,0.99882,0.99888,0.99894,0.99899]\n", " NaN\n", - " 12.783337\n", - " 15.252598\n", " NaN\n", " NaN\n", - " -4.652002\n", " NaN\n", - " 31.015493\n", " \n", " \n", - " 6\n", - " 31292\n", - " 31286\n", - " Jeff Bezos\n", + " 2\n", + " 31270\n", + " 31264\n", + " no\n", " 1.0\n", - " multiple_choice\n", - " [Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else]\n", - " [0.2,0.025,0.225,0.08,0.445,0.025]\n", + " binary\n", + " None\n", " NaN\n", " NaN\n", - " -70.444674\n", + " False\n", + " False\n", " ...\n", - " 29.885537\n", - " 21.184400\n", + " 0.1\n", + " 0.05\n", + " 0.1\n", " NaN\n", - " -18.457128\n", - " 11.152127\n", + " 0.2\n", + " 0.07\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 11.152127\n", " \n", " \n", - " 9\n", - " 31321\n", - " 31370\n", - " 0\n", + " 3\n", + " 31280\n", + " 31274\n", + " 5-9\n", " 1.0\n", " 
multiple_choice\n", - " [0, 1, 2, Greater than 2]\n", - " [0.336,0.364,0.2,0.1]\n", + " [0-4, 5-9, >9]\n", " NaN\n", " NaN\n", - " -87.546874\n", + " None\n", + " None\n", " ...\n", - " -51.879379\n", - " -121.194097\n", + " [0.45,0.45,0.1]\n", + " [0.15,0.65,0.2]\n", + " [0.15000000000000002,0.54,0.31000000000000005]\n", " NaN\n", - " -80.647587\n", - " -49.410118\n", + " [0.25,0.5,0.25]\n", + " [0.27499999999999997,0.5125,0.21249999999999997]\n", " NaN\n", " NaN\n", - " -62.415431\n", + " [0.116,0.42,0.464]\n", " NaN\n", - " -69.314718\n", " \n", " \n", - " 13\n", - " 31368\n", - " 31366\n", - " ≥0% and <5%\n", + " 4\n", + " 31281\n", + " 31275\n", + " 119.2\n", " 1.0\n", - " multiple_choice\n", - " [Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%]\n", - " [0.05,0.45,0.45,0.05]\n", - " NaN\n", - " NaN\n", - " -16.907633\n", + " numeric\n", + " None\n", + " 0.0\n", + " 400.0\n", + " False\n", + " False\n", " ...\n", - " 44.183275\n", - " 33.647224\n", - " 2.197891\n", - " 20.067070\n", - " 25.378052\n", + " 
[0.0,0.0033333333,0.0066666667,0.01,0.0133333333,0.0166666667,0.02,0.0233333333,0.0266666667,0.03,0.0333333333,0.0366666667,0.04,0.0433333333,0.0466666667,0.05,0.0533333333,0.0566666667,0.06,0.0633333333,0.0666666667,0.07,0.0733333333,0.0766666667,0.08,0.0833333333,0.0866666667,0.09,0.0933333333,0.0966666667,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.6057142857,0.6114285714,0.6171428571,0.6228571429,0.6285714286,0.6342857143,0.64,0.6457142857,0.6514285714,0.6571428571,0.6628571429,0.6685714286,0.6742857143,0.68,0.6857142857,0.6914285714,0.6971428571,0.7028571429,0.7085714286,0.7142857143,0.72,0.7257142857,0.7314285714,0.7371428571,0.7428571429,0.7485714286,0.7542857143,0.76,0.7657142857,0.7714285714,0.7771428571,0.7828571429,0.7885714286,0.7942857143,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.9025,0.905,0.9075,0.91,0.9125,0.915,0.9175,0.92,0.9225,0.925,0.9275,0.93,0.9325,0.935,0.9375,0.94,0.9425,0.945,0.9475,0.95,0.9525,0.955,0.9575,0.96,0.9625,0.965,0.9675,0.97,0.9725,0.975,0.9775,0.98,0.9825,0.985,0.9875,0.99,0.9925,0.995,0.9975,1.0]\n", + " 
[0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,0.032,0.036,0.04,0.044,0.048,0.052,0.056,0.06,0.064,0.068,0.072,0.076,0.08,0.084,0.088,0.092,0.096,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.805,0.81,0.815,0.82,0.825,0.83,0.835,0.84,0.845,0.85,0.855,0.86,0.865,0.87,0.875,0.88,0.885,0.89,0.895,0.9,0.9013333333,0.9026666667,0.904,0.9053333333,0.9066666667,0.908,0.9093333333,0.9106666667,0.912,0.9133333333,0.9146666667,0.916,0.9173333333,0.9186666667,0.92,0.9213333333,0.9226666667,0.924,0.9253333333,0.9266666667,0.928,0.9293333333,0.9306666667,0.932,0.9333333333,0.9346666667,0.936,0.9373333333,0.9386666667,0.94,0.9413333333,0.9426666667,0.944,0.9453333333,0.9466666667,0.948,0.9493333333,0.9506666667,0.952,0.9533333333,0.9546666667,0.956,0.9573333333,0.9586666667,0.96,0.9613333333,0.9626666667,0.964,0.9653333333,0.9666666667,0.968,0.9693333333,0.9706666667,0.972,0.9733333333,0.9746666667,0.976,0.9773333333,0.9786666667,0.98,0.9813333333,0.9826666667,0.984,0.9853333333,0.9866666667,0.988,0.9893333333,0.9906666667,0.992,0.9933333333,0.9946666667,0.996,0.9973333333,0.9986666667,1.0]\n", + " 
[0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0175,0.02,0.0225,0.025,0.0275,0.03,0.0325,0.035,0.0375,0.04,0.0425,0.045,0.0475,0.05,0.0525,0.055,0.0575,0.06,0.0625,0.065,0.0675,0.07,0.0725,0.075,0.0775,0.08,0.0825,0.085,0.0875,0.09,0.0925,0.095,0.0975,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.608,0.616,0.624,0.632,0.64,0.648,0.656,0.664,0.672,0.68,0.688,0.696,0.704,0.712,0.72,0.728,0.736,0.744,0.752,0.76,0.768,0.776,0.784,0.792,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.902,0.904,0.906,0.908,0.91,0.912,0.914,0.916,0.918,0.92,0.922,0.924,0.926,0.928,0.93,0.932,0.934,0.936,0.938,0.94,0.942,0.944,0.946,0.948,0.95,0.952,0.954,0.956,0.958,0.96,0.962,0.964,0.966,0.968,0.97,0.972,0.974,0.976,0.978,0.98,0.982,0.984,0.986,0.988,0.99,0.992,0.994,0.996,0.998,1.0]\n", " NaN\n", + " 
[0.0,0.0006552097,0.0013605064,0.0021151815,0.0029180701,0.0037675922,0.0046618077,0.0055984833,0.0065751692,0.0075892831,0.0086381998,0.0097193446,0.0108302867,0.0119688337,0.0131331257,0.014321727,0.0155337159,0.0167687729,0.0180272663,0.0193103356,0.020619972,0.0219590952,0.0233316264,0.024742554,0.0261979914,0.0277052245,0.0292727448,0.030910267,0.0326287265,0.034440256,0.0363581376,0.0383967303,0.0405713707,0.042898249,0.0453942605,0.0480768342,0.0509637431,0.0540728987,0.0574221344,0.0610289827,0.0649104508,0.069082799,0.0735613277,0.0783601755,0.0834921337,0.0889684789,0.0947988278,0.1009910149,0.1075509944,0.1144827695,0.1217883466,0.1294677162,0.1375188601,0.1459377845,0.1547185775,0.1638534906,0.173333043,0.183146147,0.1932802518,0.2037215056,0.2144549309,0.2254646117,0.2367338883,0.2482455564,0.2599820665,0.2719257181,0.2840588463,0.2963639938,0.308824066,0.3214224646,0.3341431959,0.3469709515,0.3598911602,0.3728900098,0.3859544391,0.3990721017,0.4122313044,0.4254209242,0.4386303077,0.4518491587,0.4650674199,0.4782751541,0.4914624335,0.5046192399,0.5177353826,0.5308004395,0.5438037232,0.5567342756,0.5695808913,0.5823321691,0.5949765903,0.6075026181,0.6198988152,0.6321539735,0.6442572471,0.6561982838,0.6679673464,0.679555418,0.6909542849,0.7021565932,0.7131558737,0.7239465364,0.7345238314,0.7448837818,0.7550230879,0.7649390101,0.7746292356,0.7840917363,0.793324625,0.8023260164,0.8110939019,0.8196260428,0.8279198893,0.8359725294,0.84378067,0.8513406529,0.8586485067,0.8657000313,0.8724909149,0.8790168773,0.8852738353,0.8912580844,0.8969664881,0.9023966684,0.9075471904,0.9124177307,0.9170092252,0.9213239875,0.9253657928,0.9291399243,0.9326531773,0.9359138212,0.9389315199,0.9417172132,0.9442829632,0.9466417713,0.9488073729,0.9507940179,0.9526162437,0.9542886507,0.9558256867,0.957241447,0.9585494976,0.9597627233,0.9608932066,0.9619521358,0.9629497455,0.9638952848,0.9647970143,0.9656622247,0.9664972774,0.9673076585,0.9680980464,0.9688723855,0.9696339648,0.970385
4957,0.9711291891,0.9718668279,0.9725998336,0.9733293276,0.9740561839,0.9747810757,0.9755045151,0.9762268859,0.9769484703,0.9776694709,0.9783900269,0.9791102268,0.9798301173,0.9805497088,0.9812689786,0.981987871,0.9827062964,0.9834241265,0.9841411897,0.9848572642,0.98557207,0.9862852591,0.9869964062,0.9877049976,0.9884104215,0.9891119579,0.9898087704,0.990499899,0.9911842569,0.9918606294,0.9925276775,0.9931839465,0.9938278782,0.99445783,0.9950720981,0.9956689463,0.9962466383,0.9968034747,0.9973378313,0.9978481983,0.9983332192,0.9987917276,0.9992227789,0.9996256782,1.0]\n", + " [0.0,0.0001141583,0.0002446967,0.0003862688,0.0005272579,0.0006650709,0.0008243437,0.0011074433,0.0016696544,0.0025699094,0.0037138357,0.0049708626,0.0062610152,0.0075426566,0.0089765864,0.0111726822,0.0147311078,0.0195212559,0.0249547717,0.0306181288,0.0363105138,0.0419407763,0.0476011969,0.053516341,0.0598014349,0.0663689162,0.0730761187,0.0798334547,0.0865904866,0.0933196582,0.1000172031,0.1066924089,0.1133554776,0.1200140176,0.1266729489,0.1333343989,0.1399984689,0.1466644317,0.1533314439,0.1599988203,0.1666661444,0.1733332523,0.1800001372,0.1866668598,0.1933334943,0.2000000995,0.2066667101,0.2133333393,0.2199999878,0.22666665,0.2333333196,0.2399999916,0.2466666631,0.2533333329,0.2600000011,0.2666666681,0.2733333345,0.2800000007,0.286666667,0.2933333334,0.2999999999,0.3066666665,0.3133333332,0.3199999999,0.3266666666,0.3333333333,0.34,0.3466666667,0.3533333333,0.36,0.3666666667,0.3733333333,0.38,0.3866666667,0.3934628939,0.400837331,0.40925763,0.4186848364,0.428718413,0.4390353607,0.4494419812,0.4597974687,0.4700329298,0.4801500685,0.4901790777,0.500153105,0.5101028922,0.5200515519,0.5300114112,0.5398722838,0.5492279015,0.5576212737,0.5650210292,0.571743695,0.5780856137,0.5842571713,0.5904328096,0.5967209586,0.603152213,0.6097133168,0.6163738459,0.6230958146,0.6298433017,0.6365902337,0.6433215069,0.6500308134,0.656718392,0.6633885674,0.6700472479,0.6767001542,0.6833518918,0.6900055659,0.69
66627826,0.7033239321,0.7099885835,0.7166558627,0.723324761,0.7299943545,0.7366639271,0.7433330133,0.7500013847,0.7566690034,0.7633359628,0.770002427,0.7766685825,0.7833346018,0.7900006228,0.7966667394,0.8033330023,0.8099994258,0.8166659972,0.8233326871,0.8299994586,0.8366662749,0.8433331037,0.8499999207,0.8566667097,0.8633334627,0.8700001785,0.8766668606,0.8833335157,0.8899751517,0.8964699017,0.9025861327,0.9081211655,0.9130226546,0.9173491712,0.921198292,0.9246959323,0.9279877368,0.9312103051,0.934472912,0.9378540969,0.9414005467,0.9450901244,0.9487670554,0.9522009139,0.9552513327,0.9578998205,0.9601715711,0.96211589,0.9638162438,0.9653702301,0.9668664828,0.9683781475,0.9699605983,0.9716476808,0.9734519305,0.9753688047,0.9773815283,0.9794657325,0.9815941718,0.9837408125,0.9858836701,0.9879773814,0.9898993305,0.9914888717,0.9926681205,0.9934599632,0.9939261174,0.9941560479,0.9942611072,0.9943265488,0.9943865488,0.9944537386,0.9945561009,0.9947328687,0.9950042368,0.9953660612,0.9958058993,0.9963078442,0.9968511117,0.9974139813,0.9979781729,0.9985251814,0.999027536,0.9994498435,0.999736686,0.9998734993,0.99994,1.0]\n", " NaN\n", " NaN\n", + " 
[0.0,0.001311947,0.0026238939,0.0039358409,0.0052477878,0.0065597348,0.0078716817,0.0091836287,0.0104955756,0.0118075226,0.0131194695,0.0144314165,0.0157433634,0.0170553104,0.0183672573,0.0196792043,0.0209911512,0.0223030982,0.0236150451,0.0249269921,0.026238939,0.027550886,0.0288628329,0.0301747799,0.0314867268,0.0327986738,0.0341106207,0.0354225677,0.0367345146,0.0380464616,0.0393584085,0.0406703555,0.0419823024,0.0432942494,0.0446061963,0.0459181433,0.0472300902,0.0485420372,0.0498539841,0.0511659311,0.052477878,0.053789825,0.0551017719,0.0564137189,0.0577256658,0.0590376128,0.0603495597,0.0616615067,0.0629734536,0.0642854006,0.0655973475,0.0669092945,0.0682212414,0.0695331884,0.0708451353,0.0721570823,0.0734690292,0.0747809762,0.0760929231,0.0774048701,0.078716817,0.080028764,0.0813407109,0.0826526579,0.0839646048,0.0852765518,0.0865884987,0.0879004457,0.0902457862,0.0933094828,0.0978079399,0.1023063969,0.1068048539,0.111303311,0.115801768,0.120300225,0.124798682,0.1292971391,0.1338199508,0.1388055027,0.1440933779,0.1496807808,0.1571177226,0.1652387403,0.1753118263,0.1904276903,0.2058197291,0.2212117678,0.237030829,0.2551785571,0.273870758,0.2925629589,0.3115548313,0.3307464845,0.3499926649,0.3692260274,0.3884136416,0.407661417,0.4269091924,0.4457073638,0.464050886,0.4823944081,0.5007379302,0.5190814523,0.5374249745,0.5538739661,0.5696118391,0.5853388804,0.6010659216,0.6161284786,0.6273538036,0.6382421632,0.6486483242,0.6588094975,0.668725683,0.6786418685,0.688558054,0.6984742395,0.708390425,0.7183066106,0.7278808508,0.7373411092,0.7468013677,0.7561442929,0.7645842622,0.7730242316,0.7814642009,0.7899041702,0.7983441395,0.8067841088,0.8152111577,0.8229940495,0.8307769414,0.8385598332,0.8447944123,0.8509124517,0.8563824526,0.8610823306,0.8657454654,0.8704086002,0.8750717351,0.8797348699,0.8843980047,0.8890611396,0.8934873987,0.8970573375,0.9006272763,0.9041972151,0.9077671539,0.9103291006,0.9126390493,0.914948998,0.9172589467,0.9195688953,0.921878844,0.9236671785,
0.9253634634,0.9270597483,0.9287560333,0.9304523182,0.9321486031,0.933844888,0.935541173,0.9372374579,0.9389337428,0.9406300277,0.9423263126,0.9440225976,0.9457188825,0.9474151674,0.9491114523,0.9508077373,0.9525040222,0.9542003071,0.955896592,0.9575928769,0.9592891619,0.9609854468,0.9626817317,0.9643780166,0.9660743016,0.9677705865,0.9694668714,0.9711631563,0.9728594412,0.9745557262,0.9762520111,0.977948296,0.9796445809,0.9813408659,0.9830371508,0.9847334357,0.9864297206,0.9881260055,0.9898222905,0.9915185754,0.9932148603,0.9949111452,0.9966074302,0.9983037151,1.0]\n", " NaN\n", - " -32.542240\n", " \n", " \n", "\n", - "

5 rows × 54 columns

\n", + "

5 rows × 57 columns

\n", "" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight \\\n", - "0 31268 31262 0 1.0 \n", - "3 31280 31274 5-9 1.0 \n", - "6 31292 31286 Jeff Bezos 1.0 \n", - "9 31321 31370 0 1.0 \n", - "13 31368 31366 ≥0% and <5% 1.0 \n", - "\n", - " type \\\n", - "0 multiple_choice \n", - "3 multiple_choice \n", - "6 multiple_choice \n", - "9 multiple_choice \n", - "13 multiple_choice \n", - "\n", - " options \\\n", - "0 [0, 1, 2-3, 4-6, >6] \n", - "3 [0-4, 5-9, >9] \n", - "6 [Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else] \n", - "9 [0, 1, 2, Greater than 2] \n", - "13 [Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%] \n", + " pro_question_id bot_question_id resolution question_weight \\\n", + "0 31268 31262 0 1.0 \n", + "1 31269 31263 86.82 1.0 \n", + "2 31270 31264 no 1.0 \n", + "3 31280 31274 5-9 1.0 \n", + "4 31281 31275 119.2 1.0 \n", "\n", - " pro_median 4Shadower Bot_Pepa CatrachoCaster \\\n", - "0 [0.001,0.62,0.35,0.019,0.01] NaN NaN NaN \n", - "3 [0.16,0.44,0.4] NaN NaN 6.595797 \n", - "6 [0.2,0.025,0.225,0.08,0.445,0.025] NaN NaN -70.444674 \n", - "9 [0.336,0.364,0.2,0.1] NaN NaN -87.546874 \n", - "13 [0.05,0.45,0.45,0.05] NaN NaN -16.907633 \n", - "\n", - " ... metac-o1-preview metac-perplexity minefrac1 mmBot \\\n", - "0 ... 299.573227 529.831737 NaN 229.263476 \n", - "3 ... 31.015493 2.247286 NaN 12.783337 \n", - "6 ... 29.885537 21.184400 NaN -18.457128 \n", - "9 ... -51.879379 -121.194097 NaN -80.647587 \n", - "13 ... 
44.183275 33.647224 2.197891 20.067070 \n", - "\n", - " pgodzinai pianobot swingswish twsummerbot wunderplumb \\\n", - "0 270.308741 NaN NaN NaN NaN \n", - "3 15.252598 NaN NaN -4.652002 NaN \n", - "6 11.152127 NaN NaN NaN NaN \n", - "9 -49.410118 NaN NaN -62.415431 NaN \n", - "13 25.378052 NaN NaN NaN NaN \n", - "\n", - " bot_team_median \n", - "0 501.063529 \n", - "3 31.015493 \n", - "6 11.152127 \n", - "9 -69.314718 \n", - "13 -32.542240 \n", - "\n", - "[5 rows x 54 columns]" + " type options range_min range_max \\\n", + "0 multiple_choice [0, 1, 2-3, 4-6, >6] NaN NaN \n", + "1 numeric None 60.0 100.0 \n", + "2 binary None NaN NaN \n", + "3 multiple_choice [0-4, 5-9, >9] NaN NaN \n", + "4 numeric None 0.0 400.0 \n", + "\n", + " open_upper_bound open_lower_bound ... \\\n", + "0 False False ... \n", + "1 True True ... \n", + "2 False False ... \n", + "3 None None ... \n", + "4 False False ... \n", + "\n", + " metac-o1 \\\n", + "0 [0.25,0.3,0.3,0.1,0.05] \n", + "1 [0.05,0.0505882353,0.0511764706,0.0517647059,0.0523529412,0.0529411765,0.0535294118,0.0541176471,0.0547058824,0.0552941176,0.0558823529,0.0564705882,0.0570588235,0.0576470588,0.0582352941,0.0588235294,0.0594117647,0.06,0.0605882353,0.0611764706,0.0617647059,0.0623529412,0.0629411765,0.0635294118,0.0641176471,0.0647058824,0.0652941176,0.0658823529,0.0664705882,0.0670588235,0.0676470588,0.0682352941,0.0688235294,0.0694117647,0.07,0.0705882353,0.0711764706,0.0717647059,0.0723529412,0.0729411765,0.0735294118,0.0741176471,0.0747058824,0.0752941176,0.0758823529,0.0764705882,0.0770588235,0.0776470588,0.0782352941,0.0788235294,0.0794117647,0.08,0.0805882353,0.0811764706,0.0817647059,0.0823529412,0.0829411765,0.0835294118,0.0841176471,0.0847058824,0.0852941176,0.0858823529,0.0864705882,0.0870588235,0.0876470588,0.0882352941,0.0888235294,0.0894117647,0.09,0.0905882353,0.0911764706,0.0917647059,0.0923529412,0.0929411765,0.0935294118,0.0941176471,0.0947058824,0.0952941176,0.0958823529,0.0964705882,0.0970588235,0.09764
70588,0.0982352941,0.0988235294,0.0994117647,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.22,0.24,0.26,0.28,0.3,0.32,0.34,0.36,0.38,0.4,0.42,0.44,0.46,0.48,0.5,0.52,0.54,0.56,0.58,0.6,0.62,0.64,0.66,0.68,0.7,0.72,0.74,0.76,0.78,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.9007692308,0.9015384615,0.9023076923,0.9030769231,0.9038461538,0.9046153846,0.9053846154,0.9061538462,0.9069230769,0.9076923077,0.9084615385,0.9092307692,0.91,0.9107692308,0.9115384615,0.9123076923,0.9130769231,0.9138461538,0.9146153846,0.9153846154,0.9161538462,0.9169230769,0.9176923077,0.9184615385,0.9192307692,0.92,0.9207692308,0.9215384615,0.9223076923,0.9230769231,0.9238461538,0.9246153846,0.9253846154,0.9261538462,0.9269230769,0.9276923077,0.9284615385,0.9292307692,0.93,0.9307692308,0.9315384615,0.9323076923,0.9330769231,0.9338461538,0.9346153846,0.9353846154,0.9361538462,0.9369230769,0.9376923077,0.9384615385,0.9392307692,0.94,0.9407692308,0.9415384615,0.9423076923,0.9430769231,0.9438461538,0.9446153846,0.9453846154,0.9461538462,0.9469230769,0.9476923077,0.9484615385,0.9492307692,0.95] \n", + "2 0.1 \n", + "3 [0.45,0.45,0.1] \n", + "4 
[0.0,0.0033333333,0.0066666667,0.01,0.0133333333,0.0166666667,0.02,0.0233333333,0.0266666667,0.03,0.0333333333,0.0366666667,0.04,0.0433333333,0.0466666667,0.05,0.0533333333,0.0566666667,0.06,0.0633333333,0.0666666667,0.07,0.0733333333,0.0766666667,0.08,0.0833333333,0.0866666667,0.09,0.0933333333,0.0966666667,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.6057142857,0.6114285714,0.6171428571,0.6228571429,0.6285714286,0.6342857143,0.64,0.6457142857,0.6514285714,0.6571428571,0.6628571429,0.6685714286,0.6742857143,0.68,0.6857142857,0.6914285714,0.6971428571,0.7028571429,0.7085714286,0.7142857143,0.72,0.7257142857,0.7314285714,0.7371428571,0.7428571429,0.7485714286,0.7542857143,0.76,0.7657142857,0.7714285714,0.7771428571,0.7828571429,0.7885714286,0.7942857143,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.9025,0.905,0.9075,0.91,0.9125,0.915,0.9175,0.92,0.9225,0.925,0.9275,0.93,0.9325,0.935,0.9375,0.94,0.9425,0.945,0.9475,0.95,0.9525,0.955,0.9575,0.96,0.9625,0.965,0.9675,0.97,0.9725,0.975,0.9775,0.98,0.9825,0.985,0.9875,0.99,0.9925,0.995,0.9975,1.0] \n", + "\n", + " metac-o1-preview \\\n", + "0 [0.01,0.7,0.2,0.07,0.02] \n", + "1 
[0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.208,0.216,0.224,0.232,0.24,0.248,0.256,0.264,0.272,0.28,0.288,0.296,0.304,0.312,0.32,0.328,0.336,0.344,0.352,0.36,0.368,0.376,0.384,0.392,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.8066666667,0.8133333333,0.82,0.8266666667,0.8333333333,0.84,0.8466666667,0.8533333333,0.86,0.8666666667,0.8733333333,0.88,0.8866666667,0.8933333333,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95] \n", + "2 0.05 \n", + "3 [0.15,0.65,0.2] \n", + "4 
[0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,0.032,0.036,0.04,0.044,0.048,0.052,0.056,0.06,0.064,0.068,0.072,0.076,0.08,0.084,0.088,0.092,0.096,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.805,0.81,0.815,0.82,0.825,0.83,0.835,0.84,0.845,0.85,0.855,0.86,0.865,0.87,0.875,0.88,0.885,0.89,0.895,0.9,0.9013333333,0.9026666667,0.904,0.9053333333,0.9066666667,0.908,0.9093333333,0.9106666667,0.912,0.9133333333,0.9146666667,0.916,0.9173333333,0.9186666667,0.92,0.9213333333,0.9226666667,0.924,0.9253333333,0.9266666667,0.928,0.9293333333,0.9306666667,0.932,0.9333333333,0.9346666667,0.936,0.9373333333,0.9386666667,0.94,0.9413333333,0.9426666667,0.944,0.9453333333,0.9466666667,0.948,0.9493333333,0.9506666667,0.952,0.9533333333,0.9546666667,0.956,0.9573333333,0.9586666667,0.96,0.9613333333,0.9626666667,0.964,0.9653333333,0.9666666667,0.968,0.9693333333,0.9706666667,0.972,0.9733333333,0.9746666667,0.976,0.9773333333,0.9786666667,0.98,0.9813333333,0.9826666667,0.984,0.9853333333,0.9866666667,0.988,0.9893333333,0.9906666667,0.992,0.9933333333,0.9946666667,0.996,0.9973333333,0.9986666667,1.0] \n", + "\n", + " metac-perplexity \\\n", + "0 [0.3,0.4,0.2,0.07,0.03] \n", + "1 
[0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.057,0.058,0.059,0.06,0.061,0.062,0.063,0.064,0.065,0.066,0.067,0.068,0.069,0.07,0.071,0.072,0.073,0.074,0.075,0.076,0.077,0.078,0.079,0.08,0.081,0.082,0.083,0.084,0.085,0.086,0.087,0.088,0.089,0.09,0.091,0.092,0.093,0.094,0.095,0.096,0.097,0.098,0.099,0.1,0.104,0.108,0.112,0.116,0.12,0.124,0.128,0.132,0.136,0.14,0.144,0.148,0.152,0.156,0.16,0.164,0.168,0.172,0.176,0.18,0.184,0.188,0.192,0.196,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.6133333333,0.6266666667,0.64,0.6533333333,0.6666666667,0.68,0.6933333333,0.7066666667,0.72,0.7333333333,0.7466666667,0.76,0.7733333333,0.7866666667,0.8,0.804,0.808,0.812,0.816,0.82,0.824,0.828,0.832,0.836,0.84,0.844,0.848,0.852,0.856,0.86,0.864,0.868,0.872,0.876,0.88,0.884,0.888,0.892,0.896,0.9,0.901,0.902,0.903,0.904,0.905,0.906,0.907,0.908,0.909,0.91,0.911,0.912,0.913,0.914,0.915,0.916,0.917,0.918,0.919,0.92,0.921,0.922,0.923,0.924,0.925,0.926,0.927,0.928,0.929,0.93,0.931,0.932,0.933,0.934,0.935,0.936,0.937,0.938,0.939,0.94,0.941,0.942,0.943,0.944,0.945,0.946,0.947,0.948,0.949,0.95] \n", + "2 0.1 \n", + "3 [0.15000000000000002,0.54,0.31000000000000005] \n", + "4 
[0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0175,0.02,0.0225,0.025,0.0275,0.03,0.0325,0.035,0.0375,0.04,0.0425,0.045,0.0475,0.05,0.0525,0.055,0.0575,0.06,0.0625,0.065,0.0675,0.07,0.0725,0.075,0.0775,0.08,0.0825,0.085,0.0875,0.09,0.0925,0.095,0.0975,0.1,0.105,0.11,0.115,0.12,0.125,0.13,0.135,0.14,0.145,0.15,0.155,0.16,0.165,0.17,0.175,0.18,0.185,0.19,0.195,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.4133333333,0.4266666667,0.44,0.4533333333,0.4666666667,0.48,0.4933333333,0.5066666667,0.52,0.5333333333,0.5466666667,0.56,0.5733333333,0.5866666667,0.6,0.608,0.616,0.624,0.632,0.64,0.648,0.656,0.664,0.672,0.68,0.688,0.696,0.704,0.712,0.72,0.728,0.736,0.744,0.752,0.76,0.768,0.776,0.784,0.792,0.8,0.8033333333,0.8066666667,0.81,0.8133333333,0.8166666667,0.82,0.8233333333,0.8266666667,0.83,0.8333333333,0.8366666667,0.84,0.8433333333,0.8466666667,0.85,0.8533333333,0.8566666667,0.86,0.8633333333,0.8666666667,0.87,0.8733333333,0.8766666667,0.88,0.8833333333,0.8866666667,0.89,0.8933333333,0.8966666667,0.9,0.902,0.904,0.906,0.908,0.91,0.912,0.914,0.916,0.918,0.92,0.922,0.924,0.926,0.928,0.93,0.932,0.934,0.936,0.938,0.94,0.942,0.944,0.946,0.948,0.95,0.952,0.954,0.956,0.958,0.96,0.962,0.964,0.966,0.968,0.97,0.972,0.974,0.976,0.978,0.98,0.982,0.984,0.986,0.988,0.99,0.992,0.994,0.996,0.998,1.0] \n", + "\n", + " minefrac1 \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " mmBot \\\n", + "0 [0.009900990099009901,0.39603960396039606,0.44554455445544555,0.1188118811881188,0.0297029702970297] \n", + "1 
[0.0215944348,0.0218024136,0.0220262706,0.0222657692,0.0225205234,0.0227900084,0.0230735761,0.0233704727,0.0236798595,0.0240008339,0.0243324518,0.0246737484,0.0250237592,0.0253815375,0.0257461704,0.0261167925,0.0264925953,0.0268728349,0.0272568365,0.0276439961,0.0280337803,0.0284257242,0.0288194274,0.0292145496,0.0296108048,0.0300079559,0.0304058088,0.0308042061,0.031203022,0.0316021576,0.0320015358,0.0324010988,0.0328008038,0.033200622,0.0336005361,0.0340005406,0.0344006419,0.0348008594,0.0352012288,0.0356018064,0.0360026751,0.0364039532,0.0368058059,0.0372084598,0.0376122217,0.0380175022,0.0384248443,0.0388349581,0.0392487619,0.0396674303,0.040092449,0.0405256766,0.040969412,0.0414264662,0.0419002382,0.0423947905,0.0429149226,0.0434662384,0.0440552034,0.0446891875,0.0453764888,0.0461263346,0.0469488546,0.047855024,0.0488565752,0.0499658763,0.0511957788,0.0525594355,0.0540700958,0.0557408822,0.0575845575,0.0596132911,0.061838434,0.0642703126,0.0669180506,0.0697894271,0.0728907793,0.0762269529,0.0798013046,0.0836157568,0.0876709009,0.091966147,0.096499911,0.1012698318,0.1062730078,0.1115062433,0.116966291,0.1226500836,0.1285549408,0.1346787459,0.1410200827,0.1475783286,0.1543537019,0.1613472593,0.1685608481,0.1759970129,0.1836588644,0.1915499147,0.1996738871,0.208034508,0.2166352903,0.225479315,0.2345690212,0.24390601,0.2534908708,0.2633230334,0.2734006526,0.283720526,0.2942780484,0.3050672012,0.316080577,0.3273094353,0.3387437886,0.3503725099,0.3621834602,0.3741636271,0.3862992712,0.3985760721,0.4109792702,0.4234937993,0.4361044066,0.4487957561,0.4615525185,0.4743594438,0.4872014199,0.5000635204,0.5129310433,0.5257895463,0.5386248816,0.5514232322,0.5641711536,0.5768556211,0.589464083,0.6019845173,0.6144054896,0.6267162064,0.6389065595,0.6509671563,0.6628893291,0.6746651196,0.6862872355,0.6977489765,0.7090441313,0.7201668477,0.7311114815,0.7418724312,0.7524439675,0.7628200682,0.7729942685,0.7829595382,0.7927081941,0.8022318565,0.8115214549,0.8205672863,0.8293591256,
0.8378863854,0.8461383197,0.8541042651,0.8617739066,0.8691375599,0.8761864572,0.8829130238,0.8893111359,0.8953763492,0.9011060878,0.9064997881,0.9115589931,0.9162873921,0.9206908074,0.9247771276,0.9285561903,0.9320396198,0.9352406245,0.9381737618,0.9408546777,0.9432998299,0.945526202,0.9475510194,0.949391472,0.9510644542,0.9525863264,0.953972705,0.955238285,0.9563966974,0.9574604037,0.9584406278,0.9593473236,0.960189177,0.9609736386,0.9617069836,0.9623943945,0.9630400616,0.9636472966,0.9642186545,0.9647560591,0.9652609283,0.9657342945,0.9661769175,0.9665893865,0.9669722099,0.9673258911] \n", + "2 0.2 \n", + "3 [0.25,0.5,0.25] \n", + "4 [0.0,0.0006552097,0.0013605064,0.0021151815,0.0029180701,0.0037675922,0.0046618077,0.0055984833,0.0065751692,0.0075892831,0.0086381998,0.0097193446,0.0108302867,0.0119688337,0.0131331257,0.014321727,0.0155337159,0.0167687729,0.0180272663,0.0193103356,0.020619972,0.0219590952,0.0233316264,0.024742554,0.0261979914,0.0277052245,0.0292727448,0.030910267,0.0326287265,0.034440256,0.0363581376,0.0383967303,0.0405713707,0.042898249,0.0453942605,0.0480768342,0.0509637431,0.0540728987,0.0574221344,0.0610289827,0.0649104508,0.069082799,0.0735613277,0.0783601755,0.0834921337,0.0889684789,0.0947988278,0.1009910149,0.1075509944,0.1144827695,0.1217883466,0.1294677162,0.1375188601,0.1459377845,0.1547185775,0.1638534906,0.173333043,0.183146147,0.1932802518,0.2037215056,0.2144549309,0.2254646117,0.2367338883,0.2482455564,0.2599820665,0.2719257181,0.2840588463,0.2963639938,0.308824066,0.3214224646,0.3341431959,0.3469709515,0.3598911602,0.3728900098,0.3859544391,0.3990721017,0.4122313044,0.4254209242,0.4386303077,0.4518491587,0.4650674199,0.4782751541,0.4914624335,0.5046192399,0.5177353826,0.5308004395,0.5438037232,0.5567342756,0.5695808913,0.5823321691,0.5949765903,0.6075026181,0.6198988152,0.6321539735,0.6442572471,0.6561982838,0.6679673464,0.679555418,0.6909542849,0.7021565932,0.7131558737,0.7239465364,0.7345238314,0.7448837818,0.7550230879,0.76493901
01,0.7746292356,0.7840917363,0.793324625,0.8023260164,0.8110939019,0.8196260428,0.8279198893,0.8359725294,0.84378067,0.8513406529,0.8586485067,0.8657000313,0.8724909149,0.8790168773,0.8852738353,0.8912580844,0.8969664881,0.9023966684,0.9075471904,0.9124177307,0.9170092252,0.9213239875,0.9253657928,0.9291399243,0.9326531773,0.9359138212,0.9389315199,0.9417172132,0.9442829632,0.9466417713,0.9488073729,0.9507940179,0.9526162437,0.9542886507,0.9558256867,0.957241447,0.9585494976,0.9597627233,0.9608932066,0.9619521358,0.9629497455,0.9638952848,0.9647970143,0.9656622247,0.9664972774,0.9673076585,0.9680980464,0.9688723855,0.9696339648,0.9703854957,0.9711291891,0.9718668279,0.9725998336,0.9733293276,0.9740561839,0.9747810757,0.9755045151,0.9762268859,0.9769484703,0.9776694709,0.9783900269,0.9791102268,0.9798301173,0.9805497088,0.9812689786,0.981987871,0.9827062964,0.9834241265,0.9841411897,0.9848572642,0.98557207,0.9862852591,0.9869964062,0.9877049976,0.9884104215,0.9891119579,0.9898087704,0.990499899,0.9911842569,0.9918606294,0.9925276775,0.9931839465,0.9938278782,0.99445783,0.9950720981,0.9956689463,0.9962466383,0.9968034747,0.9973378313,0.9978481983,0.9983332192,0.9987917276,0.9992227789,0.9996256782,1.0] \n", + "\n", + " pgodzinai \\\n", + "0 [0.014925742574257425,0.5137871287128712,0.3349009900990099,0.10168316831683169,0.03470297029702965] \n", + "1 
[0.001,0.001060875,0.0011396,0.0012863125,0.0015459984,0.0019048369,0.0023147701,0.0027425688,0.0031719899,0.0035935463,0.0040047171,0.0044081612,0.0048073678,0.0052048637,0.0056023079,0.0060005117,0.0063995798,0.0067992898,0.0071993689,0.0075995902,0.007999808,0.0083999595,0.0088000381,0.0092000616,0.0096525538,0.0103347221,0.0114180238,0.0128617561,0.0144931539,0.0161909912,0.0178965175,0.0195748423,0.0212159342,0.0228289888,0.0244265464,0.0260177161,0.0276085304,0.0292020038,0.0307985773,0.0323974755,0.0339977246,0.0355985069,0.0371992898,0.0387998404,0.0404001295,0.0420002192,0.0436001942,0.0452001261,0.0468000593,0.0484758458,0.0504834257,0.0530704368,0.056178071,0.0595567722,0.0630314345,0.0665171977,0.0699636664,0.0733563529,0.0767085411,0.0800383523,0.0833589543,0.0866790344,0.0900028852,0.0933311337,0.0967326953,0.1004442449,0.1047006189,0.1094577119,0.1144907128,0.1196353715,0.1248049846,0.1299418958,0.1350232879,0.1400570021,0.1452540043,0.1513017567,0.1589133116,0.1680377058,0.1780770546,0.1885468618,0.1991553484,0.2096896812,0.2200450325,0.2302229342,0.2402681458,0.2502302229,0.2601553402,0.27007834,0.2800179047,0.2899799302,0.2999629146,0.3099614863,0.3199691186,0.3299801956,0.3403173669,0.3521487483,0.3668129253,0.3844513624,0.4041888551,0.4247935739,0.4442765262,0.4605082419,0.4728869633,0.4822309604,0.4895341295,0.4956449952,0.5013686886,0.5073076754,0.5137610388,0.5206987551,0.5276657564,0.5340334461,0.5395220756,0.5442306919,0.5484901071,0.5530599502,0.5588761244,0.5663266439,0.5752119583,0.585204242,0.5959735276,0.6071500854,0.6184053116,0.6295209059,0.6403758638,0.650921239,0.6611693012,0.671174569,0.681009388,0.6907471485,0.7004527783,0.7101763721,0.7199504677,0.7297911321,0.7397010124,0.7496729757,0.7596938994,0.7697481465,0.7798202777,0.7898968803,0.7999675731,0.8100253018,0.8200662214,0.8300893951,0.8400025166,0.8494453768,0.8579165269,0.8651653723,0.8712540566,0.8763468591,0.8806505608,0.8844338485,0.8879756773,0.8915092577,0.8952099002,0.8
991948145,0.9035195392,0.9081838533,0.9131467515,0.9183416751,0.9236898731,0.9291127196,0.9345414554,0.9399230919,0.9451659123,0.9500324455,0.9542146638,0.9575690762,0.9601504006,0.9620795658,0.9635039422,0.9646063832,0.965571997,0.9665531773,0.9676621061,0.9689711529,0.9705116418,0.9722785871,0.9742409577,0.9763519694,0.9785580215,0.9808067315,0.9830531373,0.9852633275,0.987415817,0.9895011861,0.9915203598,0.9934820158,0.9953894047,0.9970771779,0.998127745,0.99846,0.99852,0.99858,0.99864,0.9987,0.99876,0.99882,0.99888,0.99894,0.99899] \n", + "2 0.07 \n", + "3 [0.27499999999999997,0.5125,0.21249999999999997] \n", + "4 [0.0,0.0001141583,0.0002446967,0.0003862688,0.0005272579,0.0006650709,0.0008243437,0.0011074433,0.0016696544,0.0025699094,0.0037138357,0.0049708626,0.0062610152,0.0075426566,0.0089765864,0.0111726822,0.0147311078,0.0195212559,0.0249547717,0.0306181288,0.0363105138,0.0419407763,0.0476011969,0.053516341,0.0598014349,0.0663689162,0.0730761187,0.0798334547,0.0865904866,0.0933196582,0.1000172031,0.1066924089,0.1133554776,0.1200140176,0.1266729489,0.1333343989,0.1399984689,0.1466644317,0.1533314439,0.1599988203,0.1666661444,0.1733332523,0.1800001372,0.1866668598,0.1933334943,0.2000000995,0.2066667101,0.2133333393,0.2199999878,0.22666665,0.2333333196,0.2399999916,0.2466666631,0.2533333329,0.2600000011,0.2666666681,0.2733333345,0.2800000007,0.286666667,0.2933333334,0.2999999999,0.3066666665,0.3133333332,0.3199999999,0.3266666666,0.3333333333,0.34,0.3466666667,0.3533333333,0.36,0.3666666667,0.3733333333,0.38,0.3866666667,0.3934628939,0.400837331,0.40925763,0.4186848364,0.428718413,0.4390353607,0.4494419812,0.4597974687,0.4700329298,0.4801500685,0.4901790777,0.500153105,0.5101028922,0.5200515519,0.5300114112,0.5398722838,0.5492279015,0.5576212737,0.5650210292,0.571743695,0.5780856137,0.5842571713,0.5904328096,0.5967209586,0.603152213,0.6097133168,0.6163738459,0.6230958146,0.6298433017,0.6365902337,0.6433215069,0.6500308134,0.656718392,0.6633885674,0.6700472479,0
.6767001542,0.6833518918,0.6900055659,0.6966627826,0.7033239321,0.7099885835,0.7166558627,0.723324761,0.7299943545,0.7366639271,0.7433330133,0.7500013847,0.7566690034,0.7633359628,0.770002427,0.7766685825,0.7833346018,0.7900006228,0.7966667394,0.8033330023,0.8099994258,0.8166659972,0.8233326871,0.8299994586,0.8366662749,0.8433331037,0.8499999207,0.8566667097,0.8633334627,0.8700001785,0.8766668606,0.8833335157,0.8899751517,0.8964699017,0.9025861327,0.9081211655,0.9130226546,0.9173491712,0.921198292,0.9246959323,0.9279877368,0.9312103051,0.934472912,0.9378540969,0.9414005467,0.9450901244,0.9487670554,0.9522009139,0.9552513327,0.9578998205,0.9601715711,0.96211589,0.9638162438,0.9653702301,0.9668664828,0.9683781475,0.9699605983,0.9716476808,0.9734519305,0.9753688047,0.9773815283,0.9794657325,0.9815941718,0.9837408125,0.9858836701,0.9879773814,0.9898993305,0.9914888717,0.9926681205,0.9934599632,0.9939261174,0.9941560479,0.9942611072,0.9943265488,0.9943865488,0.9944537386,0.9945561009,0.9947328687,0.9950042368,0.9953660612,0.9958058993,0.9963078442,0.9968511117,0.9974139813,0.9979781729,0.9985251814,0.999027536,0.9994498435,0.999736686,0.9998734993,0.99994,1.0] \n", + "\n", + " pianobot swingswish \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " twsummerbot \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 [0.116,0.42,0.464] \n", + "4 
[0.0,0.001311947,0.0026238939,0.0039358409,0.0052477878,0.0065597348,0.0078716817,0.0091836287,0.0104955756,0.0118075226,0.0131194695,0.0144314165,0.0157433634,0.0170553104,0.0183672573,0.0196792043,0.0209911512,0.0223030982,0.0236150451,0.0249269921,0.026238939,0.027550886,0.0288628329,0.0301747799,0.0314867268,0.0327986738,0.0341106207,0.0354225677,0.0367345146,0.0380464616,0.0393584085,0.0406703555,0.0419823024,0.0432942494,0.0446061963,0.0459181433,0.0472300902,0.0485420372,0.0498539841,0.0511659311,0.052477878,0.053789825,0.0551017719,0.0564137189,0.0577256658,0.0590376128,0.0603495597,0.0616615067,0.0629734536,0.0642854006,0.0655973475,0.0669092945,0.0682212414,0.0695331884,0.0708451353,0.0721570823,0.0734690292,0.0747809762,0.0760929231,0.0774048701,0.078716817,0.080028764,0.0813407109,0.0826526579,0.0839646048,0.0852765518,0.0865884987,0.0879004457,0.0902457862,0.0933094828,0.0978079399,0.1023063969,0.1068048539,0.111303311,0.115801768,0.120300225,0.124798682,0.1292971391,0.1338199508,0.1388055027,0.1440933779,0.1496807808,0.1571177226,0.1652387403,0.1753118263,0.1904276903,0.2058197291,0.2212117678,0.237030829,0.2551785571,0.273870758,0.2925629589,0.3115548313,0.3307464845,0.3499926649,0.3692260274,0.3884136416,0.407661417,0.4269091924,0.4457073638,0.464050886,0.4823944081,0.5007379302,0.5190814523,0.5374249745,0.5538739661,0.5696118391,0.5853388804,0.6010659216,0.6161284786,0.6273538036,0.6382421632,0.6486483242,0.6588094975,0.668725683,0.6786418685,0.688558054,0.6984742395,0.708390425,0.7183066106,0.7278808508,0.7373411092,0.7468013677,0.7561442929,0.7645842622,0.7730242316,0.7814642009,0.7899041702,0.7983441395,0.8067841088,0.8152111577,0.8229940495,0.8307769414,0.8385598332,0.8447944123,0.8509124517,0.8563824526,0.8610823306,0.8657454654,0.8704086002,0.8750717351,0.8797348699,0.8843980047,0.8890611396,0.8934873987,0.8970573375,0.9006272763,0.9041972151,0.9077671539,0.9103291006,0.9126390493,0.914948998,0.9172589467,0.9195688953,0.921878844,0.9236671785,
0.9253634634,0.9270597483,0.9287560333,0.9304523182,0.9321486031,0.933844888,0.935541173,0.9372374579,0.9389337428,0.9406300277,0.9423263126,0.9440225976,0.9457188825,0.9474151674,0.9491114523,0.9508077373,0.9525040222,0.9542003071,0.955896592,0.9575928769,0.9592891619,0.9609854468,0.9626817317,0.9643780166,0.9660743016,0.9677705865,0.9694668714,0.9711631563,0.9728594412,0.9745557262,0.9762520111,0.977948296,0.9796445809,0.9813408659,0.9830371508,0.9847334357,0.9864297206,0.9881260055,0.9898222905,0.9915185754,0.9932148603,0.9949111452,0.9966074302,0.9983037151,1.0] \n", + "\n", + " wunderplumb \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, @@ -3183,11 +3408,12 @@ " question_weight\n", " type\n", " options\n", - " pro_median\n", - " 4Shadower\n", - " Bot_Pepa\n", - " CatrachoCaster\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " ...\n", + " metac-o1\n", " metac-o1-preview\n", " metac-perplexity\n", " minefrac1\n", @@ -3197,391 +3423,517 @@ " swingswish\n", " twsummerbot\n", " wunderplumb\n", - " bot_team_median\n", " \n", " \n", " \n", " \n", - " 81\n", - " 35169\n", - " 35119\n", - " Not in top 50\n", - " 1.0\n", - " multiple_choice\n", - " [0-10, 11-20, 21-30, 31-40, 41-50, Not in top 50]\n", - " [0.02,0.01,0.015,0.015,0.05,0.89]\n", + " 94\n", + " 35380\n", + " 35345\n", + " yes\n", + " 1.00\n", + " binary\n", + " None\n", " NaN\n", - " -280.223742\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -448.863637\n", - " -178.058617\n", - " -300.703183\n", - " -287.919846\n", - " -339.002408\n", + " 0.9\n", + " 0.9\n", + " NaN\n", " NaN\n", + " 0.95\n", + " 0.95\n", " NaN\n", - " -234.857021\n", - " -240.919483\n", - " -287.919846\n", + " 0.9\n", + " 0.762\n", + " 0.9\n", " \n", " \n", - " 82\n", - " 35170\n", - " 35121\n", - " 3 or more\n", - " 1.0\n", - " multiple_choice\n", - " [0, 1, 2, 3 or more]\n", - " [0.01,0.18,0.54,0.27]\n", 
+ " 95\n", + " 35381\n", + " 35354\n", + " no\n", + " 1.00\n", + " binary\n", + " None\n", " NaN\n", - " -77.944110\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -99.325177\n", - " -18.677591\n", - " -52.324814\n", - " 10.536052\n", - " 25.951120\n", + " 0.4\n", + " 0.9\n", + " NaN\n", + " NaN\n", + " 0.15\n", " NaN\n", " NaN\n", - " 27.650877\n", - " -64.460900\n", - " 27.650877\n", + " 0.1\n", + " 0.126\n", + " 0.95\n", " \n", " \n", - " 83\n", - " 35171\n", - " 35123\n", - " ≥7.5 and ≤8.5\n", - " 1.0\n", - " multiple_choice\n", - " [<7.5, ≥7.5 and ≤8.5, >8.5 and <9.0, ≥9.0 and ≤9.5, >9.5]\n", - " [0.02,0.3,0.3,0.3,0.08]\n", + " 96\n", + " 35385\n", + " 35358\n", + " yes\n", + " 1.00\n", + " binary\n", + " None\n", " NaN\n", - " -70.227966\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -132.175584\n", - " -26.570317\n", + " 0.8\n", + " 0.95\n", " NaN\n", - " -18.232156\n", " NaN\n", + " 0.9\n", " NaN\n", " NaN\n", - " -17.832954\n", - " -56.798404\n", - " -62.860866\n", + " 0.85\n", + " 0.828\n", + " 0.85\n", " \n", " \n", - " 91\n", - " 35377\n", - " 35334\n", - " Jimmy Patronis\n", - " 1.0\n", - " multiple_choice\n", - " [Jimmy Patronis, Gay Valimont, Someone else]\n", - " [0.997,0.001,0.002]\n", - " -17.134888\n", - " -15.951442\n", + " 97\n", + " 35386\n", + " 35364\n", + " no\n", + " 0.85\n", + " binary\n", + " None\n", " NaN\n", - " ...\n", - " -3.781749\n", - " -4.828879\n", " NaN\n", - " -12.482886\n", - " -8.037710\n", + " False\n", + " False\n", + " ...\n", + " 0.8\n", + " 0.85\n", + " 0.3\n", " NaN\n", - " -11.352931\n", + " 0.85\n", + " 0.85\n", " NaN\n", - " -14.781838\n", - " -12.104814\n", + " 0.7\n", + " 0.132\n", + " 0.3\n", " \n", " \n", - " 92\n", - " 35378\n", - " 35336\n", - " 31-49\n", - " 1.0\n", - " multiple_choice\n", - " [0-24, 25-30, 31-49, 50-70, >70]\n", - " [0.001,0.359,0.55,0.08,0.01]\n", - " -69.314718\n", - " -87.183897\n", + " 98\n", + " 35387\n", + " 35367\n", + " no\n", + " 0.85\n", + " binary\n", + " 
None\n", " NaN\n", - " ...\n", - " -170.474809\n", - " -290.872090\n", " NaN\n", - " -170.474809\n", - " -31.845373\n", + " False\n", + " False\n", + " ...\n", + " 0.05\n", + " 0.05\n", + " 0.03\n", " NaN\n", - " -48.097266\n", + " 0.15\n", + " 0.05\n", " NaN\n", - " -74.923665\n", - " -20.067070\n", + " 0.2\n", + " 0.27\n", + " 0.2\n", " \n", " \n", "\n", - "

5 rows × 54 columns

\n", + "

5 rows × 57 columns

\n", "" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight \\\n", - "81 35169 35119 Not in top 50 1.0 \n", - "82 35170 35121 3 or more 1.0 \n", - "83 35171 35123 ≥7.5 and ≤8.5 1.0 \n", - "91 35377 35334 Jimmy Patronis 1.0 \n", - "92 35378 35336 31-49 1.0 \n", - "\n", - " type \\\n", - "81 multiple_choice \n", - "82 multiple_choice \n", - "83 multiple_choice \n", - "91 multiple_choice \n", - "92 multiple_choice \n", + " pro_question_id bot_question_id resolution question_weight type \\\n", + "94 35380 35345 yes 1.00 binary \n", + "95 35381 35354 no 1.00 binary \n", + "96 35385 35358 yes 1.00 binary \n", + "97 35386 35364 no 0.85 binary \n", + "98 35387 35367 no 0.85 binary \n", "\n", - " options \\\n", - "81 [0-10, 11-20, 21-30, 31-40, 41-50, Not in top 50] \n", - "82 [0, 1, 2, 3 or more] \n", - "83 [<7.5, ≥7.5 and ≤8.5, >8.5 and <9.0, ≥9.0 and ≤9.5, >9.5] \n", - "91 [Jimmy Patronis, Gay Valimont, Someone else] \n", - "92 [0-24, 25-30, 31-49, 50-70, >70] \n", - "\n", - " pro_median 4Shadower Bot_Pepa CatrachoCaster \\\n", - "81 [0.02,0.01,0.015,0.015,0.05,0.89] NaN -280.223742 NaN \n", - "82 [0.01,0.18,0.54,0.27] NaN -77.944110 NaN \n", - "83 [0.02,0.3,0.3,0.3,0.08] NaN -70.227966 NaN \n", - "91 [0.997,0.001,0.002] -17.134888 -15.951442 NaN \n", - "92 [0.001,0.359,0.55,0.08,0.01] -69.314718 -87.183897 NaN \n", - "\n", - " ... metac-o1-preview metac-perplexity minefrac1 mmBot \\\n", - "81 ... -448.863637 -178.058617 -300.703183 -287.919846 \n", - "82 ... -99.325177 -18.677591 -52.324814 10.536052 \n", - "83 ... -132.175584 -26.570317 NaN -18.232156 \n", - "91 ... -3.781749 -4.828879 NaN -12.482886 \n", - "92 ... 
-170.474809 -290.872090 NaN -170.474809 \n", - "\n", - " pgodzinai pianobot swingswish twsummerbot wunderplumb \\\n", - "81 -339.002408 NaN NaN -234.857021 -240.919483 \n", - "82 25.951120 NaN NaN 27.650877 -64.460900 \n", - "83 NaN NaN NaN -17.832954 -56.798404 \n", - "91 -8.037710 NaN -11.352931 NaN -14.781838 \n", - "92 -31.845373 NaN -48.097266 NaN -74.923665 \n", - "\n", - " bot_team_median \n", - "81 -287.919846 \n", - "82 27.650877 \n", - "83 -62.860866 \n", - "91 -12.104814 \n", - "92 -20.067070 \n", - "\n", - "[5 rows x 54 columns]" + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... \n", + "\n", + " metac-o1 metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 0.9 0.9 NaN NaN 0.95 0.95 \n", + "95 0.4 0.9 NaN NaN 0.15 NaN \n", + "96 0.8 0.95 NaN NaN 0.9 NaN \n", + "97 0.8 0.85 0.3 NaN 0.85 0.85 \n", + "98 0.05 0.05 0.03 NaN 0.15 0.05 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb \n", + "94 NaN 0.9 0.762 0.9 \n", + "95 NaN 0.1 0.126 0.95 \n", + "96 NaN 0.85 0.828 0.85 \n", + "97 NaN 0.7 0.132 0.3 \n", + "98 NaN 0.2 0.27 0.2 \n", + "\n", + "[5 rows x 57 columns]" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + } + ], + "source": [ + "multiple_choice_rows_with_empty_options = df_pro_bot_forecasts[df_pro_bot_forecasts['options'] == '[]'][df_pro_bot_forecasts['type'] == 'multiple_choice']\n", + "if len(multiple_choice_rows_with_empty_options) > 0:\n", + " display_head_and_tail(multiple_choice_rows_with_empty_options)\n", + " raise ValueError(\"Multiple choice questions with empty options found\")\n", + "\n", + "df_pro_bot_forecasts['options'] = df_pro_bot_forecasts['options'].apply(parse_options_array) # @Check: TODO: Refactor/move this (and other times parse_options_array is used) to one central area at beginning cell data normalization should happen together and be availabe at all times in notebook\n", + "display_head_and_tail(df_pro_bot_forecasts)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple function to parse CDF strings for numeric questions\n", + "def parse_numeric_forecasts(df):\n", + " \"\"\"\n", + " Parse CDF strings for numeric questions in-place.\n", + "\n", + " Args:\n", + " df: DataFrame with forecast data\n", + " \"\"\"\n", + " # Get numeric questions\n", + " numeric_mask = df['type'] == 'numeric'\n", + "\n", + " # List of columns to process\n", + " forecast_cols = [col for col in df.columns if col in all_bots or col in ['pro_median', 'bot_median']]\n", + "\n", + " # Process each column\n", + " for col in forecast_cols:\n", + " # Process only for numeric questions and only where the column exists\n", + " if col in df.columns:\n", + " for idx in df[numeric_mask].index:\n", + " value = df.at[idx, col]\n", + "\n", + " # Skip NaN values\n", + " if pd.isna(value):\n", + " continue\n", + "\n", + " # Process string values\n", + " if isinstance(value, str):\n", + " 
try:\n", + " # Parse the CDF string to an array\n", + " parsed_array = np.array([float(x) for x in value.strip('[]').split(',')])\n", + " df.at[idx, col] = parsed_array\n", + " except Exception as e:\n", + " print(f\"Warning: Could not parse {col} at index {idx}: {e}\")\n", + "\n", + " return df\n", + "\n", + "# Now parse the numeric forecasts\n", + "df_pro_bot_forecasts = parse_numeric_forecasts(df_pro_bot_forecasts)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = 
np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: 
invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + 
"/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n", + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n" + ] + } + ], + "source": [ + "df_bot_vs_pro_peer = calculate_all_peer_scores(df_pro_bot_forecasts, all_bots)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
pro_question_idbot_question_idresolutionquestion_weighttypeoptionspro_median4ShadowerBot_PepaCatrachoCaster...metac-o1-previewmetac-perplexityminefrac1mmBotpgodzinaipianobotswingswishtwsummerbotwunderplumb
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", " \n", " \n", - " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", " \n", - " \n", + " \n", " \n", " \n", "
pro_question_idbot_question_idresolutionquestion_weighttypeoptionsrange_minrange_maxopen_upper_boundopen_lower_bound...metac-o1-previewmetac-perplexityminefrac1mmBotpgodzinaipianobotswingswishtwsummerbotwunderplumbbot_team_median
23127031264no0312683126201.0binaryNone0.013NaNmultiple_choice[0, 1, 2-3, 4-6, >6]NaNNaNFalseFalse...-14.943369-9.2275282.3025855.703782NaN-21.005831-5.9485452.2926352.703087NaNNaNNaNNaN-14.9433694.605170
53128231276yes331280312745-91.0binaryNone0.45multiple_choice[0-4, 5-9, >9]NaNNaN67.445505NoneNone...-25.13144344.183275NaN51.08256232.0471900.3901980.204794NaN0.1278330.152526NaNNaN-0.046520NaN32.0471900.310155
83129431288yes63129231286Jeff Bezos1.0binaryNone0.95multiple_choice[Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else]NaNNaN-19.645607FalseFalse...0.0000000.0000000.2988550.211844NaN-0.1845710.112526NaN-11.122564-14.715764NaNNaN-39.812370NaN-17.1850260.112526
123133831334yes9313213137001.0binaryNone0.9multiple_choice[0, 1, 2, Greater than 2]NaNNaN-0.309119NoneNone...-18.2321560.000000-0.518794-1.211941NaN5.406722-5.715841-0.806476-0.494101NaNNaN-49.977579-0.624154NaN-5.715841-0.681313
163387633751no133136831366≥0% and <5%1.0binaryNone0.058NaNmultiple_choice[Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%]NaNNaNNoneNone...-4.5610510.845671NaN-6.8083370.3309430.5108260.0219790.2006710.253781NaNNaNNaN-7.606972NaN-7.6069720.158111
\n", - "

5 rows × 54 columns

\n", + "

5 rows × 58 columns

\n", "
" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight type \\\n", - "2 31270 31264 no 1.0 binary \n", - "5 31282 31276 yes 1.0 binary \n", - "8 31294 31288 yes 1.0 binary \n", - "12 31338 31334 yes 1.0 binary \n", - "16 33876 33751 no 1.0 binary \n", + " pro_question_id bot_question_id resolution question_weight \\\n", + "0 31268 31262 0 1.0 \n", + "3 31280 31274 5-9 1.0 \n", + "6 31292 31286 Jeff Bezos 1.0 \n", + "9 31321 31370 0 1.0 \n", + "13 31368 31366 ≥0% and <5% 1.0 \n", + "\n", + " type \\\n", + "0 multiple_choice \n", + "3 multiple_choice \n", + "6 multiple_choice \n", + "9 multiple_choice \n", + "13 multiple_choice \n", + "\n", + " options \\\n", + "0 [0, 1, 2-3, 4-6, >6] \n", + "3 [0-4, 5-9, >9] \n", + "6 [Larry Ellison, Elon Musk, Mark Zuckerberg, Bernard Arnault & family, Jeff Bezos, Someone else] \n", + "9 [0, 1, 2, Greater than 2] \n", + "13 [Less than -5%, ≥-5% and <0%, ≥0% and <5%, Greater than 5%] \n", "\n", - " options pro_median 4Shadower Bot_Pepa CatrachoCaster ... \\\n", - "2 None 0.013 NaN NaN NaN ... \n", - "5 None 0.45 NaN NaN 67.445505 ... \n", - "8 None 0.95 NaN NaN -19.645607 ... \n", - "12 None 0.9 NaN NaN -0.309119 ... \n", - "16 None 0.058 NaN NaN NaN ... \n", + " range_min range_max open_upper_bound open_lower_bound ... \\\n", + "0 NaN NaN False False ... \n", + "3 NaN NaN None None ... \n", + "6 NaN NaN False False ... \n", + "9 NaN NaN None None ... \n", + "13 NaN NaN None None ... 
\n", "\n", - " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", - "2 -14.943369 -9.227528 NaN -21.005831 -5.948545 \n", - "5 -25.131443 44.183275 NaN 51.082562 32.047190 \n", - "8 0.000000 0.000000 NaN -11.122564 -14.715764 \n", - "12 -18.232156 0.000000 NaN 5.406722 -5.715841 \n", - "16 -4.561051 0.845671 NaN -6.808337 NaN \n", + " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "0 2.302585 5.703782 NaN 2.292635 2.703087 \n", + "3 0.390198 0.204794 NaN 0.127833 0.152526 \n", + "6 0.298855 0.211844 NaN -0.184571 0.112526 \n", + "9 -0.518794 -1.211941 NaN -0.806476 -0.494101 \n", + "13 0.330943 0.510826 0.021979 0.200671 0.253781 \n", "\n", " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", - "2 NaN NaN NaN NaN -14.943369 \n", - "5 NaN NaN NaN NaN 32.047190 \n", - "8 NaN NaN -39.812370 NaN -17.185026 \n", - "12 NaN NaN -49.977579 NaN -5.715841 \n", - "16 NaN NaN -7.606972 NaN -7.606972 \n", + "0 NaN NaN NaN NaN 4.605170 \n", + "3 NaN NaN -0.046520 NaN 0.310155 \n", + "6 NaN NaN NaN NaN 0.112526 \n", + "9 NaN NaN -0.624154 NaN -0.681313 \n", + "13 NaN NaN NaN NaN 0.158111 \n", "\n", - "[5 rows x 54 columns]" + "[5 rows x 58 columns]" ] }, "metadata": {}, @@ -3614,10 +3966,10 @@ " question_weight\n", " type\n", " options\n", - " pro_median\n", - " 4Shadower\n", - " Bot_Pepa\n", - " CatrachoCaster\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " ...\n", " metac-o1-preview\n", " metac-perplexity\n", @@ -3633,177 +3985,179 @@ " \n", " \n", " \n", - " 94\n", - " 35380\n", - " 35345\n", - " yes\n", - " 1.00\n", - " binary\n", - " None\n", - " 0.95\n", - " -5.406722\n", + " 81\n", + " 35169\n", + " 35119\n", + " Not in top 50\n", + " 1.0\n", + " multiple_choice\n", + " [0-10, 11-20, 21-30, 31-40, 41-50, Not in top 50]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -5.406722\n", - " NaN\n", + " -2.879198\n", + " -2.186051\n", + " -3.007032\n", + " 
-2.879198\n", + " -3.795489\n", " NaN\n", - " 0.000000\n", - " 0.000000\n", " NaN\n", - " -5.406722\n", - " -22.051543\n", - " -5.406722\n", - " -5.406722\n", + " -2.348570\n", + " -2.409195\n", + " -2.186051\n", " \n", " \n", - " 95\n", - " 35381\n", - " 35354\n", - " no\n", - " 1.00\n", - " binary\n", - " None\n", - " 0.05\n", - " -294.443898\n", - " NaN\n", - " NaN\n", - " ...\n", - " -225.129180\n", + " 82\n", + " 35170\n", + " 35121\n", + " 3 or more\n", + " 1.0\n", + " multiple_choice\n", + " [0, 1, 2, 3 or more]\n", " NaN\n", " NaN\n", - " -11.122564\n", + " None\n", + " None\n", + " ...\n", + " -0.076961\n", + " -0.300105\n", + " -0.523248\n", + " 0.105361\n", + " 0.259511\n", " NaN\n", " NaN\n", - " -5.406722\n", - " -8.338161\n", - " -294.443898\n", - " -11.122564\n", + " 0.276509\n", + " -0.644609\n", + " -0.587787\n", " \n", " \n", - " 96\n", - " 35385\n", - " 35358\n", - " yes\n", - " 1.00\n", - " binary\n", - " None\n", - " 0.97\n", - " -13.205972\n", + " 83\n", + " 35171\n", + " 35123\n", + " ≥7.5 and ≤8.5\n", + " 1.0\n", + " multiple_choice\n", + " [<7.5, ≥7.5 and ≤8.5, >8.5 and <9.0, ≥9.0 and ≤9.5, >9.5]\n", " NaN\n", " NaN\n", + " None\n", + " None\n", " ...\n", - " -7.490131\n", + " -0.899761\n", + " -0.405465\n", " NaN\n", + " -0.182322\n", " NaN\n", - " -7.490131\n", " NaN\n", " NaN\n", - " -13.205972\n", - " -15.828292\n", - " -13.205972\n", - " -13.205972\n", + " -0.178330\n", + " -0.567984\n", + " -0.693147\n", " \n", " \n", - " 97\n", - " 35386\n", - " 35364\n", - " no\n", - " 0.85\n", - " binary\n", - " None\n", - " 0.666\n", - " -51.282363\n", + " 91\n", + " 35377\n", + " 35334\n", + " Jimmy Patronis\n", + " 1.0\n", + " multiple_choice\n", + " [Jimmy Patronis, Gay Valimont, Someone else]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -80.050570\n", - " 73.993934\n", + " -0.054625\n", + " -0.102356\n", + " NaN\n", + " -0.124829\n", + " -0.080377\n", " NaN\n", - " -80.050570\n", - " -80.050570\n", + " -0.113529\n", " 
NaN\n", - " -10.735852\n", - " 95.505072\n", - " 73.993934\n", - " -10.735852\n", + " -0.147818\n", + " -0.124829\n", " \n", " \n", - " 98\n", - " 35387\n", - " 35367\n", - " no\n", - " 0.85\n", - " binary\n", - " None\n", - " 0.03\n", - " -32.621574\n", + " 92\n", + " 35378\n", + " 35336\n", + " 31-49\n", + " 1.0\n", + " multiple_choice\n", + " [0-24, 25-30, 31-49, 50-70, >70]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " ...\n", - " -7.490131\n", - " -2.083409\n", + " -1.704748\n", + " -4.007333\n", + " NaN\n", + " -1.704748\n", + " -0.318454\n", " NaN\n", - " -13.205972\n", - " -2.083409\n", + " -0.480973\n", " NaN\n", - " -19.268434\n", - " -28.425154\n", - " -19.268434\n", - " -13.205972\n", + " -0.749237\n", + " -0.318454\n", " \n", " \n", "\n", - "

5 rows × 54 columns

\n", + "

5 rows × 58 columns

\n", "" ], "text/plain": [ - " pro_question_id bot_question_id resolution question_weight type \\\n", - "94 35380 35345 yes 1.00 binary \n", - "95 35381 35354 no 1.00 binary \n", - "96 35385 35358 yes 1.00 binary \n", - "97 35386 35364 no 0.85 binary \n", - "98 35387 35367 no 0.85 binary \n", - "\n", - " options pro_median 4Shadower Bot_Pepa CatrachoCaster ... \\\n", - "94 None 0.95 -5.406722 NaN NaN ... \n", - "95 None 0.05 -294.443898 NaN NaN ... \n", - "96 None 0.97 -13.205972 NaN NaN ... \n", - "97 None 0.666 -51.282363 NaN NaN ... \n", - "98 None 0.03 -32.621574 NaN NaN ... \n", - "\n", - " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", - "94 -5.406722 NaN NaN 0.000000 0.000000 \n", - "95 -225.129180 NaN NaN -11.122564 NaN \n", - "96 -7.490131 NaN NaN -7.490131 NaN \n", - "97 -80.050570 73.993934 NaN -80.050570 -80.050570 \n", - "98 -7.490131 -2.083409 NaN -13.205972 -2.083409 \n", + " pro_question_id bot_question_id resolution question_weight \\\n", + "81 35169 35119 Not in top 50 1.0 \n", + "82 35170 35121 3 or more 1.0 \n", + "83 35171 35123 ≥7.5 and ≤8.5 1.0 \n", + "91 35377 35334 Jimmy Patronis 1.0 \n", + "92 35378 35336 31-49 1.0 \n", "\n", - " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", - "94 NaN -5.406722 -22.051543 -5.406722 -5.406722 \n", - "95 NaN -5.406722 -8.338161 -294.443898 -11.122564 \n", - "96 NaN -13.205972 -15.828292 -13.205972 -13.205972 \n", - "97 NaN -10.735852 95.505072 73.993934 -10.735852 \n", - "98 NaN -19.268434 -28.425154 -19.268434 -13.205972 \n", + " type \\\n", + "81 multiple_choice \n", + "82 multiple_choice \n", + "83 multiple_choice \n", + "91 multiple_choice \n", + "92 multiple_choice \n", "\n", - "[5 rows x 54 columns]" + " options range_min \\\n", + "81 [0-10, 11-20, 21-30, 31-40, 41-50, Not in top 50] NaN \n", + "82 [0, 1, 2, 3 or more] NaN \n", + "83 [<7.5, ≥7.5 and ≤8.5, >8.5 and <9.0, ≥9.0 and ≤9.5, >9.5] NaN \n", + "91 [Jimmy Patronis, Gay Valimont, Someone else] NaN \n", + "92 
[0-24, 25-30, 31-49, 50-70, >70] NaN \n", + "\n", + " range_max open_upper_bound open_lower_bound ... metac-o1-preview \\\n", + "81 NaN False False ... -2.879198 \n", + "82 NaN None None ... -0.076961 \n", + "83 NaN None None ... -0.899761 \n", + "91 NaN False False ... -0.054625 \n", + "92 NaN False False ... -1.704748 \n", + "\n", + " metac-perplexity minefrac1 mmBot pgodzinai pianobot swingswish \\\n", + "81 -2.186051 -3.007032 -2.879198 -3.795489 NaN NaN \n", + "82 -0.300105 -0.523248 0.105361 0.259511 NaN NaN \n", + "83 -0.405465 NaN -0.182322 NaN NaN NaN \n", + "91 -0.102356 NaN -0.124829 -0.080377 NaN -0.113529 \n", + "92 -4.007333 NaN -1.704748 -0.318454 NaN -0.480973 \n", + "\n", + " twsummerbot wunderplumb bot_team_median \n", + "81 -2.348570 -2.409195 -2.186051 \n", + "82 0.276509 -0.644609 -0.587787 \n", + "83 -0.178330 -0.567984 -0.693147 \n", + "91 NaN -0.147818 -0.124829 \n", + "92 NaN -0.749237 -0.318454 \n", + "\n", + "[5 rows x 58 columns]" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "# Show me a few rows from each type of question in df_bot_vs_pro_peer\n", - "display_head_and_tail(df_bot_vs_pro_peer[df_bot_vs_pro_peer['type'] == 'multiple_choice'])\n", - "display_head_and_tail(df_bot_vs_pro_peer[df_bot_vs_pro_peer['type'] == 'binary'])" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ @@ -3825,99 +4179,521 @@ " \n", " \n", " \n", - " bot\n", - " Peer Score\n", - " \n", - " \n", - " Rank\n", - " \n", - " \n", + " pro_question_id\n", + " bot_question_id\n", + " resolution\n", + " question_weight\n", + " type\n", + " options\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", + " ...\n", + " metac-o1-preview\n", + " metac-perplexity\n", + " minefrac1\n", + " mmBot\n", + " pgodzinai\n", + " pianobot\n", + " swingswish\n", + " twsummerbot\n", + " wunderplumb\n", + " bot_team_median\n", " \n", " \n", " \n", 
" \n", - " 1\n", - " metac-o1\n", - " 3864.168122\n", - " \n", - " \n", " 2\n", - " metac-o1-preview\n", - " 3162.155445\n", - " \n", - " \n", - " 3\n", - " bot_median\n", - " 2724.680171\n", - " \n", - " \n", - " 4\n", - " manticAI\n", - " 2142.538438\n", + " 31270\n", + " 31264\n", + " no\n", + " 1.0\n", + " binary\n", + " None\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " ...\n", + " -0.038208\n", + " -0.092275\n", + " NaN\n", + " -0.210058\n", + " -0.059485\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " -0.149434\n", " \n", " \n", " 5\n", - " metac-Gemini-Exp-1206\n", - " 2072.216227\n", - " \n", - " \n", - " 6\n", - " acm_bot\n", - " 1876.466009\n", - " \n", - " \n", - " 7\n", - " twsummerbot\n", - " 1763.532046\n", + " 31282\n", + " 31276\n", + " yes\n", + " 1.0\n", + " binary\n", + " None\n", + " NaN\n", + " NaN\n", + " None\n", + " None\n", + " ...\n", + " -0.251314\n", + " 0.441833\n", + " NaN\n", + " 0.510826\n", + " 0.320472\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 0.367725\n", " \n", " \n", " 8\n", - " metac-perplexity\n", - " 1697.555196\n", - " \n", - " \n", - " 9\n", - " GreeneiBot2\n", - " 1603.998618\n", - " \n", - " \n", - " 10\n", - " cookics_bot_TEST\n", - " 1140.390796\n", - " \n", - " \n", - " 11\n", - " metac-claude-3-5-sonnet-latest\n", - " 1134.209821\n", + " 31294\n", + " 31288\n", + " yes\n", + " 1.0\n", + " binary\n", + " None\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " ...\n", + " -0.054067\n", + " 0.000000\n", + " NaN\n", + " -0.111226\n", + " -0.147158\n", + " NaN\n", + " NaN\n", + " -0.398124\n", + " NaN\n", + " -0.147158\n", " \n", " \n", " 12\n", - " SynapseSeer\n", - " 1066.533051\n", - " \n", - " \n", - " 13\n", - " CumulativeBot\n", - " 1030.716475\n", - " \n", - " \n", - " 14\n", - " pgodzinai\n", - " 926.081448\n", - " \n", - " \n", - " 15\n", - " jkraybill_bot\n", - " 627.932509\n", + " 31338\n", + " 31334\n", + " yes\n", + " 1.0\n", + " binary\n", + " None\n", + " NaN\n", 
+ " NaN\n", + " False\n", + " False\n", + " ...\n", + " -0.057158\n", + " 0.000000\n", + " NaN\n", + " 0.054067\n", + " -0.057158\n", + " NaN\n", + " NaN\n", + " -0.499776\n", + " NaN\n", + " -0.057158\n", " \n", " \n", " 16\n", - " metac-deepseek-r1\n", - " 614.572462\n", - " \n", - " \n", - " 17\n", - " question_weight\n", + " 33876\n", + " 33751\n", + " no\n", + " 1.0\n", + " binary\n", + " None\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " ...\n", + " 0.008457\n", + " 0.008457\n", + " NaN\n", + " -0.068083\n", + " NaN\n", + " NaN\n", + " NaN\n", + " -0.076070\n", + " NaN\n", + " -0.096728\n", + " \n", + " \n", + "\n", + "

5 rows × 58 columns

\n", + "" + ], + "text/plain": [ + " pro_question_id bot_question_id resolution question_weight type \\\n", + "2 31270 31264 no 1.0 binary \n", + "5 31282 31276 yes 1.0 binary \n", + "8 31294 31288 yes 1.0 binary \n", + "12 31338 31334 yes 1.0 binary \n", + "16 33876 33751 no 1.0 binary \n", + "\n", + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "2 None NaN NaN False False ... \n", + "5 None NaN NaN None None ... \n", + "8 None NaN NaN False False ... \n", + "12 None NaN NaN False False ... \n", + "16 None NaN NaN False False ... \n", + "\n", + " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "2 -0.038208 -0.092275 NaN -0.210058 -0.059485 \n", + "5 -0.251314 0.441833 NaN 0.510826 0.320472 \n", + "8 -0.054067 0.000000 NaN -0.111226 -0.147158 \n", + "12 -0.057158 0.000000 NaN 0.054067 -0.057158 \n", + "16 0.008457 0.008457 NaN -0.068083 NaN \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", + "2 NaN NaN NaN NaN -0.149434 \n", + "5 NaN NaN NaN NaN 0.367725 \n", + "8 NaN NaN -0.398124 NaN -0.147158 \n", + "12 NaN NaN -0.499776 NaN -0.057158 \n", + "16 NaN NaN -0.076070 NaN -0.096728 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pro_question_idbot_question_idresolutionquestion_weighttypeoptionsrange_minrange_maxopen_upper_boundopen_lower_bound...metac-o1-previewmetac-perplexityminefrac1mmBotpgodzinaipianobotswingswishtwsummerbotwunderplumbbot_team_median
943538035345yes1.00binaryNoneNaNNaNFalseFalse...-0.054067NaNNaN0.0000000.000000NaN-0.054067-0.220515-0.054067-0.054067
953538135354no1.00binaryNoneNaNNaNFalseFalse...-2.251292NaNNaN-0.111226NaNNaN-0.054067-0.083382-2.944439-0.111226
963538535358yes1.00binaryNoneNaNNaNFalseFalse...-0.020834NaNNaN-0.074901NaNNaN-0.132060-0.158283-0.132060-0.158283
973538635364no0.85binaryNoneNaNNaNFalseFalse...-0.6804300.628948NaN-0.680430-0.680430NaN-0.0912550.8117930.628948-0.091255
983538735367no0.85binaryNoneNaNNaNFalseFalse...-0.0177090.000000NaN-0.112251-0.017709NaN-0.163782-0.241614-0.163782-0.112251
\n", + "

5 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " pro_question_id bot_question_id resolution question_weight type \\\n", + "94 35380 35345 yes 1.00 binary \n", + "95 35381 35354 no 1.00 binary \n", + "96 35385 35358 yes 1.00 binary \n", + "97 35386 35364 no 0.85 binary \n", + "98 35387 35367 no 0.85 binary \n", + "\n", + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... \n", + "\n", + " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 -0.054067 NaN NaN 0.000000 0.000000 \n", + "95 -2.251292 NaN NaN -0.111226 NaN \n", + "96 -0.020834 NaN NaN -0.074901 NaN \n", + "97 -0.680430 0.628948 NaN -0.680430 -0.680430 \n", + "98 -0.017709 0.000000 NaN -0.112251 -0.017709 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", + "94 NaN -0.054067 -0.220515 -0.054067 -0.054067 \n", + "95 NaN -0.054067 -0.083382 -2.944439 -0.111226 \n", + "96 NaN -0.132060 -0.158283 -0.132060 -0.158283 \n", + "97 NaN -0.091255 0.811793 0.628948 -0.091255 \n", + "98 NaN -0.163782 -0.241614 -0.163782 -0.112251 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Show me a few rows from each type of question in df_bot_vs_pro_peer\n", + "display_head_and_tail(df_bot_vs_pro_peer[df_bot_vs_pro_peer['type'] == 'multiple_choice'])\n", + "display_head_and_tail(df_bot_vs_pro_peer[df_bot_vs_pro_peer['type'] == 'binary'])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -4078,8 +4854,8 @@ " bot Peer Score\n", "Rank \n", "1 metac-o1 3864.168122\n", - "2 metac-o1-preview 3162.155445\n", - "3 bot_median 2724.680171\n", + "2 bot_median 3472.028144\n", + "3 metac-o1-preview 3162.155445\n", "4 manticAI 2142.538438\n", "5 metac-Gemini-Exp-1206 2072.216227\n", "6 acm_bot 1876.466009\n", @@ -4092,7 +4868,7 @@ "13 CumulativeBot 1030.716475\n", "14 pgodzinai 926.081448\n", "15 jkraybill_bot 627.932509\n", - "16 metac-deepseek-r1 614.572462\n", + "16 metac-deepseek-r1+asknews 614.572462\n", "17 question_weight 378.020000\n", "18 metac-exa 265.384263\n", "19 MWG 215.551323\n", @@ -4126,7 +4902,7 @@ "47 ajf-bot -3239.712801" ] }, - "execution_count": 37, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -4137,7 +4913,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -4146,13 +4922,13 @@ "text": [ "mean pro median forecast on questions that resolved yes: 74.0%\n", "mean pro median forecast on questions that resolved no: 22.0%\n", - "mean metac-o1 forecast on 
questions that resolved yes: 73.0%\n", - "mean metac-o1 forecast on questions that resolved no: 26.0%\n" + "mean metac-o1 forecast on questions that resolved yes: 75.0%\n", + "mean metac-o1 forecast on questions that resolved no: 28.999999999999996%\n" ] }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIQCAYAAACLwV/UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACDBklEQVR4nO3deVxU9f7H8fcMCAjKoCIqQmriAl5TXCqztNIy7Vpe27PE9ltqi63eUlMrb6vd6pbV71Z62277Xpa2q3mNyFuJC2buiojOKAgKc35/fGVkBHQGGGaA1/Px8DHOOWfOfObI4OdzvpvNsixLAAAAAACf2YMdAAAAAADUNxRSAAAAAOAnCikAAAAA8BOFFAAAAAD4iUIKAAAAAPxEIQUAAAAAfqKQAgAAAAA/UUgBAAAAgJ8opAAAAADATxRSANAI/fHHH7LZbHrppZc82+69917ZbLagxgUAQH1BIQWgUXvppZdks9k8f6KiotS1a1dNmDBB27dvD/j7jxs3TjabTbGxsdq3b1+F/WvWrPHE9sgjjwQ8nobm1FNP9fr3rerPvffeG/BYOnbsWOFnrUuXLrr99tuVn59frXMuXrxY9957r3bv3u3T8WU/b5X9+eyzz6oVQ33ywAMP6L333gt2GAAaiPBgBwAAoWDGjBnq1KmTioqK9P333+uZZ57RJ598ol9//VXR0dEBfe/w8HAVFhbqww8/1IUXXui175VXXlFUVJSKiooCGoMk3XPPPbrrrrsC/j516e6779bVV1/teb5s2TI98cQT+tvf/qbU1FTP9uOOO65O4undu7duvfVWSVJRUZEyMzP1+OOP65tvvtF///tfv8+3ePFiTZ8+XePGjVNcXJxPr4mMjNT//d//Vdjeq1cvv9+/vnnggQd0/vnna9SoUcEOBUADQCEFAJKGDx+ufv36SZKuvvpqtWrVSo899pjef/99XXLJJZW+pqCgQDExMTV+78jISA0cOFCvvfZahULq1Vdf1dlnn6233367xu9zNOHh4QoPb1j/LZxxxhlez6OiovTEE0/ojDPO0Kmnnlrn8bRv316XXXaZ5/nVV1+tZs2a6ZFHHtGaNWvUpUuXgMcQHh7uFUNtKiwsDPiNBwAIFXTtA4BKnH766ZKkdevWSQe7RDVr1kxr167ViBEj1Lx5c40ZM0Y6WFDdeuutSk5OVmRkpLp166ZHHnlElmX5/H6XXnqpPv30U68uWsuWLdOaNWt06aWXVvqa3bt36+abb/a8b0pKih588EG53e4Kx40bN04Oh0NxcXHKyMiotCtYZWOkXnzxRZ1++ulKSEhQZGSk0tLS9Mwzz1R4bceOHfXnP/9Z33//vY4//nhFRUXp2GOP1bx58474uQ8cOKCWLVvqiiuuqLDP5XIpKipKt912m2fbk08+qR49eig6OlotWrRQv3799Oqrrx7xPXzx9NNPq0ePHoqMjFRiYqLGjx9f4Rqdeuqp+tOf/qTMzEyddNJJatq0qTp16qQ5c+bU6L3btm0rHSxwyvvyyy91yimnKCYmRnFxcTr33HOVnZ3t2X/vvffq9ttvlyR16tTJ00Xvjz/+qFE8qsb1GDRokKKjo/W3v/1NklRcXKxp06YpJSVFkZGRSk5O1h133KHi4uIK7/Xyyy/r+OOP9/ybDho0SJ9//rln//vvv6+zzz5biYmJioyMVOfOnTV
z5kyVlpZ6nWfNmjU677zz1LZtW0VFRSkpKUkXX3yxnE6nJMlms6mgoEBz5871XKtx48ZJkvbs2aObb75ZHTt2VGRkpBISEnTGGWfop59+qvG1BNBwNaxbjwBQS9auXStJatWqlWdbSUmJhg0bppNPPlmPPPKIoqOjZVmWzjnnHH311Ve66qqr1Lt3b82fP1+33367Nm/erNmzZ/v0fqNHj9Zf//pXvfPOO7ryyiulg61R3bt3V58+fSocX1hYqMGDB2vz5s267rrrdMwxx2jx4sWaPHmytm7dqscff1ySZFmWzj33XH3//ff661//qtTUVL377rvKyMjwKa5nnnlGPXr00DnnnKPw8HB9+OGHuuGGG+R2uzV+/HivY3NycnT++efrqquuUkZGhl544QWNGzdOffv2VY8ePSo9f5MmTfSXv/xF77zzjp599llFRER49r333nsqLi7WxRdfLEl6/vnndeONN+r888/XTTfdpKKiIv3vf//T0qVLqyw2fXHvvfdq+vTpGjp0qK6//nqtWrVKzzzzjJYtW6ZFixapSZMmnmN37dqlESNG6MILL9Qll1yiN954Q9dff70iIiI8/25HcuDAAeXl5UkHu/ZlZWXpscce06BBg9SpUyfPcQsWLNDw4cN17LHH6t5779W+ffv05JNPauDAgfrpp5/UsWNHjR49WqtXr9Zrr72m2bNnKz4+XpLUunXro8ZRFkOZJk2ayOFw+H09du7cqeHDh+viiy/WZZddpjZt2sjtduucc87R999/r2uvvVapqan65ZdfNHv2bK1evdprjNL06dN177336qSTTtKMGTMUERGhpUuX6ssvv9SZZ54pHRzH2KxZM02aNEnNmjXTl19+qalTp8rlcunhhx+WJO3fv1/Dhg1TcXGxJk6cqLZt22rz5s366KOPtHv3bjkcDv373//W1VdfreOPP17XXnutJKlz586SpL/+9a966623NGHCBKWlpWnnzp36/vvvlZ2dXen3DwAk858sADRaL774oiXJWrBggbVjxw5r48aN1uuvv261atXKatq0qbVp0ybLsiwrIyPDkmTdddddXq9/7733LEnWfffd57X9/PPPt2w2m5WTk3PE98/IyLBiYmI8rxkyZIhlWZZVWlpqtW3b1po+fbq1bt06S5L18MMPe143c+ZMKyYmxlq9erXX+e666y4rLCzM2rBhg1d8Dz30kOeYkpIS65RTTrEkWS+++KJn+7Rp06zD/1soLCysEPOwYcOsY4891mtbhw4dLEnWt99+69mWm5trRUZGWrfeeusRr8H8+fMtSdaHH37otX3EiBFe73PuuedaPXr0OOK5jubNN9+0JFlfffWVJ8aIiAjrzDPPtEpLSz3HPfXUU5Yk64UXXvBsGzx4sCXJevTRRz3biouLrd69e1sJCQnW/v37j/jeZdfo8D8DBw608vLyvI4tO+fOnTs925YvX27Z7XZr7Nixnm0PP/ywJclat26dT5+/7Of48D+DBw+u9vWYM2eO13v8+9//tux2u/Xdd995bZ8zZ44lyVq0aJFlWZa1Zs0ay263W3/5y1+83suyLMvtdnv+XtnP4HXXXWdFR0dbRUVFlmVZVlZWliXJevPNN4/4+WNiYqyMjIwK2x0OhzV+/PgjvhYADkfXPgCQNHToULVu3VrJycm6+OKL1axZM7377rtq376913HXX3+91/NPPvlEYWFhuvHGG72233rrrbIsS59++qnPMVx66aX6+uuvtW3bNn355Zfatm1blS0tb775pk455RS1aNFCeXl5nj9Dhw5VaWmpvv32W0984eHhXnGHhYVp4sSJPsXUtGlTz9+dTqfy8vI0ePBg/f77754uU2XS0tJ0yimneJ63bt1a3bp10++//37E9zj99NMVHx+v//znP55tu3bt0hdffKGLLrrIsy0uLk6bNm3SsmXLfIrdFwsWLND+/ft18803y24/9F/iNddco9jYWH388cdex4eHh+u6667
zPI+IiNB1112n3NxcZWZmHvX9TjjhBH3xxRf64osv9NFHH+n+++/Xb7/9pnPOOccza+PWrVv1888/a9y4cWrZsqXntccdd5zOOOMMffLJJzX6zFFRUZ4Yyv48+uij1boekZGRFbplvvnmm0pNTVX37t29fjbLust+9dVX0sEWR7fbralTp3q9lw52wytT/mdwz549ysvL0ymnnKLCwkKtXLlSkjytafPnz1dhYaHf1yQuLk5Lly7Vli1b/H4tgMaLrn0AIOmf//ynunbtqvDwcLVp00bdunWrkNyFh4crKSnJa9v69euVmJio5s2be20vmxFu/fr1PsdQNvbqP//5j37++Wf1799fKSkplY55WbNmjf73v/9V2Y0rNzfX8/7t2rVTs2bNvPZ369bNp5gWLVqkadOmacmSJRUSVKfT6UlgJemYY46p8PoWLVpo165dR3yP8PBwnXfeeXr11VdVXFysyMhIvfPOOzpw4IBXIXXnnXdqwYIFOv7445WSkqIzzzxTl156qQYOHOjTZ6lM2b/P4dcjIiJCxx57bIV/v8TExAoTjHTt2lU6uDbXiSeeeMT3i4+P19ChQz3Pzz77bHXr1k3nn3++/u///k8TJ06sMiYd/LmaP39+jSY6CQsL84qhPH+vR/v27b26Y+rgz2Z2dvZRfzbXrl0ru92utLS0I8b722+/6Z577tGXX34pl8vlta+smO/UqZMmTZqkxx57TK+88opOOeUUnXPOObrsssu8fkar8tBDDykjI0PJycnq27evRowYobFjx+rYY4896msBNF4UUgAg6fjjj/fM2leVyMjICsVVbYqMjNTo0aM1d+5c/f7770dc28jtduuMM87QHXfcUen+suS+JtauXashQ4aoe/fueuyxx5ScnKyIiAh98sknmj17doVJLcLCwio9jy+Tblx88cV69tln9emnn2rUqFF644031L17d68puVNTU7Vq1Sp99NFH+uyzz/T222/r6aef1tSpUzV9+vQaf95gGTJkiCTp22+/9bmlMFSUby0q43a71bNnTz322GOVviY5Odnn8+/evVuDBw9WbGysZsyYoc6dOysqKko//fST7rzzTq+fwUcffVTjxo3T+++/r88//1w33nijZs2apR9++KHCDZDDXXjhhTrllFP07rvv6vPPP9fDDz+sBx98UO+8846GDx/uc7wAGhcKKQCogQ4dOmjBggXas2ePV6tUWZejDh06+HW+Sy+9VC+88ILsdrtnkoXKdO7cWXv37q2yZaF8fAsXLtTevXu9WqVWrVp11Fg+/PBDFRcX64MPPvBqbSrrmlWbBg0apHbt2uk///mPTj75ZH355Ze6++67KxwXExOjiy66SBdddJH279+v0aNH6/7779fkyZMVFRXl9/uW/fusWrXKq/Vh//79WrduXYXru2XLlgqtQatXr5YOzlxYHSUlJZKkvXv3VojpcCtXrlR8fLzn/Q+fZbGm/L0elencubOWL1+uIUOGHDG+zp07y+12a8WKFerdu3elx3z99dfauXOn3nnnHQ0aNMizvWw2zcP17NlTPXv21D333KPFixdr4MCBmjNnju677z7pKNerXbt2uuGGG3TDDTcoNzdXffr00f33308hBaBKjJECgBoYMWKESktL9dRTT3ltnz17tmw2m99J2GmnnaaZM2fqqaee8kyLXZkLL7xQS5Ys0fz58yvs2717tyc5HzFihEpKSrymLC8tLdWTTz551FjKWpjKtyg5nU69+OKLfn0mX9jtdp1//vn68MMP9e9//1slJSVe3fp0cIa48iIiIpSWlibLsnTgwIFqve/QoUMVERGhJ554wutz/utf/5LT6dTZZ5/tdXxJSYmeffZZz/P9+/fr2WefVevWrdW3b99qxfDhhx9K5RbEbdeunXr37q25c+d6TTn+66+/6vPPP9eIESM828oKqsqms68Of69HZS688EJt3rxZzz//fIV9+/btU0FBgSRp1KhRstvtmjF
jRoXWzbL3ruxncP/+/Xr66ae9jne5XJ6f+TI9e/aU3W73mnI9JiamwrUqLS2tMN4vISFBiYmJlU7XDgBlaJECgBoYOXKkTjvtNN199936448/1KtXL33++ed6//33dfPNN3umV/aV3W7XPffcc9Tjbr/9dn3wwQf685//7JlivKCgQL/88oveeust/fHHH4qPj9fIkSM1cOBA3XXXXfrjjz+Ulpamd955p0LiWJkzzzxTERERGjlypK677jrt3btXzz//vBISErR161a/PpcvLrroIj355JOaNm2aevbs6RlnVj6etm3bauDAgWrTpo2ys7P11FNP6eyzz64wRs1XrVu31uTJkzV9+nSdddZZOuecc7Rq1So9/fTT6t+/f4WFaxMTE/Xggw/qjz/+UNeuXT3j2Z577jmvacGrsnnzZr388svSwYJg+fLlevbZZxUfH+/Vre/hhx/W8OHDNWDAAF111VWe6c8dDodXl8+y4u3uu+/WxRdfrCZNmmjkyJHVHj/l7/WozOWXX6433nhDf/3rX/XVV19p4MCBKi0t1cqVK/XGG29o/vz56tevn1JSUnT33Xdr5syZOuWUUzR69GhFRkZq2bJlSkxM1KxZs3TSSSepRYsWysjI0I033iibzaZ///vfFbqLfvnll5owYYIuuOACde3aVSUlJfr3v/+tsLAwnXfeeV7Xa8GCBXrssceUmJioTp06qVu3bkpKStL555+vXr16qVmzZlqwYIGWLVvmmYQDACoV7GkDASCYyqY/X7Zs2RGPKz9N+eH27Nlj3XLLLVZiYqLVpEkTq0uXLtbDDz/sNYVzdc5bprLpz8ved/LkyVZKSooVERFhxcfHWyeddJL1yCOPeE3FvXPnTuvyyy+3YmNjLYfDYV1++eWe6aKPNv35Bx98YB133HFWVFSU1bFjR+vBBx+0XnjhhQpTbnfo0ME6++yzK8Q+ePBgz9TaR+N2u63k5ORKp5O3LMt69tlnrUGDBlmtWrWyIiMjrc6dO1u333675XQ6fTq/Vcn052Weeuopq3v37laTJk2sNm3aWNdff721a9euCp+lR48e1o8//mgNGDDAioqKsjp06GA99dRTPr334dOf2+12KyEhwbrkkksqnSZ/wYIF1sCBA62mTZtasbGx1siRI60VK1ZUOG7mzJlW+/btLbvdftSp0H35efP3elRm//791oMPPmj16NHDioyMtFq0aGH17dvXmj59eoV/rxdeeMFKT0/3HDd48GDriy++8OxftGiRdeKJJ1pNmza1EhMTrTvuuMMzZX7Zv+Pvv/9uXXnllVbnzp2tqKgoq2XLltZpp51mLViwwOu9Vq5caQ0aNMhq2rSpJcnKyMiwiouLrdtvv93q1auX1bx5cysmJsbq1auX9fTTTx/1OgFo3GyWL6OAAQBo5E499VTl5eXp119/DXYoAIAQwBgpAAAAAPAThRQAAAAA+IlCCgAAAAD8FNRC6ttvv9XIkSOVmJgom82m9957z2u/ZVmaOnWq2rVrp6ZNm2ro0KFas2aN1zH5+fkaM2aMYmNjFRcXp6uuusqzFgcAALXl66+/ZnwUAMAjqIVUQUGBevXqpX/+85+V7n/ooYf0xBNPaM6cOVq6dKliYmI0bNgwFRUVeY4ZM2aMfvvtN33xxRf66KOP9O233+raa6+tw08BAAAAoLEJmVn7bDab3n33XY0aNUo62BqVmJioW2+9Vbfddpt0cCHINm3a6KWXXtLFF1+s7OxspaWladmyZerXr58k6bPPPtOIESO0adMmJSYmBvUzAQAAAGiYQnZB3nXr1mnbtm0aOnSoZ5vD4dAJJ5ygJUuW6OKLL9aSJUsUFxfnKaJ0cFV2u92upUuX6i9/+Uul5y4uLvZardztdis/P1+tWrWSzWYL8CcDAAAAEKosy9KePXuUmJgou73qDnwhW0ht27ZNktSmTRuv7W3atPHs27ZtmxISErz2h4eHq2XLlp5jKjNr1ixNnz49IHE
DAAAAqP82btyopKSkKveHbCEVSJMnT9akSZM8z51Op4455hitX79esbGxQY2tPlq7VrrzTqlFC6l584r79+yRdu2S/v53t2Jj8xQfH3/E6h4AAABH53a7lZdHblXbXC6XOnTooOaVJbblhGwh1bZtW0nS9u3b1a5dO8/27du3q3fv3p5jcnNzvV5XUlKi/Px8z+srExkZqcjIyArb4+LiKKSqwe2WDhyQHA4pLKzi/thYKTfXfNljY/crLi6OLzsAAEANud1u7d9PblXbyq7l0Yb8hOwV79Spk9q2bauFCxd6trlcLi1dulQDBgyQJA0YMEC7d+9WZmam55gvv/xSbrdbJ5xwQlDibowcDikqSiooqHx/YaHZ73DUdWQAAABAYAS1RWrv3r3KycnxPF+3bp1+/vlntWzZUsccc4xuvvlm3XffferSpYs6deqkKVOmKDEx0TOzX2pqqs466yxdc801mjNnjg4cOKAJEybo4osvrpcz9rndUk6O5HSaoiMlRQrGzQV/40hJkVJTpcxM81i+eLcsadMmqV8/qXNnKS+vTj4CAAAAEFBBLaR+/PFHnXbaaZ7nZeOWMjIy9NJLL+mOO+5QQUGBrr32Wu3evVsnn3yyPvvsM0VFRXle88orr2jChAkaMmSI7Ha7zjvvPD3xxBNB+Tw1kZUlzZ0rZWdLRUWmBSc1VcrIkNLTQzsOu93sX7/evC4pSYqONi1RmzZJ8fHS2LHBKQoBAACAQAiZdaSCyeVyyeFwyOl0BmWMVFaWNGOGaa1JSpJiYkw3ubIiZOrUuimmahpHZUVYWpopotLTTT/e3NxcJSQk0I8XAACghsitAsPX2iBkJ5toLNxuU3zk5Xl3i4uNNc+zs6V586RevQLbolMbcaSnm/2h0D0RAAAACCQKqSDLyTnUHe7wiUFsNrN9xQpzXNeuoR+H3R7YOAEAAIBQQCEVZE6n6QYXE2MmZnA6pf37pYgI06ITHS1t2WK211UclamrOAAAAFB3QmWys/qIQirIyqYO37RJ2rxZ2r1bKimRwsOluDipffu6mTq8/BTmlXUFZQpzAACAhiVUJjurr6g3gywlRWrVSlq2TNqxw7RExcaaxx07zPb4eHNcoONITTUF3eHTj5RNYZ6WFvg4AAAAEHhlk4xlZkotW0pdupjHzEyzPSsr2BGGPgqpEGOzHfpTl8qmMI+PN3clXC7TMuZymedMYQ4AANAwHD7JWGysFBZ2aJKxvDwzyZjbHexIQxtpcZDl5Eg7d0r9+0utW0vFxaaPanGxed6/v/lhLrduccCkp5spzvv2lfLzzXvm55vFdOtqCnYAAAAElj+TjKFqjJEKsrJJHrp0kY45xrQAlU02ERsrlZYeGgBYF5jCHAAAoGFjkrHaQSEVZIdP8nD4ZA7BmOSBKcwBAAAaLiYZqx20MwQZkzwAAACgLpF/1g4KqSBjkgcAAADUJfLP2kHXvhBQNslD2Tz+W7aY5tR+/cwPcV1O8hCoRdncbmnNGik315y7S5ejn5cF4gAAAGpXWX5VUmKKqa++klauDG7+WV9RSIWIUJjkIVCLspWdd+VKqV07aetWqXv3I5+XBeIAAABqV2X5Vffu0vXXS+3bc+PaXxRSISSYkzyULcqWl2emvIyJMQMQMzOl9eurP/15+fMmJ0uJiWZq9yOdN1CxAAAANFZV5Vc//SRt2GDyKyYb8w/1JgK2KFtl57Xbj3xeFogDAACoXeRXgUEhhYAtylad87JAHAAAQO0ivwoMCin4tChbUZH/i7JV57yBigUAAKCxIr8KDAopeC3KVpnqLspWnfMGKhYAAIDGivwqMCikELBF2apzXhaIAwAAqF3kV4FBIYWALcpW2XlLS498XhaIAwAAqF3kV4Fhs6zD69LGx+VyyeFwyOl0KjY2NtjhBE1lawukpdV8UbZD60i51a5drrZuTVBqqv2I5w1ULAAAAA2F2+1Wbm6uEhISZPehCiK/8o2vtQGFFIWUl7L
Vrmt7UWC3W1qz5tCXvUsX+1HPG6hYAAAAGgJ/CymRX/nE19qABXnhJVCLAtvtUpcu5gubkODbFzaYCxQDAAA0RORXtYf6EwAAAAD8RCEFAAAAAH6ikAIAAAAAP1FIAQAAAICfKKQAAAAAwE8UUgAAAADgJwopAAAAAPAThRQAAAAA+IlCCgAAAAD8RCEFAAAAAH6ikAIAAAAAP4UHOwA0Tm63lJMjOZ2SwyGlpEh2ynoAAACpqlxJh2089tianS8Ecq9QjcsXFFKocz//LM2bJ2VnS0VFUlSUlJoqZWRI6enBjg4AACC4srKkuXO9c6WhrbKUoblK2FluY1qadP75UkKC3+cLhdwrVOPyFYUU6tTatdKjj0o7dkhJSVJMjFRQIGVmSuvXS1On1o8vDgAAQCBkZUkzZkh5eYdypdabsnTC/BnKU57C+yepZZdyCVRJidS0qdSnj8/nC4XcK1Tj8kc9aThDQ+B2S19+ab4wqalSbKwUFmYeU1PN9nnzzHEAAACNjdttWmjK50rhdreGbJ6rtk3ytKZJqlZujpVlL5dAuVzSyy9XmkBVdr5QyL1CNS5/UUihzqxdK23caO462Gze+2w2s33FCtNPFgAAoLHJyTHd3MrnSq2dOWq3O1u7Y5IU08ymXbslp+vgC2w2KT6+ygSqsvOp3EuDlXuFalz+opBCnXE6pQMHTNNtZaKjTf9Yp7OuIwMAAAg+p9PkQuVzpab7nWpSUqTi8BiFh0ulJdL+/eVeFBlZZQJV2fnKC1buFapx+YtCCnXG4ZCaNDH9XytTWGgGGTocdR0ZAABA8DkcJhcqnyvti3DoQHiUIksKVFIihYVLERHlXlRcXGUCVdn5ygtW7hWqcfmLQgp1pnNnKTlZ2rRJsizvfZZltqelmWkvAQAAGpuUFDNGqHyutMORoq1xqYor2KSCvZZaxEmO2IMvsCwzoKiKBKqy86ncS4OVe4VqXP6ikEKdsdul0083XXmzs83YyJIS85idbbaPHVt/1g4AAACoTXa7mfq7fK50oNSuhe0ztO1AvLocyFb39i7ZSsslULGx0mWXVZpAVXa+UMi9QjUuf9ks6/A6sPFxuVxyOBxyOp2KjY314RWoDrfbrdzcXG3ZkqB58+xeawakpZkvTKhPcwkAABBola2vdEZ8lsZa3utIuXv0UO555ylhwADZj1B1VHa+UMi9QjUuX2sDCikKqTpTVkglJCRIstfbVawBAAACze1WxVxJ3hvdxx6r3Lw8JSQkHLGQqvJ8IZB7hWJcvtYGLMiLoLDbpa5dgx0FAABAaKo8Vzpsox8LLYVq7hWqcfkiBOpQAAAAAKhfKKQAAAAAwE8UUgAAAADgJwopAAAAAPAThRQAAAAA+IlCCgAAAAD8RCEFAAAAAH6ikAIAAAAAP1FIAQAAAICfKKQAAAAAwE8UUgAAAADgp/BgB4DGze2WcnIkp1NyOKSUFMlOeQ8AAOrSwYTEvcup9bsdyotLkaOFnbzED40xp6OQQtBkZUlz50rZ2VJRkRQVJaWmShkZUnp6sKMDAACNwsGExLU0W7kbirS7OEp/RKXqq+QMRZyQTl7ig8aa01FIISiysqQZM6S8PCkpSYqJkQoKpMxMaf16aerUhv3FAwAAIeBgQrLnjzz9vCNJztIYtYwpUK/iTHXcuF7PFE3VjPXp5CVH0Jhzugbe4IZQ5HabuxZ5eeZuRWysFBZmHlNTzfZ588xxAAAAAXEwIbF25OmXA6naVRorR4swlUbHaltcquJK8nRJyTzt3OEmL6lCY8/pKKRQ59auNU2/SUmSzea9z2Yz21esMP1sAQAAAiInR8rO1p64JO1y2hQTU26fzaZdMUlK3L1CfR055CVVOHgJG21ORyGFOud0mv6zXr+wyomONvudzrqODAAANBoHE5KisBiVlkhNDhvwUhwerSYlRWoZ5iQvqUJjz+kopFDnHA4zCLGgoPL9hYVmv8NR15EBAIBG42BCElVaoLBw6UC
J9+7IkkIdCI9SfqmDvKQKjT2no5BCnevc2fSb3bRJsizvfZZltqelmWkzAQAAAiIlRUpNVfPdm9TCYXkXA5alFgWbtCUuTZnOFPKSKhy8hI02p6OQQp2z2810mPHxpl+tyyWVlJjH7GyzfezYhr/2AAAACKKDCYmtdbx6NslWizCXnLtKFFboUtvd2doVHq/XwseqVWs7eUkVGntOZ7Osw+vHxsflcsnhcMjpdCo2NjbY4TRYbrdbubm5SkhIkN1ur3TNgbQ084VrqNNkAgCAEFPJOlK/R6Xp6+SxijwxPaTzksNzq2BpaDmdr7UB60ghaNLTpV69Gt8q2AAAIIQcTEhic3LUbJdT63c7ZMWlqHcLO3mJjxprTkchhaCy26WuXYMdBQAAaNQOJiR2SZ0O/oF/GmNO18DrRAAAAACofRRSAAAAAOAnCikAAAAA8BOFFAAAAAD4iUIKAAAAAPxEIQUAAAAAfqKQAgAAAAA/UUgBAAAAgJ8opAAAAADATxRSAAAAAOAnCikAAAAA8BOFFAAAAAD4KaQLqdLSUk2ZMkWdOnVS06ZN1blzZ82cOVOWZXmOsSxLU6dOVbt27dS0aVMNHTpUa9asCWrcqF1ut7R6tbRsmXl0u4MdEQAAwEG1mKiQ89Qv4cEO4EgefPBBPfPMM5o7d6569OihH3/8UVdccYUcDoduvPFGSdJDDz2kJ554QnPnzlWnTp00ZcoUDRs2TCtWrFBUVFSwPwJqKCtLmjtXys6WioqkqCgpNVXKyJDS04MdHQAAaNRqMVEh56l/QrqQWrx4sc4991ydffbZkqSOHTvqtdde03//+1/pYGvU448/rnvuuUfnnnuuJGnevHlq06aN3nvvPV188cVBjR81k5UlzZgh5eVJSUlSTIxUUCBlZkrr10tTp/KLBQAABEktJirkPPVTSHftO+mkk7Rw4UKtXr1akrR8+XJ9//33Gj58uCRp3bp12rZtm4YOHep5jcPh0AknnKAlS5YELW7UnNtt7srk5Zm7MbGxUliYeUxNNdvnzaPJGwAABEEtJirkPPVXSLdI3XXXXXK5XOrevbvCwsJUWlqq+++/X2PGjJEkbdu2TZLUpk0br9e1adPGs68yxcXFKi4u9jx3uVySJLfbLTc/pQHjdrtlWZZP13jNGmnlSik5WbIfVu7bbGZ7drY5rkuXwMUMAABQQS0mKjU5lT+5FXzn6/UM6ULqjTfe0CuvvKJXX31VPXr00M8//6ybb75ZiYmJysjIqPZ5Z82apenTp1fYvmPHDhUVFdUwalTF7XbL6XTKsizZD/9NcZjcXKldOykxseIvFUlq2VKKjDTHORyBixkAAKCCWkxUanIqf3Ir+G7Pnj0+HRfShdTtt9+uu+66yzPWqWfPnlq/fr1mzZqljIwMtW3bVpK0fft2tWvXzvO67du3q3fv3lWed/LkyZo0aZLnucvlUnJyslq3bq3Y2NiAfqbGzO12y2azqXXr1kf9sjud0tatUnGxado+nMsl5edLCQnmDwAAQJ2pxUSlJqfyJ7eC73ydsC6kC6nCwsIKPxRhYWGe5rZOnTqpbdu2WrhwoadwcrlcWrp0qa6//voqzxsZGanIyMgK2+12Oz+EAWaz2Xy6zl26SN27m0GWqammabuMZUkbN0r9+pnj+CcDAAB1qhYTlZqeytfcCr7z9VqGdCE1cuRI3X///TrmmGPUo0cPZWVl6bHHHtOVV14pHfzBufnmm3XfffepS5cununPExMTNWrUqGCHjxqw2810n+vXm37BSUlSdLRUWCht2iTFx0tjx1JEAQCAIKjFRIWcp/6yWeVXtw0xe/bs0ZQpU/Tuu+8qNzdXiYmJuuSSSzR16lRFRERIB6dAnzZtmp577jnt3r1bJ598sp5++ml17drV5/dxuVxyOBxyOp107Qsgt9ut3NxcJSQk+FzpV7amQlqa+YXCNKAAACCoajFRqc6pqpNb4eh8rQ1CupCqKxRSdaO6X3a3W8r
JMX2IHQ4pJYW7MgAAIETUYqLi76kopALD19ogpLv2ATrY5O1HAyMAAEDdqcVEhZynfqF0BQAAAAA/UUgBAAAAgJ8opAAAAADATxRSAAAAAOAnCikAAAAA8BOFFAAAAAD4iUIKAAAAAPxEIQUAAAAAfmJBXtQdt1vavFnasEGKi6vRyt8AAABAMFFIoW5kZUnz5kkFBdLq1VJkpJSaKmVkSOnpwY4OAAAA8AvNAQi8rCxpxgwpM1Nq3lzq0kVq2dI8nzHD7AcAAADqEQopBJbbLc2dK+XlmRao6GgpLEyKjTXP8/JMS5XbHexIAQAAQoLbbTrwLFtmHn1Ok6r9QlQHXfsQWDk5Una2lJQk2Wze+2w2s33FCnNc167BihIAACAkZGWZe9DZ2VJRkRQV5eNoiGq/ENVFIYXAcjrNlzkmpvL90dHSli3mOAAAgEasbDREXp651xwTY4aXZ2ZK69dLU6dWURP9/LM0c2Y1XoiaoGsfAsvhMHdECgoq319YaPY7HHUdGQAAQMg4fDREbKyPoyHcbunf/67GC1FTFFIIrJQU8yXetEmyLO99lmW2p6WZ4wAAABopf0ZDeNm6tZovRE1RSCGw7HbTNzc+3nzJCwulkhLJ5TLP4+OlsWNZTwoAADRqvoyGKCqqZDREYWE1X4iaIntF4KWnm765fftKe/aYOyL5+VK/fvTZBQAAqMloiOhohlEECZNNoG6kp0s9e0rLl0ulpVJcnOnOR0sUAACAZzREZqZ5LN9Lr2w0RL9+lYyGaNfOvODHH/18IWqKQgp1x26X2reXEhIooAAAAMopGw2xfv2hIU/R0aZBadOmI4yGsNulyy+X/vjDzxeipriiAAAAQAgoPxoiP9+P0RC9e1fzhagJWqQAAACAEJGeLvXqZWohp9MMbfJpNES1X4jqopACAAAAQojdLnXtWpcvRHVQogIAAACAnyikAAAAAMBPFFIAAAAA4CcKKQAAAADwE4UUAAAAAPiJQgoAAAAA/EQhBQAAAAB+opACAAAAAD9RSAEAAACAnyikAAAAAMBPFFIAAAAA4KfwYAcA+M3tlnJyJKdTcjiklBTJzj0BAAAQGqpKVY6WwpDi1C8UUqhfsrKkuXOl7GypqEiKipJSU6WMDCk9PdjRAQCARq6qVOWEE6SlS6tOYUhx6h8KKdQfWVnSjBlSXp6UlCTFxEgFBVJmprR+vTR1Kr9pAABA0FSVqnzzjfTGG1KbNlL37hVTmAsvNPtJceoXGgtRP7jd5jZNXp65PRMbK4WFmcfUVLN93jxzHAAAQB2rKlVp3lw6cEDau1cqKTHPy6cwO3ZIDz1kHklx6hcKKdQPOTmmrTspSbLZvPfZbGb7ihXmOAAAgDpWVaricpkxTy1aSLt3m7+XsdmkuDhp40YzJooUp36hkEL94HSaDsMxMZXvj442+8v/dgIAAKgjVaUq+/eblqjISPO4f7/3/rAw02IVXsWAG1Kc0EUhhfrB4TCjLgsKKt9fWGj2Oxx1HRkAAECVqUpEhCmSiovNY0SE9/7SUqlJE1NkVYYUJ3RRSKF+SEkxHYU3bZIsy3ufZZntaWnmOAAAgDpWVaoSG2uKoF27TDe+8gWRZZnufsnJpsWJFKd+oZBC/WC3m/k/4+NNB2SXy9y6cbnM8/h4aexYFlsAAABBUVWqsmePaXFq1sy0SO3Z453CtG4t3XGHeSTFqV9slnV47dv4uFwuORwOOZ1OxcbGBjucBsvtdis3N1cJCQmyV/e3QWWLLKSlmd8wzAsKAACCrKpU5fjjK64jVT6FqU6KUyu5FSrwtTagkKKQqjO19mVn2W8AABDCqkpVjpbC+JviUEgFhq+1AQvyov6x26WuXYMdBQAAQKWqSlWOlsKQ4tQvlK4AAAAA4CcKKQAAAADwE4UUAAAAAPiJMVKoHiZ8AAAAQCNGIQX/VTY/Z2qqWTyBKcgBAADQCFBIwT9ZWdKMGVJenpSUJMXESAUFUmamtH69NHUqxRQ
AAAAaPPpiwXdut2mJysszLVCxsVJYmHlMTTXb580zxwEAAAANGIUUfJeTY7rzJSVJNpv3PpvNbF+xwhwHAAAANGAUUvCd02nGRMXEVL4/OtrsdzrrOjIAAACgTlFIwXcOh5lYoqCg8v2FhWa/w1HXkQEAAAB1ikIKvktJMWOhNm2SLMt7n2WZ7Wlp5jgAAACgAWPWPvjObjdTnK9ff2isVHS0aYnatEmKj5fGjm1460mxZhYAAAAOQyEF/6SnmynOy9aR2rLFdOfr188UUQ1t6nPWzAIAAEAlKKTgv/R0qVevht9Kw5pZAAAAqAKFFKrHbpe6dg12FIFz+JpZZdO9l62ZlZ1t1szq1avhFZAAAAA4KjJAoDKsmQUAAIAjoJACKsOaWQAAADgCCimgMqyZBQAAgCOgkAIqw5pZAAAAOAIKKaAyZWtmxcebsVIul1RSYh6zsxvumlkAAADwCVkgUJWyNbP69pXy883EEvn5Zs0spj4HAABo1Jj+HDiSxrJmFgAAAPxCIQUcTUNfMwsAAAB+47Y6AAAAAPiJQgoAAAAA/EQhBQAAAAB+opACAAAAAD9RSAEAAACAnyikAAAAAMBPFFIAAAAA4CcKKQAAAADwE4UUAAAAAPiJQgoAAAAA/EQhBQAAAAB+Cg92AECNuN1STo7kdEoOh5SSItm5PwAAAGqXu8St9QtztG+bU03bOtRhSIrs4UHOOdxuafNmacMGKS6OPKiO1aiQKi4uVmRkZO1FU4nNmzfrzjvv1KeffqrCwkKlpKToxRdfVL9+/SRJlmVp2rRpev7557V7924NHDhQzzzzjLp06RLQuBACsrKkuXOl7GypqEiKipJSU6WMDCk9PdjRAQCABmLla1na9tBcNduYrfADRSpsEqX1yalqe0eGul8SpJwjK0uaN08qKJBWr5YiI8mD6phfJeunn36qjIwMHXvssWrSpImio6MVGxurwYMH6/7779eWLVtqNbhdu3Zp4MCBatKkiT799FOtWLFCjz76qFq0aOE55qGHHtITTzyhOXPmaOnSpYqJidGwYcNUVFRUq7EgxGRlSTNmSJmZUsuWUpcu5jEz02zPygp2hAAAoAFY+VqWdt86Qy3WZqo4pqVcbbqoOKalWqzN1O5bZ2jla0HIOcrnQc2bkwcFic2yLOtoB7377ru68847tWfPHo0YMULHH3+8EhMT1bRpU+Xn5+vXX3/Vd999pyVLlmjcuHGaOXOmWrduXePg7rrrLi1atEjfffddpfsty1JiYqJuvfVW3XbbbZIkp9OpNm3a6KWXXtLFF1/s0/u4XC45HA45nU7FxsbWOG5Uzu12Kzc3VwkJCbLXpNnZ7ZYmTTK/LFJTJZvt0D7LMi1U/fpJjz5K8zYAAKg2d4lb3/afpBZrM+VMrJhzOLZka1dKPw3676N1182vXB7kTktTbkKCEnbskN2yyINqia+1gU9d+x566CHNnj1bw4cPrzQBvvDCC6WD3fCefPJJvfzyy7rllltqEr8k6YMPPtCwYcN0wQUX6JtvvlH79u11ww036JprrpEkrVu3Ttu2bdPQoUM9r3E4HDrhhBO0ZMmSKgup4uJiFRcXe567XC7pYKLvdrtrHDcq53a7ZVlWza/xmjXSypVScnLFXxA2m9menW2Oo4snAACopj8WrFHM5pXa0ypZVtjhObBNe1olK2ZTtv5YsEYdz6yjnKNcHuS222VJcpcVeORBtcLXXNWnQmrJkiU+nax9+/b6+9//7tOxvvj999/1zDPPaNKkSfrb3/6mZcuW6cYbb1RERIQyMjK0bds2SVKbNm28XtemTRvPvsrMmjVL06dPr7B9x44ddAkMILfbLafTKcuyatYilZsrtWsnJSZWfqelZUvTTzg310xAAQAAUA15O3NVktpORXGJUoVCSjpQ2lL23ZHK25mr6Nw6yjnK5UHusDA5Y2Nl2WymRUrkQbVhz549Ph1X41n7CgoKVFpaGpAucW63W/369dMDDzw
gSUpPT9evv/6qOXPmKCMjo9rnnTx5siZNmuR57nK5lJycrNatW9O1L4DcbrdsNptat25ds0LK6ZS2bpWKi6XK/r1cLik/X0pIMH8AAACqobCVUzuztyoqplil0RVzjrBCl5oU5KtVqwQl1FXOUS4PcjscslmWWuflHSqkyINqLCoqyqfjql1IrVixQmPHjtVPP/0km82mtLQ0r9n0akO7du2UlpbmtS01NVVvv/22JKlt27aSpO3bt6tdu3aeY7Zv367evXtXed7IyMhKZxu02+01S/BxVDabrebXuUsXqXv3qsdIbdxo+gZ36ULfYAAAUG0dh3bRhvbdzRipyIo5R/OdG7UrpZ/6Du1Sdzlk+TyoeXPZJNkt69AYKfKgGvP137LaV/e6667ThAkTtHfvXu3cuVOjR4+uUStRZQYOHKhVq1Z5bVu9erU6dOggSerUqZPatm2rhQsXeva7XC4tXbpUAwYMqNVYEELsdjO1Z3y86QPsckklJeYxO9tsHzuWXx4AAKBG7OF2tb0jQ/uaxcuxJVthhS6ptERhhS45tmRrX7N4tb19bN2uJ3V4HlRYSB4UJD5f4XPPPVebN2/2PN+xY4fOOeccRUdHKy4uTiNGjND27dtrNbhbbrlFP/zwgx544AHl5OTo1Vdf1XPPPafx48dLB1s3br75Zt1333364IMP9Msvv2js2LFKTEzUqFGjajUWhJj0dGnqVKlvX9N8nZNjHvv1M9tZPwEAANSC7pekK+7RqdrVua8iC/IVuz1HkQX52pXST3GPTg3OOlLl86A9e8iDgsTnrn2XXXaZTj/9dI0fP14TJ07UhAkT1KNHDw0ePFgHDhzQl19+qVtvvbVWg+vfv7/effddTZ48WTNmzFCnTp30+OOPa8yYMZ5j7rjjDhUUFOjaa6/V7t27dfLJJ+uzzz7zuW8j6rH0dKlXL/PLw+k0AypZ0RsAANSy7pekq+sFvbR+YY72bXPK0dahPkNS6rYl6nDp6VLPntLy5VJpqRQXRx5Ux3xaR6qM0+nUnXfeqaysLM2ZM0fh4eH6+uuvVVpaqoEDB6p///6BjTZAWEeqbtTaOlIAAAAgtwqQWl1HqozD4dCcOXP0/fffKyMjQ2eccYZmzpyp6Ojo2ogZAAAAAOoFv0rX/Px8ZWZmqmfPnsrMzFRsbKzS09P1ySefBC5CAAAAAAgxPhdSr776qpKSknT22WerQ4cO+vTTTzVt2jS9//77euihh3ThhRfW+mQTAAAAABCKfC6kJk+erBdeeEHbtm3TwoULNWXKFElS9+7d9fXXX+uMM85gynEAAAAAjYLPhdTevXvVrVs3SVLnzp1VWFjotf+aa67RDz/8UPsRAgAAAECI8XmyiYyMDJ199tk69dRT9eOPP+ryyy+vcExCQkJtxwcAAAAAIcfnQuqxxx7TaaedppUrV2rcuHE688wzAxsZAAAAAIQov6Y/HzlypEaOHBm4aAAAAACgHvBpjNTrr7/u8wk3btyoRYsW1SQmAAAAAAhpPhVSzzzzjFJTU/XQQw8pOzu7wn6n06lPPvlEl156qfr06aOdO3cGIlYAAAAACAk+de375ptv9MEHH+jJJ5/U5MmTFRMTozZt2igqKkq7du3Stm3bFB8fr3HjxunXX39VmzZtAh85AAAAAASJz2OkzjnnHJ1zzjnKy8vT999/r/Xr12vfvn2Kj49Xenq60tPTZbf7PJs6EFRut5STIzmdksMhpaRI/PgCAADAV35NNiFJ8fHxGjVqVGCiAepAVpY0d66UnS0VFUlRUVJqqpSRIaWnBzs6AAAA1Ad+F1JAfZaVJc2YIeXlSUlJUkyMVFAgZWZK69dLU6dSTAEAAODo6MyERsPtNi1ReXmmBSo2VgoLM4+pqWb7vHnmOAAAAOBIKKTQaOTkmO58SUmSzea9z2Yz21esMMcBAAAAR0IhhUbD6TRjomJiKt8fHW32O511HRkAAADqG78Lqa+++iowkQAB5nCYiSU
KCirfX1ho9jscdR0ZAAAA6hu/C6mzzjpLnTt31n333aeNGzcGJiogAFJSzFioTZsky/LeZ1lme1qaOQ4AAAA4Er8Lqc2bN2vChAl66623dOyxx2rYsGF64403tH///sBECNQSu91McR4fb8ZKuVxSSYl5zM4228eOZT0pAAAAHJ3fKWN8fLxuueUW/fzzz1q6dKm6du2qG264QYmJibrxxhu1fPnywEQK1IL0dDPFed++Un6+mVgiP1/q14+pzwEAAOA7m2Ud3snJP1u2bNFzzz2nv//97woPD1dRUZEGDBigOXPmqEePHrUXaQC5XC45HA45nU7FxsYGO5wGy+12Kzc3VwkJCbIHudnH7TZFlNNpxkSlpNASBQAA6pdQyq0aEl9rg2pd8QMHDuitt97SiBEj1KFDB82fP19PPfWUtm/frpycHHXo0EEXXHBBTeIHAspul7p2lfr3N4/87gEAAIA/wv19wcSJE/Xaa6/Jsixdfvnleuihh/SnP/3Jsz8mJkaPPPKIEhMTaztWAAAAAAgJfhdSK1as0JNPPqnRo0crMjKy0mPi4+OZJh0AAABAg+V3h6Zp06bpggsuqFBElZSU6Ntvv5UkhYeHa/DgwbUXJQAAAACEEL8LqdNOO035+fkVtjudTp122mm1FRcAAAAAhCy/CynLsmSz2Sps37lzp2JiYmorLgAAAAAIWT6PkRo9erQkyWazady4cV5d+0pLS/W///1PJ510UmCiBAAAAIAQ4nMh5XA4pIMtUs2bN1fTpk09+yIiInTiiSfqmmuuCUyUAAAAABBCfC6kXnzxRUlSx44dddttt9GNDwAAAECj5ff059OmTQtMJAAAAABQT/hUSPXp00cLFy5UixYtlJ6eXulkE2V++umn2owPAAAAAEKOT4XUueee65lcYtSoUYGOCQAAAABCms2yLCvYQQSby+WSw+GQ0+lUbGxssMMJXW63lJMjOZ2SwyGlpEh232fQd7vdys3NVUJCgux+vK7eqeF1AgAAISLE/09vNLlVHfO1NvB7jBQaqawsae5cKTtbKiqSoqKk1FQpI0NKTw92dKGD6wQAQMPA/+k4Cp8KqRYtWhxxXFR5+fn5NY0JoSYrS5oxQ8rLk5KSpJgYqaBAysyU1q+Xpk4N/V8odXFHqSFcJwAAwP/p8IlPhdTjjz8e+EgQmtxuczcmL8/chSkrqGNjzfPsbGnePKlXr5Bq6vZSF3eUGsJ1AgAA/J8On/lUSGVkZAQ+EoSmnBzzCyMp6dAvkjI2m9m+YoU5rmvXYEVZtbq6o1TfrxMAADD4Px0+8qmMdrlcXn8/0h80ME6nacWpagHm6Giz3+ms68iO7vA7SrGxUljYoTtKeXnmjpLbXfP3qs/XCQAAHML/6fCRz2Oktm7dqoSEBMXFxVU6XsqyLNlsNpWWlgYiTgSLw2G6whUUmALkcIWFZr/DEYzojqwu7yjV5+sEAAAO4f90+MinQurLL79Uy5YtJUlfffVVoGNCKElJMa03mZne/YQlybKkTZukfv3McaHGlztKW7bUzh2l+nydAADAIfyfDh/5VEgNHjy40r+jEbDbzaQM69cfat2JjjZ3YzZtkuLjpbFjQ3OwZV3eUarP1wkAABzC/+nwUbUW5N21a5f+9a9/KTs7W5KUlpamK664wtNqVd+wIK8PKpv5Li3N/CLxcbKGOl80zu2WJk2q+o5Sdra5o/Too7X3y7AWrhMAAAgB9eD/dBbkDQxfawO/C6lvv/1WI0eOlMPhUL9+/SRJmZmZ2r17tz788EMNGjSo5tHXMQopH9VwLaagfNkPn7Xv8DtKgVgHIsRXQQcAAD4K8f/TKaQCI2CFVM+ePTVgwAA988wzCgsLkySVlpbqhhtu0OLFi/XLL7/UPPo6RiFVN4L2Za8Hd5QAAAD8RSEVGL7WBj6NkSovJydHb731lqeIkqSwsDBNmjRJ8+bNq37EQKCkp5tF80L4jhIAAADqF78LqT59+ig7O1v
dunXz2p6dna1evXrVZmxA7bHbWTQPAAAAtcanQup///uf5+833nijbrrpJuXk5OjEE0+UJP3www/65z//qb///e+BixQAAAAAQoRPY6TsdrtsNpuOdmh9XZCXMVJ1g368AAAAtYfcKjBqdYzUunXrajM2AAAAAKjXfCqkOnToEPhIAAAAAKCe8HuyiTIrVqzQhg0btH//fq/t55xzTm3EBQAAAAAhy+9C6vfff9df/vIX/fLLL17jpmw2m3RwTSkAAAAAaMj8HpV20003qVOnTsrNzVV0dLR+++03ffvtt+rXr5++/vrrwEQJAAAAACHE7xapJUuW6Msvv1R8fLzsdrvsdrtOPvlkzZo1SzfeeKOysrICEykAAAAAhAi/W6RKS0vVvHlzSVJ8fLy2bNkiHZyQYtWqVbUfIQAAAACEGL9bpP70pz9p+fLl6tSpk0444QQ99NBDioiI0HPPPadjjz02MFECh3G7pZwcyemUHA4pJUVi+QQAAADUFb8LqXvuuUcFBQWSpBkzZujPf/6zTjnlFLVq1Ur/+c9/AhEj4CUrS5o7V8rOloqKpKgoKTVVysiQ0tODHR0AAAAaA78LqWHDhnn+npKSopUrVyo/P18tWrTwzNwHBEpWljRjhpSXJyUlSTExUkGBlJkprV8vTZ1KMQUAAIDAq1FnqI0bN2rjxo1q2bIlRRQCzu02LVF5eaYFKjZWCgszj6mpZvu8eeY4AAAAIJD8LqRKSko0ZcoUORwOdezYUR07dpTD4dA999yjAwcOBCZKQGZMVHa2aYk6vG632cz2FSvMcQAAAEAg+d21b+LEiXrnnXf00EMPacCAAdLBKdHvvfde7dy5U88880wg4gTkdJoxUTExle+Pjpa2bDHHAQAAAIHkdyH16quv6vXXX9fw4cM924477jglJyfrkksuoZBCwDgcZmKJggLTne9whYVmv8MRjOgAAADQmPjdtS8yMlIdO3assL1Tp06KiIiorbiAClJSzFioTZsky/LeZ1lme1qaOQ4AAAAIJL8LqQkTJmjmzJkqLi72bCsuLtb999+vCRMm1HZ8gIfdbqY4j483Y6VcLqmkxDxmZ5vtY8eynhQAAAACz6eufaNHj/Z6vmDBAiUlJalXr16SpOXLl2v//v0aMmRIYKIEDkpPN1Ocl60jtWWL6c7Xr58popj6HAAAAHXBp0LKcdigk/POO8/reXJycu1GBRxBerrUq5eZnc/pNGOiUlJoiQIAAEDd8amQevHFFwMfCeAHu13q2jXYUQAAAKCx8nvWvjI7duzQqlWrJEndunVT69atazMuAAAAAAhZfneGKigo0JVXXql27dpp0KBBGjRokBITE3XVVVepsLAwMFECAAAAQAjxu5CaNGmSvvnmG3344YfavXu3du/erffff1/ffPONbr311sBECQAAAAAhxO+ufW+//bbeeustnXrqqZ5tI0aMUNOmTXXhhReyIC8AAACABs/vFqnCwkK1adOmwvaEhAS69gEAAABoFPwupAYMGKBp06apqKjIs23fvn2aPn26BgwYUNvxAQAAAEDI8btr3+OPP66zzjqrwoK8UVFRmj9/fiBiBAAAAICQ4nch1bNnT61Zs0avvPKKVq5cKUm65JJLNGbMGDVt2jQQMQIAAABASPGrkDpw4IC6d++ujz76SNdcc03gogIAAACAEObXGKkmTZp4jY0CAAAAgMbI78kmxo8frwcffFAlJSWBiQgAAAAAQpzfY6SWLVumhQsX6vPPP1fPnj0VExPjtf+dd96pzfgAAACA0ON2Szk5ktMpORxSSopk97ONojbOURtqEEeofIRg8LuQiouL03nnnReYaI7i73//uyZPnqybbrpJjz/+uCSpqKhIt956q15//XUVFxdr2LBhevrppytd6woAAACosawsae5cKTtbKiqSoqKk1FQpI0NKT6+7c9SGGsQRKh8hWPwupF588cXARHIUy5Yt07PPPqvjjjvOa/stt9yijz/
+WG+++aYcDocmTJig0aNHa9GiRUGJEwAAAA1YVpY0Y4aUlyclJUkxMVJBgZSZKa1fL02devQqojbOEeTPEiofIZh8bnhzu9168MEHNXDgQPXv31933XWX9u3bF9joDtq7d6/GjBmj559/Xi1atPBsdzqd+te//qXHHntMp59+uvr27asXX3xRixcv1g8//FAnsQEAAKCRcLtNE0xenml6iY2VwsLMY2qq2T5vnjkukOcI8mcJlY8QbD63SN1///269957NXToUDVt2lT/+Mc/lJubqxdeeCGwER6c4OLss8/W0KFDdd9993m2Z2Zm6sCBAxo6dKhnW/fu3XXMMcdoyZIlOvHEEys9X3FxsYqLiz3PXS6XdLBYdDf0f/EgcrvdsiyLawwAAOqnNWuklSul5OSKA4FsNrM9O9sc16VL4M5xUI1yqxrEUYsfIST5ej19LqTmzZunp59+Wtddd50kacGCBTr77LP1f//3f7IHcETZ66+/rp9++knLli2rsG/btm2KiIhQXFyc1/Y2bdpo27ZtVZ5z1qxZmj59eoXtO3bsYHr3AHK73XI6nbIsK6A/MwAAAAGRmyu1ayclJlY+o0LLllJkpDnO4QjcOQ6qUW5Vgzhq8SOEpD179vh0nM+F1IYNGzRixAjP86FDh8pms2nLli1KSkqqXpRHsXHjRt1000364osvFBUVVWvnnTx5siZNmuR57nK5lJycrNatWys2NrbW3gfe3G63bDabWrduTSEFAADqH6dT2rpVKi42/dgO53JJ+flSQoL5E6hzHFSj3KoGcdTiRwhJvtYdPhdSJSUlFU7apEkTHThwwP/ofJSZmanc3Fz16dPHs620tFTffvutnnrqKc2fP1/79+/X7t27vVqltm/frrZt21Z53sjISEVGRlbYbrfbSfADzGazcZ0BAED91KWL1L27mVEhNdX0YytjWdLGjVK/fua4qnKd2jhHOdXOrWoQRy1/hJDj67X0uZCyLEvjxo3zKkCKior017/+1WstqdpcR2rIkCH65ZdfvLZdccUV6t69u+68804lJyerSZMmWrhwoWdK9lWrVmnDhg0aMGBArcUBAAAAyG43c3uvX28GASUlSdHRUmGhtGmTFB8vjR175OqhNs4R5M8SKh8h2GyWZVm+HHjFFVf4dMJAT49+6qmnqnfv3p51pK6//np98skneumllxQbG6uJEydKkhYvXuzzOV0ulxwOh5xOJ137Asjtdis3N1cJCQm0SAEAgPqrsgWU0tJM9VCTdaT8PEet5FY1iKM2LkMo8rU28LlFKljrRx3N7NmzZbfbdd5553ktyAsAAAAERHq61KuXlJNjBgw5HFJKin9NMLVxjtpQgzhC5SMEi88tUg0ZLVJ1gxYpAACA2kNuFRi+1gZccQAAAADwE4UUAAAAAPiJQgoAAAAA/EQhBQAAAAB+opACAAAAAD9RSAEAAACAnyikAAAAAMBPFFIAAAAA4CcKKQAAAADwE4UUAAAAAPiJQgoAAAAA/BQe7ADQcLjdUk6O5HRKDoeUkiLZKdUBAADqTm0lZCR2R0UhhVqRlSXNnStlZ0tFRVJUlJSaKmVkSOnpwY4OAACgEaithIzEzicUUqixrCxpxgwpL09KSpJiYqSCAikzU1q/Xpo6le8cAABAQNVWQkZi5zPa51Ajbre5YZGXZ25UxMZKYWHmMTXVbJ83zxwHAACAAKithIzEzi8UUqiRnBzT6puUJNls3vtsNrN9xQpzHAAAAAJg7draSchI7PxCIYUacTpN19mYmMr3R0eb/U5nXUcGAADQSNRWQkZi5xcKKdSIw2HGHxYUVL6/sNDsdzjqOjIAAIBGorYSMhI7v1BIoUZSUkyX2U2bJMvy3mdZZntamjkOAAAAAdC5c+0kZCR2fqGQQo3Y7WYmzPh406XW5ZJKSsxjdrbZPnZsCC874HZLq1dLy5aZRwZPAgCA+qa2ErJ6n9jVLZtlHV5uNj4ul0sOh0NOp1OxsbH
BDqdeqmy5gbQ0810rmyHT7XYrNzdXCQkJsofCF5A1EgAAQD1WIbfyJSHzRW2dp57ytTZgHSnUivR0qVeverQANmskAACAhqa2ErJ6l9gFB4UUao3dLnXtGuwofHD4Ggll03uWrZGQnW3WSOjVi18YAACgfqmthKzeJHbBQ5aIxoc1EgAAAFBDFFJofFgjAQAAADVEIYXGhzUSAAAAUEMUUmh8WCMBAAAANUQhhcaHNRIAAABQQ2SKaJzS080U5337Svn5ZmKJ/HypXz+mPgcAAMBRMf05Gi/WSAAAAEA1UUihcWONBAAAAFQDt94BAAAAwE8UUgAAAADgJwopAAAAAPAThRQAAAAA+InJJlDvud1MvAcAAAIshBOOEA6tQaOQQr2WlSXNnWvW0S0qkqKipNRUs94uS0EBAIBaEcIJx9q10oMPSitWhFxoDR6FFOqtrCxpxgwpL09KSpJiYqSCAikzU1q/nnV1AQBALQjhhOPnn6XXX5eWL5fatw+p0BoFGv1QL7nd5sZQXp656xIbK4WFmcfUVLN93jxzHAAAQLWEcMLhdkv//rfkcoVcaI0GhRTqpZwc07qelCTZbN77bDazfcUKcxwAAEC1hHDCURZafHzIhdZoUEihXnI6TT/gmJjK90dHm/1OZ11HBgAAGowQTjjKQouKqnw/uVDgUUg1Rm63tHq1tGyZeayHbb4Oh/nFUVBQ+f7CQrPf4ajryAAAQIMRwglHWWhFRZXvJxcKPCabaGxCeNYZf6SkmLAzM81j+SZty5I2bZL69TPHAQAAVEsIJxxloeXlVWyVIheqG7RINSZls85kZkotW0pdupjHzEyzPSsr2BH6zG43tV98vKkJXS6ppMQ8lvUXHjuWNRQAAEANhHDCYbdLl19uJpcIsdAaDZtlWVawgwg2l8slh8Mhp9Op2NjYYIcTGG63NGlS1XdUsrPNbYtHHw3YN87tdis3N1cJCQmy19J7VNbAlpZmfnHUowY2AAAQykI04XC73VqyJFdvvZWgFSvsoRRaveZrbUDXvsbCn1lnunYNVpR+S0+XevViNW8AABBAIZxwdO4sPfyw9PvvIRdag0ch1Vj4MuvMli31cmoXu71e1X4AAKA+CuGEI4RDa9CoVRuLEJ51BgAAAKhvKKQai7KpXTZtMmOiyiub2iUtjaldAAAAAB9QSDUWITzrDAAAAFDfkDU3Junp0tSpUt++Un6+GTCZn29m65s6laldAAAAAB8x2URjE8KzzgAAAAD1BYVUY8TULgAAAECN0AwBAAAAAH6ikAIAAAAAP1FIAQAAAICfKKQAAAAAwE8UUgAAAADgJ2btQ9XcbqZJ5xoAAIBgC1Y+Qh50RBRSqFxWljR3rpSdLRUVSVFRUmqqlJHReBbu5RoAAIBgO1I+0qtXcN6XPEiikEKlsrKkGTOkvDwpKUmKiZEKCqTMTGn9emnq1Ib/BeIaAACAYDtaPjJlipSYWPfvSx4kMUYKFbjd5u5DXp656xAbK4WFmcfUVLN93jxzXEPFNQAAAMHmSz7y8su1n4+QB/mMQgrecnJME25SkmSzee+z2cz2FSvMcQ0V1wAAAASbr/nI1q3BeV/yIAopHMbpNP1gY2Iq3x8dbfY7nXUdWd3hGgAAgGDzNR8pLAzO+5IHUUjhMA6HGUxYUFD5/sJCs9/hqOvI6g7XAAAABJuv+Uh0dHDelzyIQgqHSUkx/V83bZIsy3ufZZntaWnmuIaKawAAAILN13ykXbvgvC95EIUUDmO3m2kt4+NN/1iXSyopMY/Z2Wb72LENew0BrgEAAAg2X/KRyy6r/XyEPMhnNss6vNRsfFwulxwOh5xOp2JjY4MdTmiobO2AtDTzxanmdJdut1u5ublKSEiQvT58+QJwDQAAAPxyhHzE3atX4HKrRpwH+VobUEhRSFWtllezrneFlFjRGwAAhIAq8pGA51aNNA/ytTZgQV5UzW6XunYNdhTBxTUAAADBFqx8hDzoiBp+SQk
AAAAAtYxCCgAAAAD8RCEFAAAAAH6ikAIAAAAAP1FIAQAAAICfKKQAAAAAwE8UUgAAAADgJwopAAAAAPAThRQAAAAA+IlCCgAAAAD8RCEFAAAAAH4K6UJq1qxZ6t+/v5o3b66EhASNGjVKq1at8jqmqKhI48ePV6tWrdSsWTOdd9552r59e9BiblTcbmn1amnZMvPodgc7IgAAANRmjka+V6XwYAdwJN98843Gjx+v/v37q6SkRH/729905plnasWKFYqJiZEk3XLLLfr444/15ptvyuFwaMKECRo9erQWLVoU7PAbtqwsae5cKTtbKiqSoqKk1FQpI0NKTw92dAAAAI1TbeZo5HtHZLMsywp2EL7asWOHEhIS9M0332jQoEFyOp1q3bq1Xn31VZ1//vmSpJUrVyo1NVVLlizRiSee6NN5XS6XHA6HnE6nYmNjA/wpGoCsLGnGDCkvT0pKkmJipIICadMmKT5emjq10i+X2+1Wbm6uEhISZLeHdGMoAABAyKuQW1UzR6tUbZ6rnvG1NqhX2azT6ZQktWzZUpKUmZmpAwcOaOjQoZ5junfvrmOOOUZLliwJWpwNmttt7kzk5Zk7ErGxUliYeUxNNdvnzaPZFwAAoC7VZo5GvueTkO7aV57b7dbNN9+sgQMH6k9/+pMkadu2bYqIiFBcXJzXsW3atNG2bduqPFdxcbGKi4s9z10ul+c93I38B+Ko1qyRVq6UkpOlw1uVbDazPTvbHNeli9dut9sty7K4xgAAALXAK7dau7baOVoFNcj3GgJfc9V6U0iNHz9ev/76q77//vsan2vWrFmaPn16he07duxQUVFRjc/foOXmSu3aSYmJFb9YktSypRQZaY5zOLx2ud1uOZ1OWZZF1z4AAIAa8sqtapCjVVCb56qH9uzZ49Nx9aKQmjBhgj766CN9++23SkpK8mxv27at9u/fr927d3u1Sm3fvl1t27at8nyTJ0/WpEmTPM9dLpeSk5PVunVrxkgdjdMpbd0qFReb5t3DuVxSfr6UkGD+lON2u2Wz2dS6dWsKKQAAgBryyq327Kl2jlZBDfK9hiAqKsqn40K6kLIsSxMnTtS7776rr7/+Wp06dfLa37dvXzVp0kQLFy7UeeedJ0latWqVNmzYoAEDBlR53sjISEVGRlbYbrfbSfCPpksXqXt3KTPT9JG12Q7tsyxp40apXz9zXCXX0mazcZ0BAABqiSe3qmGO5qU2z1UP+ZqnhnQhNX78eL366qt6//331bx5c8+4J4fDoaZNm8rhcOiqq67SpEmT1LJlS8XGxmrixIkaMGCAzzP2wU92u5nycv160zc2KUmKjpYKCw/N4jJ2bIP8UgEAAISs2szRyPd8EtLTn9vKV7/lvPjiixo3bpx0cEHeW2+9Va+99pqKi4s1bNgwPf3000fs2nc4pj+vhsrWFUhLM1+qKqbCZPpzAACA2lNpblWNHK1KtXmuesTX2iCkC6m6QiFVTW63lJNj+tE6HFJKyhHvTFBIAQAA1J4qcys/c7SjvEntnaue8LU2COmufQhxdrvUtWuwowAAAEB5tZmjke9VqWGXkwAAAAAQABRSAAAAAOAnuvbBWyPsBwsAAAD4i0IKh1Q2M0tqqpn+sgHPzAIAAAD4i0IKRlaWNGOGlJdn1gqIiZEKCsxCbOvXS1OnUkwBAAAAB9FnC6Y739y5pohKTZViY6WwMPOYmmq2z5tnjgMAAABAIQWZMVFlq1YfvgiyzWa2r1hhjgMAAABAIQWZiSWKikx3vspER5v9TmddRwYAAACEJAopmNn5oqLMmKjKFBaa/Q5HXUcGAAAAhCQKKZgpzlNTpU2bJMvy3mdZZntamjkOAAAAAIUUZNaJysiQ4uPNWCmXSyopMY/Z2Wb72LGsJwUAAAAcxPTnMNLTzRTnZetIbdliuvP162eKKKY+BwAA8I/bbSbrcjrNEImUlMpvTLvd0sqV0oIFZkhFr17SGWdI4XWUqvsaJ7xQSOG
Q9HTzxeWLBAAAUDNZWYduUBcVmRvUqammF1D5G9RZWdLkydKiReY4y5KaNJGOOUa6917pkktCI05UQCEFb3a71LVrsKMAAACov7KypBkzzFqcSUlmZuSCAikzU1q/3vQCSk83x11zjfS//5kCKjLSLD2zf7/0++/SxInmfIEqpnyNE5WiqQFH5HZLq1dLy5aZR9bkBQAAOAK327Tw5OWZlp3YWCkszDympprt8+aZ8egvvmhagiRTxDRpYrrzNW1qHl0u6aGHzLHBipPkr0q0SKFKtPQCAAD4KSfHJE9JSaZ1qTybzWxfsUJauNB05yspOdQSVf64Jk1MK9Uff5hjhw0LTpw5OfRWqgItUqhUWUtvZqbUsqXUpYt5zMw027Oygh0hAABACHI6zR3omJjK90dHm/3btkl795piqbLx6Ha7KWgOHDDHBitOp7P237uBoJBCBbT0AgAAVJPDYbrxFBRUvr+w0Oxv21Zq1swUS5UlVW73oYkn2rYNXpwOR+2/dwNBIYUK/GnpBQAAQDkpKebO86ZNphAqz7LM9rQ0acgQaeBAMxaquNj7WMsyLVFut9Sxozk2WHGmpNT+ezcQFFKogJZeAACAarLbzYDy+HhzZ9rlMuOgXC7zPD7erNEZHi5dcYUpZiTTMnTggDl23z7zGBsr3XFHYNaT8jVOlsGpElcGFdDSCwAAUAPp6Wbq8L59pfx8040nP1/q1897SvH0dOn556XTTzfJVXGxKaLsdqlzZ+nJJwO7jpSvcaJSzNqHCspaejMzzWP57n1lLb39+tHSCwAAUKX0dKlXL1OcOJ3mDnRKSsUWnvR06ZNPpJUrpQULzB3rXr2kM84ITEtUdeNEBRRSMP1vy3157Ckpysiwa/36Q2OloqPN93rTJlp6AQAAfGK3+zZ1uN1uxiOlpdVFVJW/P1Oc+41CqrGrYrGo9IwMTZ2a7tm1ZYvZ1a+fKaJo6QUAAEBjRiHVmJUtFpWXZ5qdYmLMwKjMTGn9eqVPnapej6XT0gsAAAAchkKqsTp8saiygVBli0VlZ0vz5sn+aC917UrlBAAAAJRHhtxYsVgUAAAAUG0UUo0Vi0UBAAAA1UYh1VixWBQAAABQbRRSjVXZYlGbNpnFocorWywqLY3FogAAAIBKUEg1Vna7lJFhFoXKzpZcLqmkxDxmZ7NYFAAAAHAEzNrXmKWnS1OnisWigMbL7XZr//79wQ4DIapJkyYKCwsLdhgAEJIopBq79HSpVy+xWBTQ+Ozfv1/r1q2T2+0OdigIYXFxcWrbtq1sh8/wCgCNHIUUTNHUtWuwowBQhyzL0tatWxUWFqbk5GTZuXmCw1iWpcLCQuXm5kqS2rVrF+yQACCkUEg1Rm43LVBAI1dSUqLCwkIlJiYqOjo62OEgRDVt2lSSlJubq4SEBLr5oeFpjDlRqH3mUIvHDxRSjU1W1qExUUVFZkxUaqqZeIIxUUCjUVpaKkmKiIgIdigIcWWF9oEDByik0LA0xpwo1D5zqMXjJwqpxiQrS5oxQ8rLk5KSzGK8BQVSZqa0fr2ZeKIe/NACqD2Me8HR8DOCBqkx5kSh9plDLZ5qqB/tZqg5t9tU/Hl5ptKPjZXCwsxjaqrZPm+eOQ4AAKChaow5Uah95lCLp5oopBqLnBzTbJqUJB1+d9FmM9tXrDDHAQCqNG7cOI0aNSrg72Oz2fTee+8F/H2ARqcx5kSh9plDLZ5qopBqLJxO0/c0Jqby/dHRZr/TWdeRAYDPxo0bJ5vNJpvNpiZNmqhTp0664447VFRUFOzQ6oxlWRo6dKiGDRtWYd/TTz+tuLg4bdq0KSixAfVCY8yJQu0zh1o81UQh1Vg4HGYAX0FB5fsLC81+h6OuIwNQj7nd0urV0rJl5rEuemGcddZZ2rp1q37//XfNnj1bzz77rKZNmxb4Nw4RNptNL774opYuXapnn33Ws33dunW644479OSTTyopKSmoMQIhrTHmRKH2mUMtnmqikGosUlJ
Mn9NNmyTL8t5nWWZ7Wpo5DgB8kJUlTZokTZwo3XabeZw0yWwPpMjISLVt21bJyckaNWqUhg4dqi+++MKz3+12a9asWerUqZOaNm2qXr166a233vLs37Vrl8aMGaPWrVuradOm6tKli1588UXP/l9++UWnn366mjZtqlatWunaa6/V3r17K43lueeeU2JiYoVFjc8991xdeeWVnufvv/+++vTpo6ioKB177LGaPn26SkpKPPvXrFmjQYMGKSoqSmlpaV6fpzLJycn6xz/+odtuu03r1q2TZVm66qqrdOaZZ+ryyy/Xr7/+quHDh6tZs2Zq06aNLr/8cuXl5Xle/9Zbb6lnz56ezzh06FAVVJXQAA1NY8yJQu0zh1o81UQh1VjY7WYqyfh40yfV5ZJKSsxjdrbZPnZsvZm3H0BwlU22lJkptWwpdeliHjMzzfZAF1Nlfv31Vy1evNhrGvdZs2Zp3rx5mjNnjn777Tfdcsstuuyyy/TNN99IkqZMmaIVK1bo008/VXZ2tp555hnFx8dLkgoKCjRs2DC1aNFCy5Yt05tvvqkFCxZowoQJlb7/BRdcoJ07d+qrr77ybMvPz9dnn32mMWPGSJK+++47jR07VjfddJNWrFihZ599Vi+99JLuv/9+6WDhN3r0aEVERGjp0qWaM2eO7rzzzqN+9oyMDA0ZMkRXXnmlnnrqKf3666969tlntXv3bp1++ulKT0/Xjz/+qM8++0zbt2/XhRdeKEnaunWrLrnkEl155ZXKzs7W119/rdGjR8s6PJkBGqrGmBOF2mcOtXiqyWbxm1Mul0sOh0NOp1OxsbHBDiewKpuvPy3N/LCWTTFZnYXRfHiN2+32LOpoD/EvBtDQFRUVad26derUqZOioqL8eq3bbVqeMjPNDcXy44Qty/x66ddPevTR2v8/cNy4cXr55ZcVFRWlkpISFRcXy26364033tB5552n4uJitWzZUgsWLNCAAQM8r7v66qtVWFioV199Veecc47i4+P1wgsvVDj/888/rzvvvFMbN25UzMG++5988olGjhypLVu2qE2bNho3bpx2797tmQhi1KhRatWqlf71r39JB1uppk+fro0bN8put2vo0KEaMmSIJk+e7Hmfl19+WXfccYe2bNmizz//XGeffbbWr1+vxMRESdJnn32m4cOH69133z3ixBa5ubnq0aOH8vPz9fbbb2vUqFG677779N1332n+/Pme4zZt2qTk5GStWrVKe/fuVd++ffXHH3+oQ4cOR73mNflZAUKaLzlRiPM7twq1zxxq8Rzka23AOlKNTXq61KtX1UVPdRZGq+eLqQHwjz+TLXXtWvvvf9ppp+mZZ55RQUGBZs+erfDwcJ133nkHY8tRYWGhzjjjDK/X7N+/X+kHfx9df/31Ou+88/TTTz/pzDPP1KhRo3TSSSdJkrKzs9WrVy9PESVJAwcOlNvt1qpVq9SmTZsK8YwZM0bXXHONnn76aUVGRuqVV17RxRdf7Elqli9frkWLFnlaoHRwQeSioiIVFhYqOztbycnJniJKklcReCQJCQm67rrr9N5773kKruXLl+urr75Ss2bNKhy/du1anXnmmRoyZIh69uypYcOG6cwzz9T555+vFi1a+PSeQINxtJyoIQq1zxxq8fiJQqoxstsrz26qszBaA1hMDYB/fJlsacuWwE22FBMTo5SD/eZfeOEF9erVS//617901VVXecYyffzxx2rfvr3X6yIjIyVJw4cP1/r16/XJJ5/oiy++0JAhQzR+/Hg98sgj1Ypn5MiRsixLH3/8sfr376/vvvtOs2fP9uzfu3evpk+frtGjR1d4bW208ISHhys8/NB/53v37tXIkSP14IMPVji2Xbt2CgsL0xdffKHFixfr888/15NPPqm7775bS5cuVadOnWocD1CvVJUTNWSh9plDLR4/1I9yD4FXnYXRGshiagD8E0qTLdntdv3tb3/TPffco3379iktLU2RkZHasGGDUlJ
SvP4kJyd7Xte6dWtlZGTo5Zdf1uOPP67nnntOkpSamqrly5d7TbywaNEi2e12devWrdIYoqKiNHr0aL3yyit67bXX1K1bN/Xp08ezv0+fPlq1alWFeFJSUmS325WamqqNGzdq69atntf88MMP1b4mffr00W+//aaOHTtWeL+yljabzaaBAwdq+vTpysrKUkREhN59991qvycANEYUUjCqszBaA1lMDYB/Qm2ypQsuuEBhYWH65z//qebNm+u2227TLbfcorlz52rt2rX66aef9OSTT2ru3LmSpKlTp+r9999XTk6OfvvtN3300UdKTU2VDnbTi4qKUkZGhn799Vd99dVXmjhxoi6//PJKu/WVGTNmjD7++GO98MILnkkmykydOlXz5s3T9OnT9dtvvyk7O1uvv/667rnnHknS0KFD1bVrV2VkZGj58uX67rvvdPfdd1f7eowfP175+fm65JJLtGzZMq1du1bz58/XFVdcodLSUi1dulQPPPCAfvzxR23YsEHvvPOOduzY4bkGAADfUEjBqM7CaA1kMTUA/gm1yZbCw8M1YcIEPfTQQyooKNDMmTM1ZcoUzZo1S6mpqTrrrLP08ccfe7qtRUREaPLkyTruuOM0aNAghYWF6fXXX5ckRUdHa/78+crPz1f//v11/vnna8iQIXrqqaeOGMPpp5+uli1batWqVbr00ku99g0bNkwfffSRPv/8c/Xv318nnniiZs+e7ZnowW63691339W+fft0/PHH6+qrr/YaT+WvxMRELVq0SKWlpTrzzDPVs2dP3XzzzYqLi5PdbldsbKy+/fZbjRgxQl27dtU999yjRx99VMOHD6/2ewJAY8SsfY1t1r6qrF5tFoFp2dJ0zTucyyXl50tPPnmoH6ufr2HWPiB01MZMbCE62RJqGbP2AaGL3CowmLUP/inrq1PVfMabNpn5jMv31anOawA0GPV8siUAAGqEQgpGWV+d9esPjXuKjjajxjdtqryvTnVeA6BBqceTLQEAUCNkuDgkPd1MV963r+mSl5NjHvv1q3oa8+q8BgAAAKjnaJGCt+r01QlW/x63mz5FAAAACAoKKVTka1+dYBYylY1yT001XQ1pBQMAAECAUUiheoJZyGRlSTNmmAV/k5LM9OsFBWbSi/Xr6VIIAACAgKMfFPxXVshkZpqpz7t0MY+ZmWZ7Vlbg3tvtNgVcXp4p3GJjpbAw85iaarbPm2eOAwAAAAKEFin45/BCpmzK87JCJjvbFDK9elXs5ud2S5s3Sxs2HFp3as8e/7oF5uQcmiGw/HTrknmelCStWGGOYyoxAAAQig4fHnHssdLvv3sPl5AqDqHQwXU8//c/ads2qV07qVMnM1NybQ+vYCz6UVFIwT/VLWSyskyBVVAg/fSTtGOH2R4fb/742i3Q6TRdCWNiKt8fHS1t2WKOAwAACDWHD4/Yv//QMImICPPYqpU5dufOQ/tatTIzIy9fbh7dbqlJE+nkk6XWraXbbqu9oQ2MRfcJZSX840shU1TkXciU7wpos5lfCoWF5s/OnWabr90CHQ7zZS4oqHx/YaHZ73DU4EMCQNXGjRunUaNGBfx9bDab3nvvvYC/D4A6dPjwiBYtpI0bpbVrzWOLFiYvmj/f/LHZzBAKm0365BNp4ULTK8hmkyIjzTn37pU+/VSaNKl2hlcEcwhHPUMhBf/4W8iU7wrYvbspnIqLzV2V+Hjz982bzT5fxjelpJg7Ips2SZblvc+yzPa0tEPN3wAalHHjxslms8lms6lJkybq1KmT7rjjDhUVFQU7tDpXdi3+/ve/e21/7733ZDu8xwCA4Dt8eETz5qaAKimREhOl0lLzfPNm09LUpInpZWOzmW3795tjLMsUUU2amJzLbjf7Vq8256/JOHHGovuFQgr+8beQKd8V0OUyBVhMzKFugTEx0u7dZqxU+W6BVbHbTbNyfLw5r8tlfgG5XOZ5fLw0dix9eIG64nab/7yXLTOPdfCf61lnnaW
tW7fq999/1+zZs/Xss89q2rRpAX/fUBQVFaUHH3xQu3btCnYoAI7m8OERTqfJgcryopgYM/Rhxw6pWTPzfNcuU0zt2GHyrLL8qSwHs9lMoSOZm9M//njkPMrfGMs7fAgHKKTgJ38LmfJdAcvupISXG5oXHm5ev39/5d0CyytL2EpKTAx9+pg+wjk55rFfP6Y+B+pSVpbpSjJxoumbP3Fi7XUtOYLIyEi1bdtWycnJGjVqlIYOHaovvvjCs9/tdmvWrFnq1KmTmjZtql69eumtt97y7N+1a5fGjBmj1q1bq2nTpurSpYtefPFFz/5ffvlFp59+upo2bapWrVrp2muv1d69eyuN5bnnnlNiYqLchxWQ5557rq688krP8/fff199+vRRVFSUjj32WE2fPl0lJSWe/WvWrNGgQYMUFRWltLQ0r89zJEOHDlXbtm01a9asIx739ttvq0ePHoqMjFTHjh316KOP+nR+ALXo8OER+/ebnKYsLyrLicq2lT0vLDSP5W9gl/97+eKqoKBm48SrM4SjEWOyCfgvPd0ULGWDELdsMU3L/fqZIqp8IVO+K2BEhLlrUv6XRtnfIyKOPL6pskGP3btL118vtW/PbDJAXQuR9dx+/fVXLV68WB06dPBsmzVrll5++WXNmTNHXbp00bfffqvLLrtMrVu31uDBgzVlyhStWLFCn376qeLj45WTk6N9+/ZJkgoKCjRs2DANGDBAy5YtU25urq6++mpNmDBBL730UoX3v+CCCzRx4kR99dVXGjJkiCQpPz9fn332mT755BNJ0nfffaexY8fqiSee0CmnnKK1a9fq2muvlSRNmzZNbrdbo0ePVps2bbR06VI5nU7dfPPNPn3+sLAwPfDAA7r00kt14403KikpqcIxmZmZuvDCC3Xvvffqoosu0uLFi3XDDTeoVatWGjduXDWvPAC/lc+JYmNN7lNWLEVEVMyPLMs8j442j+VbiMr/vXzrVExMzcaJHx7j4RiL7oVCCtWTnm6mOD/atJhlXQEzM81jScmhL6fNZv4eH2/6Ca9caYqxw8c3VZWw/fSTmUp96lSmOgfqUk2WQagFH330kZo1a6aSkhIVFxfLbrfrqaeekiQVFxfrgQce0IIFCzRgwABJ0rHHHqvvv/9ezz77rAYPHqwNGzYoPT1d/fr1kyR17NjRc+5XX31VRUVFmjdvnmIO3pF96qmnNHLkSD344INq06aNVywtWrTQ8OHD9eqrr3oKqbfeekvx8fE67bTTJEnTp0/XXXfdpYyMDE88M2fO1B133KFp06ZpwYIFWrlypebPn6/ExERJ0gMPPKDhw4f7dD3+8pe/qHfv3po2bZr+9a9/Vdj/2GOPaciQIZoyZYokqWvXrlqxYoUefvhhCimgLh2eEzkcUlyc6bbXpInJbVq3NseWzW6ckGDGT23YYHr/lHXvK98KVVpq/h4ZWXkeVZMYDy/YNm2q+Xs0INy+R/XZ7aaA6d/fPFaWMJXvCrhypZlkIjLSTDqRl2fuwLRvb/ZVNr6JQY9A6AlyH/rTTjtNP//8s5YuXaqMjAxdccUVOu+88w6GlqPCwkKdccYZatasmefPvHnztHbtWknS9ddfr9dff129e/fWHXfcocWLF3vOnZ2drV69enmKKEkaOHCg3G63Vq1aVWk8Y8aM0dtvv63i4mJJ0iuvvKKLL75Y9oO/y5YvX64ZM2Z4xXPNNddo69atKiwsVHZ2tpKTkz1FlCRPEeirBx98UHPnzlV2dnaFfdnZ2Ro4cKDXtoEDB2rNmjUqLUvAAATe4cMj9uyROnc2rU1btpj8pnNnkxcdOGD+JCaaHKd9+0M9e2w2Mx7qwAHTS8ftNvu6djXnr8kNLMai+4UWqYaoNhdQq41zlXUFLFtHqlWrQ4VPfLy5w1FZt0CVS9jatzcx7N9vflk4HCzACwRLkNdzi4mJUcrBu6EvvPCCevXqpX/961+66qqrPGOZPv74Y7Vv397rdZEHpwoePny
41q9fr08++URffPGFhgwZovHjx+uRRx6pVjwjR46UZVn6+OOP1b9/f3333XeaPXu2Z//evXs1ffp0jR49usJro6KiqvWehxs0aJCGDRumyZMn08oEhLLDh0cUFUnJyYeGLezaZR7POsvkRzt3mhwnKko6+2zzvGwdqeJi05LVrJk0YoR0662106XanyEcjRyFVENTmwuo1ea50tOlnj3Nl7+09FC/2z17jlygOZ2m1WnzZvP3sv7DcXGmcIqLYwFeoK6FUB96u92uv/3tb5o0aZIuvfRSpaWlKTIyUhs2bNDgwYOrfF3r1q2VkZGhjIwMnXLKKbr99tv1yCOPKDU1VS+99JIKCgo8rVKLFi2S3W5Xt27dKj1XVFSURo8erVdeeUU5OTnq1q2b+vTp49nfp08frVq1ylP8HS41NVUbN27U1q1b1a5dO0nSDz/84Pe1+Pvf/67evXtXiDM1NVWLFi3y2rZo0SJ17dpVYWWzfQGoO5UNjzj2WOn3371vXEsVb2ZLZuKt//1P2rZNatdO6tRJ6t3bezKvQMTIWPQKKKQaktoc/B2IgeR2u2lZSkjw/Yu4ebNZoM6yTMIWHW2KqR07zAJ03box6BGoayHWh/6CCy7Q7bffrn/+85+67bbbdNttt+mWW26R2+3WySefLKfTqUWLFik2NlYZGRmaOnWq+vbtqx49eqi4uFgfffSRUlNTpYPd9KZNm6aMjAzde++92rFjhyZOnKjLL7+8wvio8saMGaM///nP+u2333TZZZd57Zs6dar+/Oc/65hjjtH5558vu92u5cuX69dff9V9992noUOHqmvXrsrIyNDDDz8sl8ulu+++2+/r0LNnT40ZM0ZPPPGE1/Zbb71V/fv318yZM3XRRRdpyZIleuqpp/T000/7/R4AaknZ8IjyKutZU9m27t3NHx3sOZSbG5gCp7IY4YWysqGozbFEoTIuye2WvvrKNFuHhZkufXa7eYyLk/btk375xcTEoEeg7oRYH/rw8HBNmDBBDz30kAoKCjRz5kxNmTJFs2bNUmpqqs466yx9/PHH6tSpkyQpIiJCkydP1nHHHadBgwYpLCxMr7/+uiQpOjpa8+fPV35+vvr376/zzz9fQ4YM8UxmUZXTTz9dLVu21KpVq3TppZd67Rs2bJg++ugjff755+rfv79OPPFEzZ492zPToN1u17vvvqt9+/bp+OOP19VXX63777+/WtdixowZFaZi79Onj9544w29/vrr+tOf/qSpU6dqxowZdAEEgBqyWdbhq6o2Pi6XSw6HQ06nU7GVdVOpD1avNmu4tGxZeVcbl8v0p33yyaPfXajNc5XjdruVm5urhIQEzyBsn+Kw2aRVqw6NySibKtTpNPuee04aNcrnOABIRUVFWrdunTp16lT9cTqVdf9NS6MPfQNTKz8rAALC79wKPvG1NqBrX0NRm4O/j3QuyzKzxOzYYfrnHt5ftrqTU1T2urI4unQxsaxebVYALygwxVRCgkncDhtQDqCO0IceAOpOZbkSgopCqqGozcHfVZ0rL88UM3l5psB5/HHp++8PTT5R3ckpqnrdqaceiiM+3sz253IdmrlPMrPbMD4KCB760ANA4FWVK40da6ZIR1BQSDUUtTn4u7Jz5eWZBXD37TOz7iUmmkkoyiafuPBC6Y03jjw5Ra9eFd/rSJNa/PGHKZ42bjwUR1nRZFnmlwmLwgEAgIbsSLnShg1m2vOEhGBH2SjR/6KhqM3B34efy+k0C+YeXJ9FzZqZ2WIcDlPg7NghPfSQefRncoqjTWqxc6cpnlq1qvlncrtNa9qyZeaRBXwBAECo82UCsK++Iq8JElqkGpLaXECt/Ll+/PHQueLjTTee+HhzXFkr0cqV0oknereEle0vWzR37Vrvbnhli+0mJVX9urw86frrpa+/rv5nqs31sIAGhvmGcDT8jABB5EuutGGDybGqWOsOgUMh1dDU5uDvsnO98450//3mPC1aVPwih4WZCSi
qWtixbKKLXbtMq9aGDWb68l27TGETHW1iLRv7FBtr3qPsde3bS489Vr3PFIj1sIAGoGwh1v3796tp06bBDgchrLCwUJLUpEmTYIcCND6+TCZ24IBvk4mh1lFINUS1OfjbbpeOO870vQ0Pr1hESWbMVJMm5rEyhYWmSJozxxRKq1dLkZFS27ammPr+e3NMSYl5j7g4E39ExKEJMqrzmQ5vDi+Lvaw5PDvbdDns1YtZxtDohIeHKzo6Wjt27FCTJk2YNhcVWJalwsJC5ebmKi4uzlN8A6hDvkwm1qwZE28FCYUUju5oE1k4nVJyspmaPDGx4v6VK6U9e0yRdPzxZjrzvXul5ctNU7TNJrVpY+62lJSYwmfPHtP6deqp5v2rM626L83hK1aY45h1DI2MzWZTu3bttG7dOq1fvz7Y4SCExcXFqW3btsEOA2icfJlM7Mwzpc6dgxllo0UhhaMrm3xi/fpDhUl0tLkLsmmT1Lq1NH68mbXv8P0bN5rJIWJjzUKd0dHmrkrz5odasOx2s61ssd3oaDNxRXi4dNllpuCqzhin2lxbC2iAIiIi1KVLF+3fvz/YoSBENWnShJYoIJh8ycFOO42eNUFCIQXf+DKRRffuFfenpJiCqWNH77soTqdpwYqPN8WOw2F+KRQWmgKqXTupZUtTiJV1z/N3jFNtrq0FNFB2u11RUVHBDgMAUJUj5WCXX846UkHUYAqpf/7zn3r44Ye1bds29erVS08++aSOP/74YIfVsBxtIovK9u/aJd1xR8VWof37TTe+5s3N39PSzJiosgknYmLMef7zn+qPcarNtbUAAACCpaocTJJyc4MdXaPVIAqp//znP5o0aZLmzJmjE044QY8//riGDRumVatWKYEFymrX0SZ9OHz/6tWHWoXKt/xERJiWp+Ji8xgZ6b3f5TLjojZuNOOvqjPG6WjN4f6sQwUAABBMleVgrB8VVA0ig3zsscd0zTXX6IorrlBaWprmzJmj6OhovfDCC8EODWWtQps2mVagMg7HoSnQHQ7vrndlrUXJyeaXxpHGOBUVHXmMU1lzeN++Un6+Kbry801LFFOfAwAAoJrqfYvU/v37lZmZqcmTJ3u22e12DR06VEuWLKn0NcXFxSouLvY8dx5MxHfv3i03lX3tGz1aWrtW7l9/lSs9XRH79sleWGjWnWrWzLRI7dolNW0q7dsnbd4stWoljRwpvfCCKZSaN6943j17zLTrdrsZb1WVTp1M0bRu3aHm8E6djv46AACAEOZ2u+VyuRQREcEyFrXI5XJJPixIXu8Lqby8PJWWlqpNmzZe29u0aaOVK1dW+ppZs2Zp+vTpFbZ36NAhYHHioMWLK27butXMzHe4jz7y7Zz9+tU8LgAAAKCcPXv2yHGEScnqfSFVHZMnT9akSZM8z91ut/Lz89WqVSvZKltwFrXC5XIpOTlZGzduVGxls+gBAADAZ+RWgWFZlvbs2aPEo8yIWO8Lqfj4eIWFhWn79u1e27dv317lAoKRkZGKjIz02hYXFxfQOHFIbGwsX3YAAIBaQm5V+47UElWm3nemjIiIUN++fbVw4ULPNrfbrYULF2rAgAFBjQ0AAABAw1TvW6QkadKkScrIyFC/fv10/PHH6/HHH1dBQYGuuOKKYIcGAAAAoAFqEIXURRddpB07dmjq1Knatm2bevfurc8++6zCBBQIrsjISE2bNq1Ct0oAAAD4j9wquGzW0eb1AwAAAAB4qfdjpAAAAACgrlFIAQAAAICfKKQAAAAAwE8UUqhXOnbsqMcff9zz3Gaz6b333gtqTAAAAGh8KKRQr23dulXDhw8PdhgAAAC15tRTT9XNN98c7DAkSUVFRRo3bpx69uyp8PBwjRo1KtghhQwKqUZu//79wQ6hRtq2bcuUnwAAAAFSWlqqpk2b6sYbb9TQoUODHU5IoZBqYE499VRNmDBBEyZMkMPhUHx8vKZMmaKyWe47duyomTNnauzYsYqNjdW1114rSXr
77bfVo0cPRUZGqmPHjnr00Ud9fs+OHTvqvvvu09ixY9WsWTN16NBBH3zwgXbs2KFzzz1XzZo103HHHacff/zR63Xff/+9TjnlFDVt2lTJycm68cYbVVBQ4Nmfm5urkSNHqmnTpurUqZNeeeWVCu99eNe+O++8U127dlV0dLSOPfZYTZkyRQcOHPDsv/fee9W7d2/9+9//VseOHeVwOHTxxRdrz549fl5pAAAAk3tNnDhRN998s1q0aKE2bdro+eefV0FBga644go1b95cKSkp+vTTTz2v+fXXXzV8+HA1a9ZMbdq00eWXX668vDxJ0rhx4/TNN9/oH//4h2w2m2w2m/744w+VlpbqqquuUqdOndS0aVN169ZN//jHPyrE88ILL3hyunbt2mnChAlHjH/Xrl0aO3asWrRooejoaA0fPlxr1qzx7I+JidEzzzyja665Rm3btq3Va1ffUUg1QHPnzlV4eLj++9//6h//+Icee+wx/d///Z9n/yOPPKJevXopKytLU6ZMUWZmpi688EJdfPHF+uWXX3TvvfdqypQpeumll3x+z9mzZ2vgwIHKysrS2Wefrcsvv1xjx47VZZddpp9++kmdO3fW2LFjPQXd2rVrddZZZ+m8887T//73P/3nP//R999/7/VlHzdunDZu3KivvvpKb731lp5++mnl5uYeMY7mzZvrpZde0ooVK/SPf/xDzz//vGbPnu11zNq1a/Xee+/po48+0kcffaRvvvlGf//73/24wgAAAIfMnTtX8fHx+u9//6uJEyfq+uuv1wUXXKCTTjpJP/30k84880xdfvnlKiws1O7du3X66acrPT1dP/74oz777DNt375dF154oSTpH//4hwYMGKBrrrlGW7du1datW5WcnCy3262kpCS9+eabWrFihaZOnaq//e1veuONNzxxPPPMMxo/fryuvfZa/fLLL/rggw+UkpJyxNjHjRunH3/8UR988IGWLFkiy7I0YsQIrxvRqIKFBmXw4MFWamqq5Xa7PdvuvPNOKzU11bIsy+rQoYM1atQor9dceuml1hlnnOG17fbbb7fS0tJ8es8OHTpYl112mef51q1bLUnWlClTPNuWLFliSbK2bt1qWZZlXXXVVda1117rdZ7vvvvOstvt1r59+6xVq1ZZkqz//ve/nv3Z2dmWJGv27NmebZKsd999t8rYHn74Yatv376e59OmTbOio6Mtl8vl9VlPOOEEnz4rAABAeYMHD7ZOPvlkz/OSkhIrJibGuvzyyz3bynKjJUuWWDNnzrTOPPNMr3Ns3LjRkmStWrXKc86bbrrpqO89fvx467zzzvM8T0xMtO6++26fY1+9erUlyVq0aJFnW15entW0aVPrjTfeqHB8RkaGde655/p8/oaOFqkG6MQTT5TNZvM8HzBggNasWaPS0lJJUr9+/byOz87O1sCBA722DRw40Os1R3Pcccd5/t6mTRtJUs+ePStsK2tRWr58uV566SU1a9bM82fYsGFyu91at26dsrOzFR4err59+3rO0b17d8XFxR0xjv/85z8aOHCg2rZtq2bNmumee+7Rhg0bvI7p2LGjmjdv7nnerl27o7Z0AQAAVKV8HhQWFqZWrVpVmQctX75cX331lVcO1L17d+lgr5kj+ec//6m+ffuqdevWatasmZ577jlPnpObm6stW7ZoyJAhlb72r3/9q9d76mAOGB4erhNOOMFzXKtWrdStWzdlZ2fX6Jo0BuHBDgB1LyYmptbP2aRJE8/fy4q4yra53W5J0t69e3XdddfpxhtvrHCuY445RqtXr/Y7hiVLlmjMmDGaPn26hg0bJofDoddff73CeK/ycZXFVhYXAACAvyrLLarKg/bu3auRI0fqwQcfrHCedu3aVfker7/+um677TY9+uijGjBggJo3b66HH35YS5culSQ1bdr0iDHOmDFDt912m9+fDVWjkGqAyr5QZX744Qd16dJFYWFhlR6fmpqqRYsWeW1btGiRunb
tWuVraqpPnz5asWJFlf12u3fvrpKSEmVmZqp///6SpFWrVmn37t1VnnPx4sXq0KGD7r77bs+29evXByB6AACA6unTp4/efvttdezYUeHhlafiERERFXoFLVq0SCeddJJuuOEGz7byLVjNmzdXx44dtXDhQp122mkVzpmQkKCEhASvbampqSopKdHSpUt10kknSZJ27typVatWKS0trcaftaGja18DtGHDBk2aNEmrVq3Sa6+9pieffFI33XRTlcffeuutWrhwoWbOnKnVq1dr7ty5euqppwJ61+LOO+/U4sWLNWHCBP38889as2aN3n//fc9kE926ddNZZ52l6667TkuXLlVmZqauvvrqI95t6dKlizZs2KDXX39da9eu1RNPPKF33303YJ8BAADAX+PHj1d+fr4uueQSLVu2TGvXrtX8+fN1xRVXeIqnjh07aunSpfrjjz+Ul5cnt9utLl266Mcff9T8+fO1evVqTZkyRcuWLfM697333qtHH31UTzzxhNasWaOffvpJTz75ZJWxdOnSReeee66uueYaff/991q+fLkuu+wytW/fXueee67nuBUrVujnn39Wfn6+nE6nfv75Z/38888BvEr1A4VUAzR27Fjt27dPxx9/vMaPH6+bbrrJM815Zfr06aM33nhDr7/+uv70pz9p6tSpmjFjhsaNGxewGI877jh98803Wr16tU455RSlp6dr6tSpSkxM9Bzz4osvKjExUYMHD9bo0aN17bXXVriTUt4555yjW265RRMmTFDv3r21ePFiTZkyJWCfAQAAwF+JiYlatGiRSktLdeaZZ6pnz566+eabFRcXJ7vdpOa33XabwsLClJaWptatW2vDhg267rrrNHr0aF100UU64YQTtHPnTq/WKUnKyMjQ448/rqefflo9evTQn//8Z6+pzCvz4osvqm/fvvrzn/+sAQMGyLIsffLJJ15dE0eMGKH09HR9+OGH+vrrr5Wenq709PQAXaH6w2aVzUeNBuHUU09V79699fjjjwc7FAAAAKDBokUKAAAAAPxEIYUj+u6777ymyjz8DwAAANAY0bUPR7Rv3z5t3ry5yv1HWy0bAAAAaIgopAAAAADAT3TtAwAAAAA/UUgBAAAAgJ8opAAAAADATxRSAAAAAOAnCikAAAAA8BOFFAAAAAD4iUIKAAAAAPxEIQUAAAAAfvp/bfn25iDVlwcAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -4195,15 +4971,15 @@ "x_pro_no = np.random.normal(1, 0.04, len(resolved_no))\n", "\n", "# Plot points for \"yes\" resolution\n", - "plt.scatter(x_bot_yes, resolved_yes['pro_median'] * 100, \n", + "plt.scatter(x_bot_yes, resolved_yes['pro_median'] * 100,\n", " color='blue', alpha=0.6, label='Resolved Yes')\n", - "plt.scatter(x_pro_yes, resolved_yes[top_bot] * 100, \n", + "plt.scatter(x_pro_yes, resolved_yes[top_bot] * 100,\n", " color='blue', alpha=0.6)\n", "\n", "# Plot points for \"no\" resolution\n", - "plt.scatter(x_bot_no, resolved_no['pro_median'] * 100, \n", + "plt.scatter(x_bot_no, resolved_no['pro_median'] * 100,\n", " color='red', alpha=0.6, label='Resolved No')\n", - "plt.scatter(x_pro_no, resolved_no[top_bot] * 100, \n", + "plt.scatter(x_pro_no, resolved_no[top_bot] * 100,\n", " color='red', alpha=0.6)\n", "\n", "# Customize the plot\n", @@ -4221,14 +4997,14 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_322865/946735765.py:22: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", + "/tmp/ipykernel_17143/946735765.py:22: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. 
Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " weighted_scores = df_long.groupby('forecaster').apply(lambda x: (x['score'] * x['question_weight']).sum(axis=0))\n" ] } @@ -4278,7 +5054,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -4291,7 +5067,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 40, "metadata": { "cellView": "form", "id": "tXKRpXAVHMRt" @@ -4354,7 +5130,7 @@ "
\n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4449,7 +5225,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4711,7 +5487,7 @@ "0 1 pro_median 4238.561607 97 \n", "1 2 metac-o1 3010.353788 96 \n", "2 3 metac-perplexity 2774.080331 94 \n", - "3 4 bot_median 2481.552010 97 \n", + "3 4 bot_median 2374.216338 97 \n", "4 5 acm_bot 2239.058675 85 \n", "5 6 metac-claude-3-5-sonnet-20240620 2018.110211 95 \n", "6 7 manticAI 1865.126260 74 \n", @@ -4723,7 +5499,7 @@ "12 13 metac-Gemini-Exp-1206 1595.682612 81 \n", "13 14 NextWorldLab 1583.026226 85 \n", "14 15 metac-o1-preview 1527.657141 96 \n", - "15 16 metac-deepseek-r1 1518.308625 55 \n", + "15 16 metac-deepseek-r1+asknews 1518.308625 55 \n", "16 17 laylaps 1500.567874 68 \n", "17 18 mmBot 1482.726445 97 \n", "18 19 Grizeu_Bot 1399.477718 55 \n", @@ -4806,7 +5582,7 @@ "46 52.10 " ] }, - "execution_count": 41, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -4875,7 +5651,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -4957,17 +5733,17 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5124,7 +5900,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5580,7 +6356,7 @@ "pro_median 4238.6 93.1 45.5 62.229168 \n", "metac-o1 3010.4 92.1 32.7 57.756859 \n", "metac-perplexity 2774.1 90.1 30.8 67.210383 \n", - "bot_median 2481.6 93.1 26.7 55.791339 \n", + "bot_median 2374.2 93.1 25.5 56.712830 \n", "acm_bot 2239.1 81.2 27.6 55.554054 \n", "metac-claude-3-5-sonnet-20240620 2018.1 91.5 22.1 64.219307 \n", "manticAI 1865.1 70.4 26.5 66.353059 \n", @@ -5592,7 +6368,7 @@ "metac-Gemini-Exp-1206 1595.7 77.5 20.6 67.099981 \n", "NextWorldLab 1583.0 81.2 19.5 66.411747 \n", "metac-o1-preview 1527.7 92.1 16.6 87.111568 
\n", - "metac-deepseek-r1 1518.3 52.1 29.1 62.764970 \n", + "metac-deepseek-r1+asknews 1518.3 52.1 29.1 62.764970 \n", "laylaps 1500.6 65.1 23.1 74.457365 \n", "mmBot 1482.7 93.1 15.9 79.990502 \n", "Grizeu_Bot 1399.5 52.4 26.7 60.886905 \n", @@ -5629,7 +6405,7 @@ "pro_median 6.449398 7.059105 1.985277 58.3 \n", "metac-o1 6.018299 5.431054 1.985550 44.6 \n", "metac-perplexity 7.080664 4.348308 1.986114 44.9 \n", - "bot_median 5.782185 4.609796 1.985277 38.1 \n", + "bot_median 5.877687 4.338745 1.985277 37.2 \n", "acm_bot 6.163169 4.471343 1.988985 39.8 \n", "metac-claude-3-5-sonnet-20240620 6.713594 3.285252 1.985788 35.4 \n", "manticAI 7.905338 3.348936 1.993488 42.2 \n", @@ -5641,7 +6417,7 @@ "metac-Gemini-Exp-1206 7.622046 2.701303 1.990426 35.8 \n", "NextWorldLab 7.367722 2.644427 1.988985 34.1 \n", "metac-o1-preview 9.077077 1.827344 1.985550 34.6 \n", - "metac-deepseek-r1 8.695578 3.351382 2.005379 46.6 \n", + "metac-deepseek-r1+asknews 8.695578 3.351382 2.005379 46.6 \n", "laylaps 9.228204 2.497799 1.996341 41.5 \n", "mmBot 8.290173 1.921090 1.985277 32.4 \n", "Grizeu_Bot 8.415222 3.176755 2.005555 43.6 \n", @@ -5678,7 +6454,7 @@ "pro_median 32.7 1.000000 0.000000 \n", "metac-o1 20.7 1.000000 0.000000 \n", "metac-perplexity 16.7 0.999982 0.000036 \n", - "bot_median 15.2 0.999994 0.000013 \n", + "bot_median 13.8 0.999982 0.000037 \n", "acm_bot 15.3 0.999987 0.000025 \n", "metac-claude-3-5-sonnet-20240620 8.7 0.999275 0.001450 \n", "manticAI 10.7 0.999343 0.001314 \n", @@ -5690,7 +6466,7 @@ "metac-Gemini-Exp-1206 5.4 0.995749 0.008502 \n", "NextWorldLab 4.8 0.995080 0.009840 \n", "metac-o1-preview -1.4 0.964539 0.070922 \n", - "metac-deepseek-r1 11.7 0.999241 0.001519 \n", + "metac-deepseek-r1+asknews 11.7 0.999241 0.001519 \n", "laylaps 4.6 0.992463 0.015074 \n", "mmBot -0.5 0.971093 0.057813 \n", "Grizeu_Bot 9.9 0.998740 0.002521 \n", @@ -5724,7 +6500,7 @@ "minefrac1 -25.4 0.279560 0.559119 " ] }, - "execution_count": 42, + "execution_count": 41, "metadata": 
{}, "output_type": "execute_result" } @@ -5740,7 +6516,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 42, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -5785,62 +6561,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -5869,716 +6589,772 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " 
\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
botPeer Score
Rank
1metac-o13864.168122
2bot_median3472.028144
3metac-o1-preview3162.155445
4manticAI2142.538438
5metac-Gemini-Exp-12062072.216227
6acm_bot1876.466009
7twsummerbot1763.532046
8metac-perplexity1697.555196
9GreeneiBot21603.998618
10cookics_bot_TEST1140.390796
11metac-claude-3-5-sonnet-latest1134.209821
12SynapseSeer1066.533051
13CumulativeBot1030.716475
14pgodzinai926.081448
15jkraybill_bot627.932509
16metac-deepseek-r1+asknews614.572462
17question_weight378.020000
34bot_median2481.5520102374.2163389793.10
1516metac-deepseek-r1metac-deepseek-r1+asknews1518.3086255552.10
bot_median2481.62374.293.126.755.7913395.7821854.60979625.556.7128305.8776874.3387451.98527738.115.20.9999940.00001337.213.80.9999820.000037
acm_bot0.070922
metac-deepseek-r1metac-deepseek-r1+asknews1518.352.129.1
Grizeu_Bot487.940.012.2123.49852319.5390470.6251002.02031451.7-27.30.7322250.535551
acm_bot149.763.82.3123.16721915.4139760.1521161.99701833.1-28.40.5602090.879583
RPM_bot145.06.024.231.46890712.8471271.8809962.57058257.2-8.90.9406380.118725
X_bot20.75.04.119.7562378.8352580.4688972.77644528.7-20.40.6682210.663558
cobyj-bot0.00.0NA
jonahsingerbot-61.3bean_bot-0.64.7-13.05.4853692.530212-5.154842-0.10.0698490.032219-4.2651062.784843-6.0-20.10.0041410.008283-0.0-0.20.0076740.015349
bean_bot-70.7jonahsingerbot-0.64.7-15.18.8131374.065197-3.702222-0.10.0502720.023189-5.2736302.784843-3.7-26.40.0119250.023851-0.1-0.20.0038390.007677
jkraybill_bot-76.138.2-2.067.06547910.858048-0.1837062.02336020.0-24.00.4276220.855243X_bot-0.77.0-0.10.3540680.133825-0.7471952.4469120.2-0.40.2415940.483189
CumulativeBot-97.0-1.110.2-9.530.1210609.408238-1.005535-0.10.2577980.080522-1.3151322.23184811.5-30.50.1701090.3402180.1-0.30.1100660.220132
swingswish-109.06.7-16.315.1455315.851229-2.7797012.450387-1.9-30.60.0168960.033793-1.27.7-0.20.1402750.050552-3.0749472.367123-0.0-0.30.0094760.018953
RPM_bot-1.37.0-0.20.8031630.303567-0.6018022.4469120.6-0.90.2846660.569332
SynapseSeer-128.527.1-4.847.0810459.052373-0.5249592.04956913.8-23.30.3020260.604052-1.326.2-0.10.4525550.088498-0.5689102.0530760.1-0.20.2872310.574463
KevinTestBot-148.3-1.58.4-17.759.36966920.484482-0.861938-0.20.5894660.203385-0.8971162.31149629.7-65.00.2078890.4157770.3-0.70.1989520.397903
twsummerbot-237.247.0-5.079.50269011.596659-0.4351342.01121518.3-28.40.3327500.665500Grizeu_Bot-1.751.4-0.01.1733920.163747-0.2066162.0064470.3-0.40.4185710.837143
pianobot-272.2-2.74.7-57.992.18716542.522768-1.361786-0.60.9162040.422613-1.3843272.79898661.1-176.90.1251370.250274
annabot-316.024.8-12.743.7374108.782683-1.4506142.0613075.4-30.80.0799700.1599400.6-1.80.1219410.243882
CatrachoCaster-331.3-3.219.7-16.852.31505911.786737-1.426980-0.20.5209010.117361-1.3655322.0887777.8-41.40.0850350.1700710.1-0.40.0941440.188288
cookics_bot_TEST-413.324.6-16.872.42669414.602631-1.1504362.06084513.3-46.90.1307440.261488krm-bot-5.19.5-0.50.5115460.165967-3.2298462.264709-0.2-0.90.0055630.011127
GreeneiBot2-446.645.8-9.888.55320713.092083-0.7457052.01234016.6-36.10.2298720.459745annabot-6.229.3-0.20.5208690.096226-2.2117952.044183-0.0-0.40.0176100.035221
metac-o1-500.374.74Shadower-6.214.0-0.40.7673220.205075-2.1431942.1472390.0-0.90.0257970.051593
cookics_bot_TEST-6.7111.25524212.872419-0.5203391.99159718.9-32.30.3021940.60438727.4-0.20.7480500.142908-1.7220042.0495410.0-0.50.0483840.096767
krm-bot-521.09.5-54.850.62785616.425846-3.3389622.264709-17.6-92.00.0047000.009400jkraybill_bot-7.544.0-0.20.5128530.077272-2.1971332.014642-0.0-0.30.0167210.033441
4Shadower-527.812.2-43.380.79118223.130448-1.8702732.1816957.2-93.70.0438960.087792twsummerbot-8.958.4-0.20.6597100.086327-1.7583912.0008550.0-0.30.0420060.084012
MWG-766.429.5-26.087.75333816.156699-1.6080772.0435277.0-59.00.0594210.118842-9.628.6-0.30.7111600.132979-2.5353842.046561-0.1-0.60.0085950.017191
bot_median-780.675.7-10.385.1138919.782560-1.0541471.9911819.2-29.80.1476070.295213ProfessorSP-10.018.6-0.50.9362770.217094-2.4844802.095243-0.1-1.00.0116440.023289
Bot_Pepa-814.937.2-21.993.06728515.269248-1.4365512.0250989.0-52.90.0797220.159444acm_bot-10.580.2-0.10.9142650.102059-1.2877171.9893440.1-0.30.1007960.201592
metac-o1-10.891.1-0.10.8668240.090818-1.3030181.9858290.1-0.30.0979440.195889
ajf-bot-843.131.4-26.9104.85473318.727046-1.4360202.03766711.3-65.10.0806120.161224-10.934.2-0.31.0855890.185496-1.7223952.0307780.1-0.70.0471450.094289
manticAI-861.555.0-15.782.87386511.169634-1.4011472.0030646.7-38.00.0834430.166886metac-deepseek-r1+asknews-11.252.1-0.20.6342570.087871-2.4450432.005379-0.0-0.40.0089850.017970
ProfessorSP-997.216.8-59.496.91948823.645934-2.5102932.112371-9.4-109.30.0116720.023345GreeneiBot2-11.458.4-0.20.8462280.110781-1.7668112.0008320.0-0.40.0412900.082581
metac-perplexity-1072.972.7-14.8105.31560712.351666-1.1948081.9924629.9-39.40.1180500.236099Bot_Pepa-11.544.0-0.30.7375370.111125-2.3431662.014642-0.0-0.50.0119050.023810
wunderplumb-1159.023.8-48.890.74010618.619477-2.6209902.065034-10.4-87.30.0076770.015353metac-Gemini-Exp-1206-11.576.5-0.20.8952100.102351-1.4718491.9908220.1-0.40.0726090.145218
laylaps-1214.552.2-23.348.0199296.646397-3.5005872.005359-9.9-36.60.0004860.000971-12.964.1-0.20.6619050.082674-2.4404611.996907-0.0-0.40.0087440.017488
NextWorldLab-1224.163.8-19.298.66262212.347306-1.5526991.9970185.5-43.80.0627580.125517bot_median-13.392.1-0.10.7572010.078901-1.8300581.9855500.0-0.30.0352560.070512
metac-Gemini-Exp-1206-1250.565.1-19.294.99321111.773405-1.6315191.9963774.3-42.70.0538420.107685wunderplumb-13.625.6-0.50.9000510.178062-2.9840942.056603-0.2-0.90.0031740.006348
minefrac1-1289.443.5-29.6123.19979118.679504-1.5868582.0149188.0-67.30.0599790.119958metac-perplexity-14.489.1-0.21.1026010.116810-1.3849521.9864050.1-0.40.0847820.169564
pgodzinai-1330.462.0-21.598.40405312.497327-1.7169531.9981743.5-46.40.0455310.091062manticAI-14.669.4-0.20.6709460.080510-2.6133541.993968-0.0-0.40.0055070.011014
metac-deepseek-r1-1360.348.2-28.2108.35980215.607908-1.8082482.0091123.1-59.60.0384710.076941NextWorldLab-16.980.2-0.20.9069640.101244-2.0783931.989344-0.0-0.40.0204550.040909
metac-Llama-3.1-1412.173.7-19.297.48349911.355267-1.6873751.9920243.5-41.80.0479090.095818minefrac1-18.851.1-0.40.8747520.122370-3.0135812.006545-0.1-0.60.0020210.004043
metac-claude-3-5-sonnet-latest-1463.974.7-19.696.85591111.206393-1.7487371.9915972.7-41.90.0422500.084500-21.691.1-0.20.7840730.082148-2.8855811.985829-0.1-0.40.0024440.004888
mmBot-21.992.1-0.20.7250100.075546-3.1501041.985550-0.1-0.40.0011040.002208
metac-claude-3-5-sonnet-20240620-1649.975.1-22.0105.32409412.153679-1.8076161.9915362.2-46.20.0373620.074725-22.190.5-0.20.9921900.104297-2.3447131.986072-0.0-0.50.0106270.021254
metac-o1-preview-1830.674.7-24.5107.51540912.439714-1.9699551.9915970.3-49.30.0263010.052601metac-grok-2-1212-23.291.1-0.30.9691800.101542-2.5044381.985829-0.1-0.50.0070320.014063
mmBot-2006.475.7-26.578.5323519.026111-2.9364461.991181-8.5-44.50.0022050.004411pgodzinai-23.276.4-0.31.0029230.114742-2.6493171.990849-0.1-0.50.0049100.009821
VeritasAI-2024.567.7-29.963.2821037.691066-3.8881871.994849-14.6-45.20.0001180.000235-24.377.1-0.30.6607030.075245-4.1859101.990482-0.2-0.50.0000380.000076
metac-grok-2-1212-2154.674.7-28.8106.09460612.275325-2.3496851.991597-4.4-53.30.0107350.021470metac-o1-preview-24.491.1-0.30.8524320.089310-2.9993961.985829-0.1-0.40.0017490.003497
metac-gpt-4o-2196.674.7-29.4100.42168411.618958-2.5308441.991597-6.3-52.50.0067560.013513-25.191.1-0.30.8735970.091528-3.0097071.985829-0.1-0.50.0016960.003391
metac-exa-2249.172.7-30.991.72329010.757526-2.8758531.992462-9.5-52.40.0026510.005302-26.189.1-0.30.7919350.083898-3.4956951.986405-0.1-0.50.0003710.000743
InstitutPelFutur-2477.372.8-34.0102.04145411.959443-2.8453911.992461-10.2-57.90.0028880.005777-26.990.1-0.30.9737670.102587-2.9085241.986114-0.1-0.50.0022920.004584
metac-Llama-3.1-28.089.1-0.30.9072000.096109-3.2702001.986405-0.1-0.50.0007670.001534
\n", "
" ], "text/plain": [ - " W_score W_count W_ave W_stdev \\\n", - "Grizeu_Bot 487.9 40.0 12.2 123.498523 \n", - "acm_bot 149.7 63.8 2.3 123.167219 \n", - "RPM_bot 145.0 6.0 24.2 31.468907 \n", - "X_bot 20.7 5.0 4.1 19.756237 \n", - "cobyj-bot 0.0 0.0 NaN NaN \n", - "andrewsiah 0.0 0.0 NaN NaN \n", - "jonahsingerbot -61.3 4.7 -13.0 5.485369 \n", - "bean_bot -70.7 4.7 -15.1 8.813137 \n", - "jkraybill_bot -76.1 38.2 -2.0 67.065479 \n", - "CumulativeBot -97.0 10.2 -9.5 30.121060 \n", - "swingswish -109.0 6.7 -16.3 15.145531 \n", - "SynapseSeer -128.5 27.1 -4.8 47.081045 \n", - "KevinTestBot -148.3 8.4 -17.7 59.369669 \n", - "twsummerbot -237.2 47.0 -5.0 79.502690 \n", - "pianobot -272.2 4.7 -57.9 92.187165 \n", - "annabot -316.0 24.8 -12.7 43.737410 \n", - "CatrachoCaster -331.3 19.7 -16.8 52.315059 \n", - "cookics_bot_TEST -413.3 24.6 -16.8 72.426694 \n", - "GreeneiBot2 -446.6 45.8 -9.8 88.553207 \n", - "metac-o1 -500.3 74.7 -6.7 111.255242 \n", - "krm-bot -521.0 9.5 -54.8 50.627856 \n", - "4Shadower -527.8 12.2 -43.3 80.791182 \n", - "MWG -766.4 29.5 -26.0 87.753338 \n", - "bot_median -780.6 75.7 -10.3 85.113891 \n", - "Bot_Pepa -814.9 37.2 -21.9 93.067285 \n", - "ajf-bot -843.1 31.4 -26.9 104.854733 \n", - "manticAI -861.5 55.0 -15.7 82.873865 \n", - "ProfessorSP -997.2 16.8 -59.4 96.919488 \n", - "metac-perplexity -1072.9 72.7 -14.8 105.315607 \n", - "wunderplumb -1159.0 23.8 -48.8 90.740106 \n", - "laylaps -1214.5 52.2 -23.3 48.019929 \n", - "NextWorldLab -1224.1 63.8 -19.2 98.662622 \n", - "metac-Gemini-Exp-1206 -1250.5 65.1 -19.2 94.993211 \n", - "minefrac1 -1289.4 43.5 -29.6 123.199791 \n", - "pgodzinai -1330.4 62.0 -21.5 98.404053 \n", - "metac-deepseek-r1 -1360.3 48.2 -28.2 108.359802 \n", - "metac-Llama-3.1 -1412.1 73.7 -19.2 97.483499 \n", - "metac-claude-3-5-sonnet-latest -1463.9 74.7 -19.6 96.855911 \n", - "metac-claude-3-5-sonnet-20240620 -1649.9 75.1 -22.0 105.324094 \n", - "metac-o1-preview -1830.6 74.7 -24.5 107.515409 \n", - "mmBot -2006.4 75.7 -26.5 
78.532351 \n", - "VeritasAI -2024.5 67.7 -29.9 63.282103 \n", - "metac-grok-2-1212 -2154.6 74.7 -28.8 106.094606 \n", - "metac-gpt-4o -2196.6 74.7 -29.4 100.421684 \n", - "metac-exa -2249.1 72.7 -30.9 91.723290 \n", - "InstitutPelFutur -2477.3 72.8 -34.0 102.041454 \n", - "\n", - " std_err t_stat t_crit upper_bound \\\n", - "Grizeu_Bot 19.539047 0.625100 2.020314 51.7 \n", - "acm_bot 15.413976 0.152116 1.997018 33.1 \n", - "RPM_bot 12.847127 1.880996 2.570582 57.2 \n", - "X_bot 8.835258 0.468897 2.776445 28.7 \n", - "cobyj-bot NaN NaN NaN NaN \n", - "andrewsiah NaN NaN NaN NaN \n", - "jonahsingerbot 2.530212 -5.154842 2.784843 -6.0 \n", - "bean_bot 4.065197 -3.702222 2.784843 -3.7 \n", - "jkraybill_bot 10.858048 -0.183706 2.023360 20.0 \n", - "CumulativeBot 9.408238 -1.005535 2.231848 11.5 \n", - "swingswish 5.851229 -2.779701 2.450387 -1.9 \n", - "SynapseSeer 9.052373 -0.524959 2.049569 13.8 \n", - "KevinTestBot 20.484482 -0.861938 2.311496 29.7 \n", - "twsummerbot 11.596659 -0.435134 2.011215 18.3 \n", - "pianobot 42.522768 -1.361786 2.798986 61.1 \n", - "annabot 8.782683 -1.450614 2.061307 5.4 \n", - "CatrachoCaster 11.786737 -1.426980 2.088777 7.8 \n", - "cookics_bot_TEST 14.602631 -1.150436 2.060845 13.3 \n", - "GreeneiBot2 13.092083 -0.745705 2.012340 16.6 \n", - "metac-o1 12.872419 -0.520339 1.991597 18.9 \n", - "krm-bot 16.425846 -3.338962 2.264709 -17.6 \n", - "4Shadower 23.130448 -1.870273 2.181695 7.2 \n", - "MWG 16.156699 -1.608077 2.043527 7.0 \n", - "bot_median 9.782560 -1.054147 1.991181 9.2 \n", - "Bot_Pepa 15.269248 -1.436551 2.025098 9.0 \n", - "ajf-bot 18.727046 -1.436020 2.037667 11.3 \n", - "manticAI 11.169634 -1.401147 2.003064 6.7 \n", - "ProfessorSP 23.645934 -2.510293 2.112371 -9.4 \n", - "metac-perplexity 12.351666 -1.194808 1.992462 9.9 \n", - "wunderplumb 18.619477 -2.620990 2.065034 -10.4 \n", - "laylaps 6.646397 -3.500587 2.005359 -9.9 \n", - "NextWorldLab 12.347306 -1.552699 1.997018 5.5 \n", - "metac-Gemini-Exp-1206 11.773405 
-1.631519 1.996377 4.3 \n", - "minefrac1 18.679504 -1.586858 2.014918 8.0 \n", - "pgodzinai 12.497327 -1.716953 1.998174 3.5 \n", - "metac-deepseek-r1 15.607908 -1.808248 2.009112 3.1 \n", - "metac-Llama-3.1 11.355267 -1.687375 1.992024 3.5 \n", - "metac-claude-3-5-sonnet-latest 11.206393 -1.748737 1.991597 2.7 \n", - "metac-claude-3-5-sonnet-20240620 12.153679 -1.807616 1.991536 2.2 \n", - "metac-o1-preview 12.439714 -1.969955 1.991597 0.3 \n", - "mmBot 9.026111 -2.936446 1.991181 -8.5 \n", - "VeritasAI 7.691066 -3.888187 1.994849 -14.6 \n", - "metac-grok-2-1212 12.275325 -2.349685 1.991597 -4.4 \n", - "metac-gpt-4o 11.618958 -2.530844 1.991597 -6.3 \n", - "metac-exa 10.757526 -2.875853 1.992462 -9.5 \n", - "InstitutPelFutur 11.959443 -2.845391 1.992461 -10.2 \n", + " W_score W_count W_ave W_stdev std_err \\\n", + "cobyj-bot 0.0 0.0 NaN NaN NaN \n", + "andrewsiah 0.0 0.0 NaN NaN NaN \n", + "bean_bot -0.6 4.7 -0.1 0.069849 0.032219 \n", + "jonahsingerbot -0.6 4.7 -0.1 0.050272 0.023189 \n", + "X_bot -0.7 7.0 -0.1 0.354068 0.133825 \n", + "CumulativeBot -1.1 10.2 -0.1 0.257798 0.080522 \n", + "swingswish -1.2 7.7 -0.2 0.140275 0.050552 \n", + "RPM_bot -1.3 7.0 -0.2 0.803163 0.303567 \n", + "SynapseSeer -1.3 26.2 -0.1 0.452555 0.088498 \n", + "KevinTestBot -1.5 8.4 -0.2 0.589466 0.203385 \n", + "Grizeu_Bot -1.7 51.4 -0.0 1.173392 0.163747 \n", + "pianobot -2.7 4.7 -0.6 0.916204 0.422613 \n", + "CatrachoCaster -3.2 19.7 -0.2 0.520901 0.117361 \n", + "krm-bot -5.1 9.5 -0.5 0.511546 0.165967 \n", + "annabot -6.2 29.3 -0.2 0.520869 0.096226 \n", + "4Shadower -6.2 14.0 -0.4 0.767322 0.205075 \n", + "cookics_bot_TEST -6.7 27.4 -0.2 0.748050 0.142908 \n", + "jkraybill_bot -7.5 44.0 -0.2 0.512853 0.077272 \n", + "twsummerbot -8.9 58.4 -0.2 0.659710 0.086327 \n", + "MWG -9.6 28.6 -0.3 0.711160 0.132979 \n", + "ProfessorSP -10.0 18.6 -0.5 0.936277 0.217094 \n", + "acm_bot -10.5 80.2 -0.1 0.914265 0.102059 \n", + "metac-o1 -10.8 91.1 -0.1 0.866824 0.090818 \n", + "ajf-bot -10.9 
34.2 -0.3 1.085589 0.185496 \n", + "metac-deepseek-r1+asknews -11.2 52.1 -0.2 0.634257 0.087871 \n", + "GreeneiBot2 -11.4 58.4 -0.2 0.846228 0.110781 \n", + "Bot_Pepa -11.5 44.0 -0.3 0.737537 0.111125 \n", + "metac-Gemini-Exp-1206 -11.5 76.5 -0.2 0.895210 0.102351 \n", + "laylaps -12.9 64.1 -0.2 0.661905 0.082674 \n", + "bot_median -13.3 92.1 -0.1 0.757201 0.078901 \n", + "wunderplumb -13.6 25.6 -0.5 0.900051 0.178062 \n", + "metac-perplexity -14.4 89.1 -0.2 1.102601 0.116810 \n", + "manticAI -14.6 69.4 -0.2 0.670946 0.080510 \n", + "NextWorldLab -16.9 80.2 -0.2 0.906964 0.101244 \n", + "minefrac1 -18.8 51.1 -0.4 0.874752 0.122370 \n", + "metac-claude-3-5-sonnet-latest -21.6 91.1 -0.2 0.784073 0.082148 \n", + "mmBot -21.9 92.1 -0.2 0.725010 0.075546 \n", + "metac-claude-3-5-sonnet-20240620 -22.1 90.5 -0.2 0.992190 0.104297 \n", + "metac-grok-2-1212 -23.2 91.1 -0.3 0.969180 0.101542 \n", + "pgodzinai -23.2 76.4 -0.3 1.002923 0.114742 \n", + "VeritasAI -24.3 77.1 -0.3 0.660703 0.075245 \n", + "metac-o1-preview -24.4 91.1 -0.3 0.852432 0.089310 \n", + "metac-gpt-4o -25.1 91.1 -0.3 0.873597 0.091528 \n", + "metac-exa -26.1 89.1 -0.3 0.791935 0.083898 \n", + "InstitutPelFutur -26.9 90.1 -0.3 0.973767 0.102587 \n", + "metac-Llama-3.1 -28.0 89.1 -0.3 0.907200 0.096109 \n", + "\n", + " t_stat t_crit upper_bound \\\n", + "cobyj-bot NaN NaN NaN \n", + "andrewsiah NaN NaN NaN \n", + "bean_bot -4.265106 2.784843 -0.0 \n", + "jonahsingerbot -5.273630 2.784843 -0.1 \n", + "X_bot -0.747195 2.446912 0.2 \n", + "CumulativeBot -1.315132 2.231848 0.1 \n", + "swingswish -3.074947 2.367123 -0.0 \n", + "RPM_bot -0.601802 2.446912 0.6 \n", + "SynapseSeer -0.568910 2.053076 0.1 \n", + "KevinTestBot -0.897116 2.311496 0.3 \n", + "Grizeu_Bot -0.206616 2.006447 0.3 \n", + "pianobot -1.384327 2.798986 0.6 \n", + "CatrachoCaster -1.365532 2.088777 0.1 \n", + "krm-bot -3.229846 2.264709 -0.2 \n", + "annabot -2.211795 2.044183 -0.0 \n", + "4Shadower -2.143194 2.147239 0.0 \n", + 
"cookics_bot_TEST -1.722004 2.049541 0.0 \n", + "jkraybill_bot -2.197133 2.014642 -0.0 \n", + "twsummerbot -1.758391 2.000855 0.0 \n", + "MWG -2.535384 2.046561 -0.1 \n", + "ProfessorSP -2.484480 2.095243 -0.1 \n", + "acm_bot -1.287717 1.989344 0.1 \n", + "metac-o1 -1.303018 1.985829 0.1 \n", + "ajf-bot -1.722395 2.030778 0.1 \n", + "metac-deepseek-r1+asknews -2.445043 2.005379 -0.0 \n", + "GreeneiBot2 -1.766811 2.000832 0.0 \n", + "Bot_Pepa -2.343166 2.014642 -0.0 \n", + "metac-Gemini-Exp-1206 -1.471849 1.990822 0.1 \n", + "laylaps -2.440461 1.996907 -0.0 \n", + "bot_median -1.830058 1.985550 0.0 \n", + "wunderplumb -2.984094 2.056603 -0.2 \n", + "metac-perplexity -1.384952 1.986405 0.1 \n", + "manticAI -2.613354 1.993968 -0.0 \n", + "NextWorldLab -2.078393 1.989344 -0.0 \n", + "minefrac1 -3.013581 2.006545 -0.1 \n", + "metac-claude-3-5-sonnet-latest -2.885581 1.985829 -0.1 \n", + "mmBot -3.150104 1.985550 -0.1 \n", + "metac-claude-3-5-sonnet-20240620 -2.344713 1.986072 -0.0 \n", + "metac-grok-2-1212 -2.504438 1.985829 -0.1 \n", + "pgodzinai -2.649317 1.990849 -0.1 \n", + "VeritasAI -4.185910 1.990482 -0.2 \n", + "metac-o1-preview -2.999396 1.985829 -0.1 \n", + "metac-gpt-4o -3.009707 1.985829 -0.1 \n", + "metac-exa -3.495695 1.986405 -0.1 \n", + "InstitutPelFutur -2.908524 1.986114 -0.1 \n", + "metac-Llama-3.1 -3.270200 1.986405 -0.1 \n", "\n", " lower_bound cdf p_value \n", - "Grizeu_Bot -27.3 0.732225 0.535551 \n", - "acm_bot -28.4 0.560209 0.879583 \n", - "RPM_bot -8.9 0.940638 0.118725 \n", - "X_bot -20.4 0.668221 0.663558 \n", "cobyj-bot NaN NaN NA \n", "andrewsiah NaN NaN NA \n", - "jonahsingerbot -20.1 0.004141 0.008283 \n", - "bean_bot -26.4 0.011925 0.023851 \n", - "jkraybill_bot -24.0 0.427622 0.855243 \n", - "CumulativeBot -30.5 0.170109 0.340218 \n", - "swingswish -30.6 0.016896 0.033793 \n", - "SynapseSeer -23.3 0.302026 0.604052 \n", - "KevinTestBot -65.0 0.207889 0.415777 \n", - "twsummerbot -28.4 0.332750 0.665500 \n", - "pianobot -176.9 0.125137 
0.250274 \n", - "annabot -30.8 0.079970 0.159940 \n", - "CatrachoCaster -41.4 0.085035 0.170071 \n", - "cookics_bot_TEST -46.9 0.130744 0.261488 \n", - "GreeneiBot2 -36.1 0.229872 0.459745 \n", - "metac-o1 -32.3 0.302194 0.604387 \n", - "krm-bot -92.0 0.004700 0.009400 \n", - "4Shadower -93.7 0.043896 0.087792 \n", - "MWG -59.0 0.059421 0.118842 \n", - "bot_median -29.8 0.147607 0.295213 \n", - "Bot_Pepa -52.9 0.079722 0.159444 \n", - "ajf-bot -65.1 0.080612 0.161224 \n", - "manticAI -38.0 0.083443 0.166886 \n", - "ProfessorSP -109.3 0.011672 0.023345 \n", - "metac-perplexity -39.4 0.118050 0.236099 \n", - "wunderplumb -87.3 0.007677 0.015353 \n", - "laylaps -36.6 0.000486 0.000971 \n", - "NextWorldLab -43.8 0.062758 0.125517 \n", - "metac-Gemini-Exp-1206 -42.7 0.053842 0.107685 \n", - "minefrac1 -67.3 0.059979 0.119958 \n", - "pgodzinai -46.4 0.045531 0.091062 \n", - "metac-deepseek-r1 -59.6 0.038471 0.076941 \n", - "metac-Llama-3.1 -41.8 0.047909 0.095818 \n", - "metac-claude-3-5-sonnet-latest -41.9 0.042250 0.084500 \n", - "metac-claude-3-5-sonnet-20240620 -46.2 0.037362 0.074725 \n", - "metac-o1-preview -49.3 0.026301 0.052601 \n", - "mmBot -44.5 0.002205 0.004411 \n", - "VeritasAI -45.2 0.000118 0.000235 \n", - "metac-grok-2-1212 -53.3 0.010735 0.021470 \n", - "metac-gpt-4o -52.5 0.006756 0.013513 \n", - "metac-exa -52.4 0.002651 0.005302 \n", - "InstitutPelFutur -57.9 0.002888 0.005777 " + "bean_bot -0.2 0.007674 0.015349 \n", + "jonahsingerbot -0.2 0.003839 0.007677 \n", + "X_bot -0.4 0.241594 0.483189 \n", + "CumulativeBot -0.3 0.110066 0.220132 \n", + "swingswish -0.3 0.009476 0.018953 \n", + "RPM_bot -0.9 0.284666 0.569332 \n", + "SynapseSeer -0.2 0.287231 0.574463 \n", + "KevinTestBot -0.7 0.198952 0.397903 \n", + "Grizeu_Bot -0.4 0.418571 0.837143 \n", + "pianobot -1.8 0.121941 0.243882 \n", + "CatrachoCaster -0.4 0.094144 0.188288 \n", + "krm-bot -0.9 0.005563 0.011127 \n", + "annabot -0.4 0.017610 0.035221 \n", + "4Shadower -0.9 0.025797 0.051593 \n", 
+ "cookics_bot_TEST -0.5 0.048384 0.096767 \n", + "jkraybill_bot -0.3 0.016721 0.033441 \n", + "twsummerbot -0.3 0.042006 0.084012 \n", + "MWG -0.6 0.008595 0.017191 \n", + "ProfessorSP -1.0 0.011644 0.023289 \n", + "acm_bot -0.3 0.100796 0.201592 \n", + "metac-o1 -0.3 0.097944 0.195889 \n", + "ajf-bot -0.7 0.047145 0.094289 \n", + "metac-deepseek-r1+asknews -0.4 0.008985 0.017970 \n", + "GreeneiBot2 -0.4 0.041290 0.082581 \n", + "Bot_Pepa -0.5 0.011905 0.023810 \n", + "metac-Gemini-Exp-1206 -0.4 0.072609 0.145218 \n", + "laylaps -0.4 0.008744 0.017488 \n", + "bot_median -0.3 0.035256 0.070512 \n", + "wunderplumb -0.9 0.003174 0.006348 \n", + "metac-perplexity -0.4 0.084782 0.169564 \n", + "manticAI -0.4 0.005507 0.011014 \n", + "NextWorldLab -0.4 0.020455 0.040909 \n", + "minefrac1 -0.6 0.002021 0.004043 \n", + "metac-claude-3-5-sonnet-latest -0.4 0.002444 0.004888 \n", + "mmBot -0.4 0.001104 0.002208 \n", + "metac-claude-3-5-sonnet-20240620 -0.5 0.010627 0.021254 \n", + "metac-grok-2-1212 -0.5 0.007032 0.014063 \n", + "pgodzinai -0.5 0.004910 0.009821 \n", + "VeritasAI -0.5 0.000038 0.000076 \n", + "metac-o1-preview -0.4 0.001749 0.003497 \n", + "metac-gpt-4o -0.5 0.001696 0.003391 \n", + "metac-exa -0.5 0.000371 0.000743 \n", + "InstitutPelFutur -0.5 0.002292 0.004584 \n", + "metac-Llama-3.1 -0.5 0.000767 0.001534 " ] }, - "execution_count": 43, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -6604,17 +7380,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# Write to csv\n", - "df_W_leaderboard.to_csv('weighted_t_test_h2h_bot_vs_pros.csv', index=True)" + "df_W_leaderboard.to_csv('notebook_outputs/weighted_t_test_h2h_bot_vs_pros.csv', index=True)" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 44, "metadata": { "cellView": "form", "colab": { @@ -6856,7 +7632,7 @@ " \n", " 12\n", " 13\n", - " metac-deepseek-r1\n", + " 
metac-deepseek-r1+asknews\n", " 516.8\n", " 277.9\n", " 1.9\n", @@ -7399,7 +8175,7 @@ "9 10 metac-claude-3-5-sonnet-latest 951.3 370.3 2.6 \n", "10 11 GreeneiBot2 1494.7 264.1 5.7 \n", "11 12 metac-perplexity 1558.4 354.4 4.4 \n", - "12 13 metac-deepseek-r1 516.8 277.9 1.9 \n", + "12 13 metac-deepseek-r1+asknews 516.8 277.9 1.9 \n", "13 14 pgodzinai 1106.7 325.4 3.4 \n", "14 15 metac-exa 599.9 365.3 1.6 \n", "15 16 MWG 253.8 113.4 2.2 \n", @@ -7528,7 +8304,7 @@ "44 0.040339 0.080679 " ] }, - "execution_count": 45, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -7567,17 +8343,17 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# Write to csv\n", - "df_W_leaderboard_print.to_csv('weighted_bot_peer_leaderboard_t_test.csv', index=False)" + "df_W_leaderboard_print.to_csv('notebook_outputs/weighted_bot_peer_leaderboard_t_test.csv', index=False)" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -7782,7 +8558,7 @@ "[5 rows x 48 columns]" ] }, - "execution_count": 47, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -7793,7 +8569,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 47, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -7811,9 +8587,9 @@ "<>:29: SyntaxWarning: invalid escape sequence '\\s'\n", "<>:29: SyntaxWarning: invalid escape sequence '\\m'\n", "<>:29: SyntaxWarning: invalid escape sequence '\\s'\n", - "/tmp/ipykernel_322865/2856056443.py:29: SyntaxWarning: invalid escape sequence '\\m'\n", + "/tmp/ipykernel_17143/2856056443.py:29: SyntaxWarning: invalid escape sequence '\\m'\n", " textstr = f'$\\mu={mu:.2f}$\\n$\\sigma={std:.2f}$'\n", - "/tmp/ipykernel_322865/2856056443.py:29: SyntaxWarning: invalid escape sequence '\\s'\n", + "/tmp/ipykernel_17143/2856056443.py:29: SyntaxWarning: invalid escape sequence '\\s'\n", " 
textstr = f'$\\mu={mu:.2f}$\\n$\\sigma={std:.2f}$'\n" ] }, @@ -7869,7 +8645,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -8291,7 +9067,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 49, "metadata": { "cellView": "form", "colab": { @@ -8341,147 +9117,147 @@ " \n", " \n", " metac-o1\n", - " 6.0\n", + " 5.9\n", " 7.2\n", - " 9.6\n", - " 12.0\n", - " 13.1\n", + " 9.5\n", + " 11.8\n", + " 12.9\n", " \n", " \n", " metac-o1-preview\n", - " 3.9\n", - " 5.2\n", + " 3.5\n", + " 5.3\n", " 8.3\n", " 11.2\n", - " 12.6\n", + " 12.7\n", " \n", " \n", " manticAI\n", - " -0.2\n", - " 2.1\n", - " 5.5\n", + " 0.3\n", + " 2.2\n", + " 5.4\n", " 8.7\n", " 10.4\n", " \n", " \n", " metac-Gemini-Exp-1206\n", - " 0.9\n", - " 2.3\n", - " 5.1\n", - " 7.8\n", - " 9.1\n", + " 0.4\n", + " 2.2\n", + " 5.0\n", + " 7.7\n", + " 9.5\n", " \n", " \n", " acm_bot\n", - " 0.3\n", + " 0.4\n", " 1.9\n", - " 4.5\n", - " 7.5\n", + " 4.6\n", + " 7.4\n", " 8.8\n", " \n", " \n", " metac-perplexity\n", - " -1.7\n", - " 0.5\n", - " 4.1\n", - " 7.7\n", - " 9.9\n", + " -1.8\n", + " 0.1\n", + " 4.2\n", + " 7.8\n", + " 9.5\n", " \n", " \n", - " twsummerbot\n", - " 0.3\n", - " 1.4\n", - " 3.9\n", - " 6.1\n", - " 7.5\n", + " GreeneiBot2\n", + " -0.6\n", + " 0.8\n", + " 4.0\n", + " 7.2\n", + " 8.7\n", " \n", " \n", - " GreeneiBot2\n", - " -1.0\n", - " 0.7\n", + " twsummerbot\n", + " 0.2\n", + " 1.4\n", " 3.8\n", - " 7.2\n", - " 8.8\n", + " 6.3\n", + " 7.4\n", " \n", " \n", " cookics_bot_TEST\n", - " 0.0\n", - " 0.9\n", - " 3.1\n", - " 5.0\n", + " -0.2\n", + " 0.8\n", + " 3.0\n", + " 5.1\n", " 6.2\n", " \n", " \n", " pgodzinai\n", - " -3.1\n", + " -3.0\n", " -1.1\n", - " 2.8\n", - " 6.9\n", - " 8.7\n", - " \n", - " \n", - " CumulativeBot\n", - " -0.2\n", - " 0.8\n", - " 2.6\n", - " 4.4\n", - " 5.4\n", + " 3.0\n", + " 6.8\n", + " 9.0\n", " \n", " \n", " metac-claude-3-5-sonnet-latest\n", - " -1.3\n", - " 0.1\n", + " 
-1.2\n", + " 0.2\n", " 2.6\n", - " 4.9\n", - " 6.2\n", + " 5.2\n", + " 6.6\n", " \n", " \n", " SynapseSeer\n", " 0.4\n", " 1.1\n", - " 2.5\n", + " 2.6\n", " 4.0\n", - " 4.9\n", + " 4.8\n", + " \n", + " \n", + " CumulativeBot\n", + " -0.5\n", + " 0.6\n", + " 2.6\n", + " 4.5\n", + " 5.4\n", " \n", " \n", " jkraybill_bot\n", - " -3.5\n", - " -1.6\n", + " -3.2\n", + " -1.3\n", " 1.7\n", - " 4.8\n", - " 6.4\n", + " 4.9\n", + " 6.5\n", " \n", " \n", " metac-exa\n", - " -5.2\n", - " -2.7\n", + " -4.8\n", + " -2.6\n", " 1.7\n", - " 5.4\n", - " 7.6\n", + " 5.7\n", + " 7.4\n", " \n", " \n", - " metac-deepseek-r1\n", - " -1.9\n", - " -0.6\n", - " 1.5\n", + " metac-deepseek-r1+asknews\n", + " -1.7\n", + " -0.7\n", + " 1.4\n", " 3.6\n", - " 4.9\n", + " 4.6\n", " \n", " \n", " MWG\n", - " -1.6\n", - " -0.9\n", + " -1.5\n", + " -0.8\n", " 0.7\n", " 2.0\n", - " 2.7\n", + " 2.8\n", " \n", " \n", " andrewsiah\n", - " -1.0\n", + " -0.9\n", " -0.6\n", " -0.0\n", " 0.6\n", - " 1.0\n", + " 0.9\n", " \n", " \n", " X_bot\n", @@ -8493,299 +9269,716 @@ " \n", " \n", " pianobot\n", - " -1.2\n", + " -1.3\n", " -0.8\n", " -0.0\n", - " 0.7\n", - " 1.1\n", + " 0.6\n", + " 1.0\n", " \n", " \n", " cobyj-bot\n", " -1.5\n", " -0.9\n", - " -0.1\n", + " -0.0\n", " 0.9\n", - " 1.4\n", + " 1.3\n", " \n", " \n", - " annabot\n", - " -3.6\n", - " -2.3\n", + " KevinTestBot\n", + " -4.1\n", + " -2.9\n", " -0.4\n", - " 1.2\n", - " 1.9\n", + " 1.5\n", + " 2.7\n", " \n", " \n", - " bean_bot\n", - " -3.0\n", - " -2.1\n", - " -0.4\n", + " annabot\n", + " -3.7\n", + " -2.3\n", + " -0.5\n", " 1.2\n", - " 2.0\n", + " 2.1\n", " \n", " \n", - " KevinTestBot\n", - " -3.8\n", - " -2.7\n", + " bean_bot\n", + " -3.1\n", + " -2.2\n", " -0.5\n", - " 1.6\n", - " 2.5\n", + " 1.1\n", + " 1.9\n", " \n", " \n", " CatrachoCaster\n", - " -2.4\n", + " -2.2\n", " -1.7\n", - " -0.8\n", + " -0.7\n", " 0.2\n", - " 0.8\n", + " 0.7\n", " \n", " \n", " jonahsingerbot\n", - " -3.0\n", - " -2.2\n", + " -2.9\n", + " -2.3\n", " -0.8\n", " 
0.4\n", " 1.0\n", " \n", " \n", " krm-bot\n", - " -3.6\n", - " -2.7\n", + " -3.5\n", + " -2.6\n", " -0.9\n", - " 0.8\n", - " 1.6\n", + " 0.6\n", + " 1.5\n", " \n", " \n", " ProfessorSP\n", - " -4.5\n", - " -3.4\n", + " -4.4\n", + " -3.2\n", " -1.0\n", " 1.0\n", - " 2.1\n", + " 2.2\n", " \n", " \n", " metac-grok-2-1212\n", - " -6.5\n", - " -4.7\n", + " -6.6\n", + " -4.8\n", " -1.4\n", " 1.8\n", - " 3.3\n", + " 3.1\n", " \n", " \n", " mmBot\n", - " -7.1\n", - " -5.2\n", + " -7.5\n", + " -5.4\n", " -1.6\n", - " 2.2\n", - " 4.1\n", + " 2.5\n", + " 4.7\n", " \n", " \n", " 4Shadower\n", - " -4.7\n", - " -3.6\n", - " -1.6\n", - " 0.3\n", + " -4.9\n", + " -3.8\n", + " -1.8\n", + " 0.1\n", " 1.2\n", " \n", " \n", - " swingswish\n", - " -5.2\n", - " -4.0\n", - " -1.9\n", - " -0.1\n", + " metac-claude-3-5-sonnet-20240620\n", + " -6.2\n", + " -4.8\n", + " -2.0\n", " 0.7\n", + " 2.0\n", " \n", " \n", " RPM_bot\n", - " -4.9\n", - " -3.9\n", + " -4.7\n", + " -3.8\n", " -2.0\n", " -0.7\n", - " -0.1\n", + " -0.2\n", " \n", " \n", - " metac-claude-3-5-sonnet-20240620\n", - " -6.5\n", - " -5.0\n", + " swingswish\n", + " -5.5\n", + " -4.3\n", " -2.1\n", - " 0.9\n", - " 2.4\n", + " -0.3\n", + " 0.5\n", " \n", " \n", " InstitutPelFutur\n", - " -9.2\n", - " -6.7\n", - " -2.5\n", - " 1.8\n", - " 3.6\n", + " -8.5\n", + " -6.5\n", + " -2.1\n", + " 1.9\n", + " 4.1\n", " \n", " \n", " metac-Llama-3.1\n", " -6.6\n", - " -5.5\n", - " -2.5\n", - " 0.2\n", + " -5.3\n", + " -2.6\n", + " 0.1\n", " 1.4\n", " \n", " \n", - " wunderplumb\n", - " -6.3\n", - " -5.2\n", - " -2.6\n", - " -0.3\n", + " wunderplumb\n", + " -6.2\n", + " -5.0\n", + " -2.7\n", + " -0.2\n", + " 0.6\n", + " \n", + " \n", + " NextWorldLab\n", + " -9.0\n", + " -6.8\n", + " -3.4\n", + " -0.4\n", + " 1.0\n", + " \n", + " \n", + " Bot_Pepa\n", + " -7.1\n", + " -5.8\n", + " -3.9\n", + " -2.0\n", + " -1.0\n", + " \n", + " \n", + " laylaps\n", + " -9.9\n", + " -7.7\n", + " -4.0\n", + " -0.1\n", + " 1.6\n", + " \n", + " \n", + " 
VeritasAI\n", + " -7.7\n", + " -6.4\n", + " -4.3\n", + " -1.7\n", + " -0.5\n", + " \n", + " \n", + " minefrac1\n", + " -7.9\n", + " -6.8\n", + " -4.5\n", + " -2.6\n", + " -1.7\n", + " \n", + " \n", + " Grizeu_Bot\n", + " -9.4\n", + " -7.5\n", + " -5.0\n", + " -2.4\n", + " -1.0\n", + " \n", + " \n", + " metac-gpt-4o\n", + " -10.2\n", + " -8.9\n", + " -5.8\n", + " -2.9\n", + " -1.5\n", + " \n", + " \n", + " ajf-bot\n", + " -14.8\n", + " -12.6\n", + " -8.4\n", + " -4.6\n", + " -2.2\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " 2.5% CI 10% CI Median 90% CI 97.5% CI\n", + "metac-o1 5.9 7.2 9.5 11.8 12.9\n", + "metac-o1-preview 3.5 5.3 8.3 11.2 12.7\n", + "manticAI 0.3 2.2 5.4 8.7 10.4\n", + "metac-Gemini-Exp-1206 0.4 2.2 5.0 7.7 9.5\n", + "acm_bot 0.4 1.9 4.6 7.4 8.8\n", + "metac-perplexity -1.8 0.1 4.2 7.8 9.5\n", + "GreeneiBot2 -0.6 0.8 4.0 7.2 8.7\n", + "twsummerbot 0.2 1.4 3.8 6.3 7.4\n", + "cookics_bot_TEST -0.2 0.8 3.0 5.1 6.2\n", + "pgodzinai -3.0 -1.1 3.0 6.8 9.0\n", + "metac-claude-3-5-sonnet-latest -1.2 0.2 2.6 5.2 6.6\n", + "SynapseSeer 0.4 1.1 2.6 4.0 4.8\n", + "CumulativeBot -0.5 0.6 2.6 4.5 5.4\n", + "jkraybill_bot -3.2 -1.3 1.7 4.9 6.5\n", + "metac-exa -4.8 -2.6 1.7 5.7 7.4\n", + "metac-deepseek-r1+asknews -1.7 -0.7 1.4 3.6 4.6\n", + "MWG -1.5 -0.8 0.7 2.0 2.8\n", + "andrewsiah -0.9 -0.6 -0.0 0.6 0.9\n", + "X_bot -0.4 -0.2 -0.0 0.1 0.2\n", + "pianobot -1.3 -0.8 -0.0 0.6 1.0\n", + "cobyj-bot -1.5 -0.9 -0.0 0.9 1.3\n", + "KevinTestBot -4.1 -2.9 -0.4 1.5 2.7\n", + "annabot -3.7 -2.3 -0.5 1.2 2.1\n", + "bean_bot -3.1 -2.2 -0.5 1.1 1.9\n", + "CatrachoCaster -2.2 -1.7 -0.7 0.2 0.7\n", + "jonahsingerbot -2.9 -2.3 -0.8 0.4 1.0\n", + "krm-bot -3.5 -2.6 -0.9 0.6 1.5\n", + "ProfessorSP -4.4 -3.2 -1.0 1.0 2.2\n", + "metac-grok-2-1212 -6.6 -4.8 -1.4 1.8 3.1\n", + "mmBot -7.5 -5.4 -1.6 2.5 4.7\n", + "4Shadower -4.9 -3.8 -1.8 0.1 1.2\n", + "metac-claude-3-5-sonnet-20240620 -6.2 -4.8 -2.0 0.7 2.0\n", + "RPM_bot -4.7 -3.8 -2.0 -0.7 -0.2\n", + "swingswish 
-5.5 -4.3 -2.1 -0.3 0.5\n", + "InstitutPelFutur -8.5 -6.5 -2.1 1.9 4.1\n", + "metac-Llama-3.1 -6.6 -5.3 -2.6 0.1 1.4\n", + "wunderplumb -6.2 -5.0 -2.7 -0.2 0.6\n", + "NextWorldLab -9.0 -6.8 -3.4 -0.4 1.0\n", + "Bot_Pepa -7.1 -5.8 -3.9 -2.0 -1.0\n", + "laylaps -9.9 -7.7 -4.0 -0.1 1.6\n", + "VeritasAI -7.7 -6.4 -4.3 -1.7 -0.5\n", + "minefrac1 -7.9 -6.8 -4.5 -2.6 -1.7\n", + "Grizeu_Bot -9.4 -7.5 -5.0 -2.4 -1.0\n", + "metac-gpt-4o -10.2 -8.9 -5.8 -2.9 -1.5\n", + "ajf-bot -14.8 -12.6 -8.4 -4.6 -2.2" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Drop 'bot_median' from all_bots list\n", + "all_bots_wo_median = np.delete(all_bots, np.where(all_bots == 'bot_median')[0][0])\n", + "df_bot_peer_wide_wo_median = df_bot_peer_wide.drop('bot_median', axis=1)\n", + "\n", + "NUM = round(df_bot_peer_wide['question_weight'].sum())\n", + "ITER = 1000\n", + "\n", + "result_df = weighted_bootstrap_analysis(df_bot_peer_wide_wo_median, all_bots_wo_median, NUM, ITER)\n", + "average_df = result_df / NUM\n", + "\n", + "print(f'BOT LEADERBOARD\\n\\n')\n", + "df_rounded = average_df.round(1)\n", + "df_rounded" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 + }, + "id": "MXAev2sNXdbZ", + "outputId": "eebb723f-5494-4b89-cf0d-efa5b1626cb7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + "
pro_question_idbot_question_idresolutionquestion_weighttypeoptionsrange_minrange_maxopen_upper_boundopen_lower_bound...metac-o1-previewmetac-perplexityminefrac1mmBotpgodzinaipianobotswingswishtwsummerbotwunderplumbbot_team_median
0312683126201.0multiple_choice[0, 1, 2-3, 4-6, >6]NaNNaNFalseFalse...2.3025855.703782NaN2.2926352.703087NaNNaNNaNNaN4.605170
1312693126386.821.0numericNone60.0100.0TrueTrue...-0.158842-0.616988NaN-0.050442-0.163369NaNNaNNaNNaN-1.512868
23127031264no1.0binaryNoneNaNNaNFalseFalse...-0.038208-0.092275NaN-0.210058-0.059485NaNNaNNaNNaN-0.149434
NextWorldLab-8.3-6.7-3.4-0.41.2331280312745-91.0multiple_choice[0-4, 5-9, >9]NaNNaNNoneNone...0.3901980.204794NaN0.1278330.152526NaNNaN-0.046520NaN0.310155
laylaps-9.9-7.7-3.8-0.12.243128131275119.21.0numericNone0.0400.0FalseFalse...0.243782-0.102791NaN0.2653720.041050NaNNaN-0.771754NaN0.184891
Bot_Pepa-7.0-6.0-3.9-1.8-0.9
\n", + "

5 rows × 57 columns

\n", + "
" + ], + "text/plain": [ + " pro_question_id bot_question_id resolution question_weight \\\n", + "0 31268 31262 0 1.0 \n", + "1 31269 31263 86.82 1.0 \n", + "2 31270 31264 no 1.0 \n", + "3 31280 31274 5-9 1.0 \n", + "4 31281 31275 119.2 1.0 \n", + "\n", + " type options range_min range_max \\\n", + "0 multiple_choice [0, 1, 2-3, 4-6, >6] NaN NaN \n", + "1 numeric None 60.0 100.0 \n", + "2 binary None NaN NaN \n", + "3 multiple_choice [0-4, 5-9, >9] NaN NaN \n", + "4 numeric None 0.0 400.0 \n", + "\n", + " open_upper_bound open_lower_bound ... metac-o1-preview metac-perplexity \\\n", + "0 False False ... 2.302585 5.703782 \n", + "1 True True ... -0.158842 -0.616988 \n", + "2 False False ... -0.038208 -0.092275 \n", + "3 None None ... 0.390198 0.204794 \n", + "4 False False ... 0.243782 -0.102791 \n", + "\n", + " minefrac1 mmBot pgodzinai pianobot swingswish twsummerbot \\\n", + "0 NaN 2.292635 2.703087 NaN NaN NaN \n", + "1 NaN -0.050442 -0.163369 NaN NaN NaN \n", + "2 NaN -0.210058 -0.059485 NaN NaN NaN \n", + "3 NaN 0.127833 0.152526 NaN NaN -0.046520 \n", + "4 NaN 0.265372 0.041050 NaN NaN -0.771754 \n", + "\n", + " wunderplumb bot_team_median \n", + "0 NaN 4.605170 \n", + "1 NaN -1.512868 \n", + "2 NaN -0.149434 \n", + "3 NaN 0.310155 \n", + "4 NaN 0.184891 \n", + "\n", + "[5 rows x 57 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
pro_question_idbot_question_idresolutionquestion_weighttypeoptionsrange_minrange_maxopen_upper_boundopen_lower_bound...metac-o1-previewmetac-perplexityminefrac1mmBotpgodzinaipianobotswingswishtwsummerbotwunderplumbbot_team_median
VeritasAI-7.8-6.6-4.3-1.9-0.4943538035345yes1.00binaryNoneNaNNaNFalseFalse...-0.054067NaNNaN0.0000000.000000NaN-0.054067-0.220515-0.054067-0.054067
minefrac1-8.0-6.7-4.6-2.5-1.3953538135354no1.00binaryNoneNaNNaNFalseFalse...-2.251292NaNNaN-0.111226NaNNaN-0.054067-0.083382-2.944439-0.111226
Grizeu_Bot-8.8-7.6-5.1-2.4-0.9963538535358yes1.00binaryNoneNaNNaNFalseFalse...-0.020834NaNNaN-0.074901NaNNaN-0.132060-0.158283-0.132060-0.158283
metac-gpt-4o-10.6-9.0-5.8-2.9-1.4973538635364no0.85binaryNoneNaNNaNFalseFalse...-0.6804300.628948NaN-0.680430-0.680430NaN-0.0912550.8117930.628948-0.091255
ajf-bot-15.0-13.0-8.6-4.4-2.0983538735367no0.85binaryNoneNaNNaNFalseFalse...-0.0177090.000000NaN-0.112251-0.017709NaN-0.163782-0.241614-0.163782-0.112251
\n", + "

5 rows × 57 columns

\n", "
" ], "text/plain": [ - " 2.5% CI 10% CI Median 90% CI 97.5% CI\n", - "metac-o1 6.0 7.2 9.6 12.0 13.1\n", - "metac-o1-preview 3.9 5.2 8.3 11.2 12.6\n", - "manticAI -0.2 2.1 5.5 8.7 10.4\n", - "metac-Gemini-Exp-1206 0.9 2.3 5.1 7.8 9.1\n", - "acm_bot 0.3 1.9 4.5 7.5 8.8\n", - "metac-perplexity -1.7 0.5 4.1 7.7 9.9\n", - "twsummerbot 0.3 1.4 3.9 6.1 7.5\n", - "GreeneiBot2 -1.0 0.7 3.8 7.2 8.8\n", - "cookics_bot_TEST 0.0 0.9 3.1 5.0 6.2\n", - "pgodzinai -3.1 -1.1 2.8 6.9 8.7\n", - "CumulativeBot -0.2 0.8 2.6 4.4 5.4\n", - "metac-claude-3-5-sonnet-latest -1.3 0.1 2.6 4.9 6.2\n", - "SynapseSeer 0.4 1.1 2.5 4.0 4.9\n", - "jkraybill_bot -3.5 -1.6 1.7 4.8 6.4\n", - "metac-exa -5.2 -2.7 1.7 5.4 7.6\n", - "metac-deepseek-r1 -1.9 -0.6 1.5 3.6 4.9\n", - "MWG -1.6 -0.9 0.7 2.0 2.7\n", - "andrewsiah -1.0 -0.6 -0.0 0.6 1.0\n", - "X_bot -0.4 -0.2 -0.0 0.1 0.2\n", - "pianobot -1.2 -0.8 -0.0 0.7 1.1\n", - "cobyj-bot -1.5 -0.9 -0.1 0.9 1.4\n", - "annabot -3.6 -2.3 -0.4 1.2 1.9\n", - "bean_bot -3.0 -2.1 -0.4 1.2 2.0\n", - "KevinTestBot -3.8 -2.7 -0.5 1.6 2.5\n", - "CatrachoCaster -2.4 -1.7 -0.8 0.2 0.8\n", - "jonahsingerbot -3.0 -2.2 -0.8 0.4 1.0\n", - "krm-bot -3.6 -2.7 -0.9 0.8 1.6\n", - "ProfessorSP -4.5 -3.4 -1.0 1.0 2.1\n", - "metac-grok-2-1212 -6.5 -4.7 -1.4 1.8 3.3\n", - "mmBot -7.1 -5.2 -1.6 2.2 4.1\n", - "4Shadower -4.7 -3.6 -1.6 0.3 1.2\n", - "swingswish -5.2 -4.0 -1.9 -0.1 0.7\n", - "RPM_bot -4.9 -3.9 -2.0 -0.7 -0.1\n", - "metac-claude-3-5-sonnet-20240620 -6.5 -5.0 -2.1 0.9 2.4\n", - "InstitutPelFutur -9.2 -6.7 -2.5 1.8 3.6\n", - "metac-Llama-3.1 -6.6 -5.5 -2.5 0.2 1.4\n", - "wunderplumb -6.3 -5.2 -2.6 -0.3 1.0\n", - "NextWorldLab -8.3 -6.7 -3.4 -0.4 1.2\n", - "laylaps -9.9 -7.7 -3.8 -0.1 2.2\n", - "Bot_Pepa -7.0 -6.0 -3.9 -1.8 -0.9\n", - "VeritasAI -7.8 -6.6 -4.3 -1.9 -0.4\n", - "minefrac1 -8.0 -6.7 -4.6 -2.5 -1.3\n", - "Grizeu_Bot -8.8 -7.6 -5.1 -2.4 -0.9\n", - "metac-gpt-4o -10.6 -9.0 -5.8 -2.9 -1.4\n", - "ajf-bot -15.0 -13.0 -8.6 -4.4 -2.0" + " pro_question_id 
bot_question_id resolution question_weight type \\\n", + "94 35380 35345 yes 1.00 binary \n", + "95 35381 35354 no 1.00 binary \n", + "96 35385 35358 yes 1.00 binary \n", + "97 35386 35364 no 0.85 binary \n", + "98 35387 35367 no 0.85 binary \n", + "\n", + " options range_min range_max open_upper_bound open_lower_bound ... \\\n", + "94 None NaN NaN False False ... \n", + "95 None NaN NaN False False ... \n", + "96 None NaN NaN False False ... \n", + "97 None NaN NaN False False ... \n", + "98 None NaN NaN False False ... \n", + "\n", + " metac-o1-preview metac-perplexity minefrac1 mmBot pgodzinai \\\n", + "94 -0.054067 NaN NaN 0.000000 0.000000 \n", + "95 -2.251292 NaN NaN -0.111226 NaN \n", + "96 -0.020834 NaN NaN -0.074901 NaN \n", + "97 -0.680430 0.628948 NaN -0.680430 -0.680430 \n", + "98 -0.017709 0.000000 NaN -0.112251 -0.017709 \n", + "\n", + " pianobot swingswish twsummerbot wunderplumb bot_team_median \n", + "94 NaN -0.054067 -0.220515 -0.054067 -0.054067 \n", + "95 NaN -0.054067 -0.083382 -2.944439 -0.111226 \n", + "96 NaN -0.132060 -0.158283 -0.132060 -0.158283 \n", + "97 NaN -0.091255 0.811793 0.628948 -0.091255 \n", + "98 NaN -0.163782 -0.241614 -0.163782 -0.112251 \n", + "\n", + "[5 rows x 57 columns]" ] }, - "execution_count": 50, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Drop 'bot_median' from all_bots list\n", - "all_bots_wo_median = np.delete(all_bots, np.where(all_bots == 'bot_median')[0][0])\n", - "df_bot_peer_wide_wo_median = df_bot_peer_wide.drop('bot_median', axis=1)\n", - "\n", - "NUM = round(df_bot_peer_wide['question_weight'].sum())\n", - "ITER = 1000\n", - "\n", - "result_df = weighted_bootstrap_analysis(df_bot_peer_wide_wo_median, all_bots_wo_median, NUM, ITER)\n", - "average_df = result_df / NUM\n", - "\n", - "print(f'BOT LEADERBOARD\\n\\n')\n", - "df_rounded = average_df.round(1)\n", - "df_rounded" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "cellView": "form", - 
"colab": { - "base_uri": "https://localhost:8080/", - "height": 125 + "output_type": "display_data" }, - "id": "MXAev2sNXdbZ", - "outputId": "eebb723f-5494-4b89-cf0d-efa5b1626cb7" - }, - "outputs": [ { "name": "stdout", "output_type": "stream", @@ -8828,31 +10021,7 @@ " \n", " \n", " \n", - " Grizeu_Bot\n", - " -9.7\n", - " -5.4\n", - " 4.4\n", - " 15.9\n", - " 22.2\n", - " \n", - " \n", - " RPM_bot\n", - " -0.1\n", - " 0.3\n", - " 1.4\n", - " 2.8\n", - " 3.7\n", - " \n", - " \n", - " X_bot\n", - " -0.4\n", - " -0.3\n", - " 0.2\n", - " 0.7\n", - " 1.2\n", - " \n", - " \n", - " andrewsiah\n", + " cobyj-bot\n", " 0.0\n", " 0.0\n", " 0.0\n", @@ -8860,7 +10029,7 @@ " 0.0\n", " \n", " \n", - " cobyj-bot\n", + " andrewsiah\n", " 0.0\n", " 0.0\n", " 0.0\n", @@ -8868,332 +10037,356 @@ " 0.0\n", " \n", " \n", - " acm_bot\n", - " -16.3\n", - " -11.3\n", - " -0.2\n", - " 14.8\n", - " 22.5\n", + " X_bot\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " jonahsingerbot\n", - " -1.4\n", - " -1.1\n", - " -0.6\n", - " -0.3\n", - " -0.1\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", " \n", " \n", " bean_bot\n", - " -1.6\n", - " -1.3\n", - " -0.7\n", - " -0.3\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " \n", + " \n", + " RPM_bot\n", " -0.1\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " CumulativeBot\n", - " -2.9\n", - " -2.3\n", - " -1.0\n", - " 0.2\n", - " 1.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", " \n", " \n", " swingswish\n", - " -2.4\n", - " -1.9\n", - " -1.1\n", - " -0.5\n", - " -0.3\n", - " \n", - " \n", - " jkraybill_bot\n", - " -8.5\n", - " -6.2\n", - " -1.1\n", - " 4.6\n", - " 7.5\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", + " -0.0\n", " \n", " \n", " KevinTestBot\n", - " -5.8\n", - " -3.9\n", - " -1.4\n", - " 0.4\n", - " 1.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " SynapseSeer\n", - " 
-6.3\n", - " -4.6\n", - " -1.5\n", - " 1.9\n", - " 3.9\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", - " pianobot\n", - " -8.0\n", - " -5.9\n", - " -2.6\n", + " Grizeu_Bot\n", " -0.2\n", + " -0.1\n", + " -0.0\n", " 0.1\n", + " 0.2\n", " \n", " \n", - " twsummerbot\n", - " -13.4\n", - " -10.3\n", - " -2.9\n", - " 4.6\n", - " 9.2\n", + " pianobot\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", " \n", " \n", " CatrachoCaster\n", - " -8.6\n", - " -6.8\n", - " -3.4\n", - " -0.3\n", - " 1.1\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " 0.0\n", + " \n", + " \n", + " krm-bot\n", + " -0.1\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " \n", + " \n", + " 4Shadower\n", + " -0.1\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", " \n", " \n", " annabot\n", - " -8.4\n", - " -6.5\n", - " -3.4\n", - " -0.6\n", - " 0.9\n", + " -0.1\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", " \n", " \n", " cookics_bot_TEST\n", - " -12.1\n", - " -9.7\n", - " -4.2\n", - " 0.1\n", - " 2.1\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " 0.0\n", " \n", " \n", - " GreeneiBot2\n", - " -17.4\n", - " -13.2\n", - " -4.9\n", - " 3.6\n", - " 7.4\n", + " jkraybill_bot\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", " \n", " \n", - " krm-bot\n", - " -10.6\n", - " -8.6\n", - " -5.3\n", - " -2.6\n", - " -1.6\n", + " twsummerbot\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " 0.0\n", " \n", " \n", - " 4Shadower\n", - " -12.8\n", - " -9.8\n", - " -5.3\n", - " -1.8\n", - " -1.1\n", + " MWG\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " \n", + " \n", + " ProfessorSP\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " -0.0\n", + " \n", + " \n", + " ajf-bot\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " 0.0\n", + " \n", + " \n", + " acm_bot\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " 0.0\n", + " 0.1\n", " \n", " \n", - " 
metac-o1\n", - " -22.7\n", - " -18.5\n", - " -6.7\n", - " 8.5\n", - " 16.1\n", + " GreeneiBot2\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " 0.0\n", " \n", " \n", - " MWG\n", - " -18.3\n", - " -14.9\n", - " -8.3\n", - " -2.2\n", - " 1.3\n", + " metac-deepseek-r1+asknews\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", " \n", " \n", - " ajf-bot\n", - " -22.3\n", - " -17.2\n", - " -8.8\n", - " -1.4\n", - " 2.5\n", + " metac-Gemini-Exp-1206\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " 0.1\n", " \n", " \n", - " bot_median\n", - " -22.7\n", - " -18.3\n", - " -9.0\n", - " 2.1\n", - " 8.9\n", + " metac-o1\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " 0.0\n", + " 0.1\n", " \n", " \n", " Bot_Pepa\n", - " -20.9\n", - " -16.3\n", - " -9.0\n", - " -1.2\n", - " 2.7\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", " \n", " \n", - " manticAI\n", - " -22.1\n", - " -17.7\n", - " -9.5\n", - " -0.7\n", - " 4.9\n", + " laylaps\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", " \n", " \n", - " ProfessorSP\n", - " -20.7\n", - " -16.8\n", - " -10.1\n", - " -4.7\n", - " -2.4\n", + " wunderplumb\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", + " -0.0\n", " \n", " \n", - " wunderplumb\n", - " -22.4\n", - " -19.1\n", - " -12.0\n", - " -5.8\n", - " -3.3\n", + " bot_median\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", + " 0.0\n", " \n", " \n", " metac-perplexity\n", - " -29.1\n", - " -24.0\n", - " -12.0\n", - " 0.8\n", - " 8.0\n", + " -0.4\n", + " -0.3\n", + " -0.1\n", + " -0.0\n", + " 0.1\n", " \n", " \n", - " laylaps\n", - " -21.0\n", - " -17.8\n", - " -12.8\n", - " -8.1\n", - " -5.8\n", + " manticAI\n", + " -0.3\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", " \n", " \n", " NextWorldLab\n", - " -28.4\n", - " -24.0\n", - " -13.6\n", - " -2.8\n", - " 4.0\n", - " \n", - " \n", - " pgodzinai\n", - " -31.7\n", - " -25.6\n", - " -14.0\n", - " -4.1\n", - " 1.9\n", - " \n", - " \n", 
- " metac-Gemini-Exp-1206\n", - " -28.1\n", - " -23.3\n", - " -14.0\n", - " -2.7\n", - " 3.2\n", - " \n", - " \n", - " metac-deepseek-r1\n", - " -30.7\n", - " -25.2\n", - " -14.6\n", - " -4.9\n", - " 0.5\n", + " -0.3\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " 0.0\n", " \n", " \n", " minefrac1\n", - " -29.8\n", - " -24.8\n", - " -14.9\n", - " -3.1\n", - " 4.1\n", + " -0.3\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", " \n", " \n", - " metac-Llama-3.1\n", - " -32.9\n", - " -26.8\n", - " -15.1\n", - " -3.3\n", - " 3.2\n", + " metac-claude-3-5-sonnet-latest\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", " \n", " \n", - " metac-claude-3-5-sonnet-latest\n", - " -32.6\n", - " -26.6\n", - " -15.9\n", - " -3.5\n", - " 3.2\n", + " mmBot\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", " \n", " \n", " metac-claude-3-5-sonnet-20240620\n", - " -35.3\n", - " -29.9\n", - " -18.2\n", - " -4.3\n", - " 2.8\n", + " -0.4\n", + " -0.4\n", + " -0.2\n", + " -0.1\n", + " -0.0\n", " \n", " \n", - " metac-o1-preview\n", - " -38.9\n", - " -32.4\n", - " -19.3\n", - " -6.9\n", - " 0.3\n", + " pgodzinai\n", + " -0.4\n", + " -0.4\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", " \n", " \n", - " mmBot\n", - " -36.2\n", - " -30.9\n", - " -21.1\n", - " -11.7\n", - " -7.1\n", + " metac-grok-2-1212\n", + " -0.4\n", + " -0.4\n", + " -0.2\n", + " -0.1\n", + " -0.1\n", " \n", " \n", " VeritasAI\n", - " -33.5\n", - " -28.9\n", - " -21.3\n", - " -14.4\n", - " -11.1\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.2\n", + " -0.1\n", " \n", " \n", - " metac-grok-2-1212\n", - " -41.8\n", - " -35.2\n", - " -23.4\n", - " -10.4\n", - " -3.8\n", + " metac-o1-preview\n", + " -0.4\n", + " -0.4\n", + " -0.3\n", + " -0.1\n", + " -0.1\n", " \n", " \n", - " metac-exa\n", - " -40.4\n", - " -34.4\n", - " -23.4\n", - " -13.8\n", - " -7.9\n", + " metac-gpt-4o\n", + " -0.4\n", + " -0.4\n", + " -0.3\n", + " -0.1\n", + " -0.1\n", " \n", " \n", - " metac-gpt-4o\n", - " 
-41.7\n", - " -34.7\n", - " -23.8\n", - " -11.3\n", - " -5.3\n", + " metac-exa\n", + " -0.4\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", " \n", " \n", " InstitutPelFutur\n", - " -43.6\n", - " -37.9\n", - " -26.5\n", - " -14.9\n", - " -6.6\n", + " -0.5\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", + " \n", + " \n", + " metac-Llama-3.1\n", + " -0.5\n", + " -0.4\n", + " -0.3\n", + " -0.2\n", + " -0.1\n", " \n", " \n", "\n", @@ -9201,55 +10394,55 @@ ], "text/plain": [ " 2.5% CI 10% CI Median 90% CI 97.5% CI\n", - "Grizeu_Bot -9.7 -5.4 4.4 15.9 22.2\n", - "RPM_bot -0.1 0.3 1.4 2.8 3.7\n", - "X_bot -0.4 -0.3 0.2 0.7 1.2\n", - "andrewsiah 0.0 0.0 0.0 0.0 0.0\n", "cobyj-bot 0.0 0.0 0.0 0.0 0.0\n", - "acm_bot -16.3 -11.3 -0.2 14.8 22.5\n", - "jonahsingerbot -1.4 -1.1 -0.6 -0.3 -0.1\n", - "bean_bot -1.6 -1.3 -0.7 -0.3 -0.1\n", - "CumulativeBot -2.9 -2.3 -1.0 0.2 1.0\n", - "swingswish -2.4 -1.9 -1.1 -0.5 -0.3\n", - "jkraybill_bot -8.5 -6.2 -1.1 4.6 7.5\n", - "KevinTestBot -5.8 -3.9 -1.4 0.4 1.1\n", - "SynapseSeer -6.3 -4.6 -1.5 1.9 3.9\n", - "pianobot -8.0 -5.9 -2.6 -0.2 0.1\n", - "twsummerbot -13.4 -10.3 -2.9 4.6 9.2\n", - "CatrachoCaster -8.6 -6.8 -3.4 -0.3 1.1\n", - "annabot -8.4 -6.5 -3.4 -0.6 0.9\n", - "cookics_bot_TEST -12.1 -9.7 -4.2 0.1 2.1\n", - "GreeneiBot2 -17.4 -13.2 -4.9 3.6 7.4\n", - "krm-bot -10.6 -8.6 -5.3 -2.6 -1.6\n", - "4Shadower -12.8 -9.8 -5.3 -1.8 -1.1\n", - "metac-o1 -22.7 -18.5 -6.7 8.5 16.1\n", - "MWG -18.3 -14.9 -8.3 -2.2 1.3\n", - "ajf-bot -22.3 -17.2 -8.8 -1.4 2.5\n", - "bot_median -22.7 -18.3 -9.0 2.1 8.9\n", - "Bot_Pepa -20.9 -16.3 -9.0 -1.2 2.7\n", - "manticAI -22.1 -17.7 -9.5 -0.7 4.9\n", - "ProfessorSP -20.7 -16.8 -10.1 -4.7 -2.4\n", - "wunderplumb -22.4 -19.1 -12.0 -5.8 -3.3\n", - "metac-perplexity -29.1 -24.0 -12.0 0.8 8.0\n", - "laylaps -21.0 -17.8 -12.8 -8.1 -5.8\n", - "NextWorldLab -28.4 -24.0 -13.6 -2.8 4.0\n", - "pgodzinai -31.7 -25.6 -14.0 -4.1 1.9\n", - "metac-Gemini-Exp-1206 -28.1 -23.3 -14.0 -2.7 3.2\n", - 
"metac-deepseek-r1 -30.7 -25.2 -14.6 -4.9 0.5\n", - "minefrac1 -29.8 -24.8 -14.9 -3.1 4.1\n", - "metac-Llama-3.1 -32.9 -26.8 -15.1 -3.3 3.2\n", - "metac-claude-3-5-sonnet-latest -32.6 -26.6 -15.9 -3.5 3.2\n", - "metac-claude-3-5-sonnet-20240620 -35.3 -29.9 -18.2 -4.3 2.8\n", - "metac-o1-preview -38.9 -32.4 -19.3 -6.9 0.3\n", - "mmBot -36.2 -30.9 -21.1 -11.7 -7.1\n", - "VeritasAI -33.5 -28.9 -21.3 -14.4 -11.1\n", - "metac-grok-2-1212 -41.8 -35.2 -23.4 -10.4 -3.8\n", - "metac-exa -40.4 -34.4 -23.4 -13.8 -7.9\n", - "metac-gpt-4o -41.7 -34.7 -23.8 -11.3 -5.3\n", - "InstitutPelFutur -43.6 -37.9 -26.5 -14.9 -6.6" + "andrewsiah 0.0 0.0 0.0 0.0 0.0\n", + "X_bot -0.0 -0.0 -0.0 0.0 0.0\n", + "jonahsingerbot -0.0 -0.0 -0.0 -0.0 -0.0\n", + "bean_bot -0.0 -0.0 -0.0 -0.0 -0.0\n", + "RPM_bot -0.1 -0.0 -0.0 0.0 0.0\n", + "CumulativeBot -0.0 -0.0 -0.0 -0.0 0.0\n", + "swingswish -0.0 -0.0 -0.0 -0.0 -0.0\n", + "KevinTestBot -0.1 -0.0 -0.0 0.0 0.0\n", + "SynapseSeer -0.1 -0.0 -0.0 0.0 0.0\n", + "Grizeu_Bot -0.2 -0.1 -0.0 0.1 0.2\n", + "pianobot -0.1 -0.1 -0.0 -0.0 0.0\n", + "CatrachoCaster -0.1 -0.1 -0.0 -0.0 0.0\n", + "krm-bot -0.1 -0.1 -0.1 -0.0 -0.0\n", + "4Shadower -0.1 -0.1 -0.1 -0.0 -0.0\n", + "annabot -0.1 -0.1 -0.1 -0.0 -0.0\n", + "cookics_bot_TEST -0.2 -0.1 -0.1 -0.0 0.0\n", + "jkraybill_bot -0.2 -0.1 -0.1 -0.0 -0.0\n", + "twsummerbot -0.2 -0.2 -0.1 -0.0 0.0\n", + "MWG -0.2 -0.2 -0.1 -0.0 -0.0\n", + "ProfessorSP -0.2 -0.2 -0.1 -0.0 -0.0\n", + "ajf-bot -0.2 -0.2 -0.1 -0.0 0.0\n", + "acm_bot -0.3 -0.2 -0.1 0.0 0.1\n", + "GreeneiBot2 -0.3 -0.2 -0.1 -0.0 0.0\n", + "metac-deepseek-r1+asknews -0.2 -0.2 -0.1 -0.1 -0.0\n", + "metac-Gemini-Exp-1206 -0.3 -0.2 -0.1 -0.0 0.1\n", + "metac-o1 -0.3 -0.2 -0.1 0.0 0.1\n", + "Bot_Pepa -0.2 -0.2 -0.1 -0.1 -0.0\n", + "laylaps -0.2 -0.2 -0.1 -0.1 -0.0\n", + "wunderplumb -0.3 -0.2 -0.1 -0.1 -0.0\n", + "bot_median -0.3 -0.2 -0.1 -0.0 0.0\n", + "metac-perplexity -0.4 -0.3 -0.1 -0.0 0.1\n", + "manticAI -0.3 -0.2 -0.2 -0.1 -0.0\n", + "NextWorldLab 
-0.3 -0.3 -0.2 -0.1 0.0\n", + "minefrac1 -0.3 -0.3 -0.2 -0.1 -0.1\n", + "metac-claude-3-5-sonnet-latest -0.4 -0.3 -0.2 -0.1 -0.1\n", + "mmBot -0.4 -0.3 -0.2 -0.1 -0.1\n", + "metac-claude-3-5-sonnet-20240620 -0.4 -0.4 -0.2 -0.1 -0.0\n", + "pgodzinai -0.4 -0.4 -0.2 -0.1 -0.1\n", + "metac-grok-2-1212 -0.4 -0.4 -0.2 -0.1 -0.1\n", + "VeritasAI -0.4 -0.3 -0.2 -0.2 -0.1\n", + "metac-o1-preview -0.4 -0.4 -0.3 -0.1 -0.1\n", + "metac-gpt-4o -0.4 -0.4 -0.3 -0.1 -0.1\n", + "metac-exa -0.4 -0.4 -0.3 -0.2 -0.1\n", + "InstitutPelFutur -0.5 -0.4 -0.3 -0.2 -0.1\n", + "metac-Llama-3.1 -0.5 -0.4 -0.3 -0.2 -0.1" ] }, - "execution_count": 51, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -9258,6 +10451,7 @@ "NUM = round(df_bot_vs_pro_peer['question_weight'].sum())\n", "ITER = 1000\n", "\n", + "display_head_and_tail(df_bot_vs_pro_peer)\n", "result_df = weighted_bootstrap_analysis(df_bot_vs_pro_peer, all_bots, NUM, ITER)\n", "average_df = result_df / NUM\n", "\n", @@ -9270,17 +10464,17 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# Write df_rounded (bootstrapping h2h) to csv\n", - "df_rounded.to_csv('bootstrapped_h2h_bot_vs_pros.csv')" + "df_rounded.to_csv('notebook_outputs/bootstrapped_h2h_bot_vs_pros.csv')" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -9340,7 +10534,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 53, "metadata": { "cellView": "form", "colab": { @@ -9476,7 +10670,7 @@ " 0.153662\n", " \n", " \n", - " metac-deepseek-r1\n", + " metac-deepseek-r1+asknews\n", " 0.8\n", " 225.8\n", " -4.2\n", @@ -9749,7 +10943,7 @@ "twsummerbot 4.9 181.9 -1.8 11.6 \n", "cookics_bot_TEST 5.8 135.2 -1.8 13.4 \n", "CumulativeBot 8.0 94.2 -3.0 18.9 \n", - "metac-deepseek-r1 0.8 225.8 -4.2 5.8 \n", + "metac-deepseek-r1+asknews 0.8 225.8 -4.2 5.8 \n", "MWG 3.6 84.8 -4.3 11.5 \n", 
"metac-perplexity 2.8 264.3 -4.8 10.3 \n", "metac-grok-2-1212 0.1 281.2 -5.7 6.0 \n", @@ -9795,7 +10989,7 @@ "twsummerbot 0.152393 \n", "cookics_bot_TEST 0.132509 \n", "CumulativeBot 0.153662 \n", - "metac-deepseek-r1 0.763142 \n", + "metac-deepseek-r1+asknews 0.763142 \n", "MWG 0.365354 \n", "metac-perplexity 0.470416 \n", "metac-grok-2-1212 0.961620 \n", @@ -9829,7 +11023,7 @@ "RPM_bot 0.126191 " ] }, - "execution_count": 54, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -9850,16 +11044,16 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ - "df_W_bot_only_peer_leaderboard.to_csv('weighted_bot_ONLY_peer_leaderboard_t_test.csv', index=True)" + "df_W_bot_only_peer_leaderboard.to_csv('notebook_outputs/weighted_bot_ONLY_peer_leaderboard_t_test.csv', index=True)" ] }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -9898,7 +11092,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 56, "metadata": { "cellView": "form", "id": "x6e1kZl12qFZ" @@ -9908,506 +11102,506 @@ "name": "stdout", "output_type": "stream", "text": [ - " >>> Collected 1 forecasts: [0.15]\n", + " >>> Collected 1 forecasts: [0.05]\n", " >>> Collected 1 forecasts: [0.35]\n", - " >>> Collected 1 forecasts: [0.95]\n", - " >>> Collected 1 forecasts: [0.75]\n", - " >>> Collected 1 forecasts: [0.1]\n", - " >>> Collected 1 forecasts: [0.7]\n", + " >>> Collected 1 forecasts: [0.9]\n", + " >>> Collected 1 forecasts: [0.85]\n", + " >>> Collected 1 forecasts: [0.05]\n", + " >>> Collected 1 forecasts: [0.8]\n", " >>> Collected 1 forecasts: [0.7]\n", " >>> Collected 1 forecasts: [0.05]\n", - " >>> Collected 1 forecasts: [0.15]\n", - " >>> Collected 1 forecasts: [0.2]\n", + " >>> Collected 1 forecasts: [0.05]\n", + " >>> Collected 1 forecasts: [0.1]\n", " >>> Collected 1 forecasts: [0.2]\n", - " >>> Collected 1 forecasts: [0.7]\n", + 
" >>> Collected 1 forecasts: [0.6]\n", + " >>> Collected 1 forecasts: [0.15]\n", " >>> Collected 1 forecasts: [0.25]\n", - " >>> Collected 1 forecasts: [0.1]\n", " >>> Collected 1 forecasts: [0.05]\n", " >>> Collected 1 forecasts: [0.15]\n", " >>> Collected 1 forecasts: [0.95]\n", - " >>> Collected 1 forecasts: [0.1]\n", + " >>> Collected 1 forecasts: [0.15]\n", + " >>> Collected 1 forecasts: [0.02]\n", " >>> Collected 1 forecasts: [0.05]\n", " >>> Collected 1 forecasts: [0.1]\n", - " >>> Collected 1 forecasts: [0.1]\n", " >>> Collected 1 forecasts: [0.25]\n", - " >>> Collected 1 forecasts: [0.15]\n", + " >>> Collected 1 forecasts: [0.2]\n", " >>> Collected 1 forecasts: [0.98]\n", - " >>> Collected 1 forecasts: [0.35]\n", - " >>> Collected 1 forecasts: [0.35]\n", - " >>> Collected 1 forecasts: [0.85]\n", - " >>> Collected 1 forecasts: [0.01]\n", + " >>> Collected 1 forecasts: [0.7]\n", + " >>> Collected 1 forecasts: [0.25]\n", + " >>> Collected 1 forecasts: [0.9]\n", + " >>> Collected 1 forecasts: [0.25]\n", " >>> Collected 1 forecasts: [0.85]\n", " >>> Collected 1 forecasts: [0.99]\n", " >>> Collected 1 forecasts: [0.2]\n", + " >>> Collected 1 forecasts: [0.3]\n", " >>> Collected 1 forecasts: [0.95]\n", " >>> Collected 1 forecasts: [0.9]\n", - " >>> Collected 1 forecasts: [0.9]\n", " >>> Collected 1 forecasts: [0.35]\n", " >>> Collected 1 forecasts: [0.9]\n", " >>> Collected 1 forecasts: [0.05]\n", - " >>> Collected 1 forecasts: [0.3]\n", - " >>> Collected 1 forecasts: [0.75]\n", " >>> Collected 1 forecasts: [0.2]\n", + " >>> Collected 1 forecasts: [0.75]\n", + " >>> Collected 1 forecasts: [0.3]\n", + " >>> Collected 1 forecasts: [0.15]\n", + " >>> Collected 1 forecasts: [0.1]\n", " >>> Collected 1 forecasts: [0.1]\n", - " >>> Collected 1 forecasts: [0.05]\n", " >>> Collected 1 forecasts: [0.1]\n", - " >>> Collected 1 forecasts: [0.15]\n", " >>> Collected 1 forecasts: [0.8]\n", " >>> Collected 1 forecasts: [0.9]\n", " >>> Collected 1 forecasts: [0.9]\n", - " >>> 
Collected 1 forecasts: [0.9]\n", + " >>> Collected 1 forecasts: [0.95]\n", " >>> Collected 1 forecasts: [0.85]\n", - " >>> Collected 1 forecasts: [0.1]\n", - " >>> Collected 2 forecasts: [0.15, 0.1]\n", - " >>> Collected 2 forecasts: [0.35, 0.6]\n", - " >>> Collected 2 forecasts: [0.95, 0.9]\n", - " >>> Collected 2 forecasts: [0.75, 0.75]\n", - " >>> Collected 2 forecasts: [0.1, 0.05]\n", + " >>> Collected 1 forecasts: [0.05]\n", + " >>> Collected 2 forecasts: [0.05, 0.1]\n", + " >>> Collected 2 forecasts: [0.35, 0.7]\n", + " >>> Collected 2 forecasts: [0.9, 0.9]\n", + " >>> Collected 2 forecasts: [0.85, 0.85]\n", + " >>> Collected 2 forecasts: [0.05, 0.05]\n", + " >>> Collected 2 forecasts: [0.8, 0.6]\n", " >>> Collected 2 forecasts: [0.7, 0.6]\n", - " >>> Collected 2 forecasts: [0.7, 0.35]\n", " >>> Collected 2 forecasts: [0.05, 0.05]\n", - " >>> Collected 2 forecasts: [0.15, 0.05]\n", - " >>> Collected 2 forecasts: [0.2, 0.25]\n", + " >>> Collected 2 forecasts: [0.05, 0.05]\n", + " >>> Collected 2 forecasts: [0.1, 0.2]\n", " >>> Collected 2 forecasts: [0.2, 0.15]\n", - " >>> Collected 2 forecasts: [0.7, 0.8]\n", - " >>> Collected 2 forecasts: [0.25, 0.35]\n", - " >>> Collected 2 forecasts: [0.1, 0.15]\n", - " >>> Collected 2 forecasts: [0.05, 0.1]\n", + " >>> Collected 2 forecasts: [0.6, 0.85]\n", + " >>> Collected 2 forecasts: [0.15, 0.5]\n", + " >>> Collected 2 forecasts: [0.25, 0.3]\n", + " >>> Collected 2 forecasts: [0.05, 0.05]\n", " >>> Collected 2 forecasts: [0.15, 0.25]\n", " >>> Collected 2 forecasts: [0.95, 0.95]\n", - " >>> Collected 2 forecasts: [0.1, 0.3]\n", + " >>> Collected 2 forecasts: [0.15, 0.35]\n", + " >>> Collected 2 forecasts: [0.02, 0.05]\n", " >>> Collected 2 forecasts: [0.05, 0.05]\n", - " >>> Collected 2 forecasts: [0.1, 0.02]\n", - " >>> Collected 2 forecasts: [0.1, 0.3]\n", - " >>> Collected 2 forecasts: [0.25, 0.3]\n", - " >>> Collected 2 forecasts: [0.15, 0.15]\n", + " >>> Collected 2 forecasts: [0.1, 0.4]\n", + " >>> Collected 2 
forecasts: [0.25, 0.35]\n", + " >>> Collected 2 forecasts: [0.2, 0.2]\n", " >>> Collected 2 forecasts: [0.98, 0.97]\n", - " >>> Collected 2 forecasts: [0.35, 0.4]\n", - " >>> Collected 2 forecasts: [0.35, 0.25]\n", - " >>> Collected 2 forecasts: [0.85, 0.7]\n", - " >>> Collected 2 forecasts: [0.01, 0.02]\n", + " >>> Collected 2 forecasts: [0.7, 0.4]\n", + " >>> Collected 2 forecasts: [0.25, 0.4]\n", + " >>> Collected 2 forecasts: [0.9, 0.7]\n", + " >>> Collected 2 forecasts: [0.25, 0.02]\n", " >>> Collected 2 forecasts: [0.85, 0.75]\n", - " >>> Collected 2 forecasts: [0.99, 0.85]\n", + " >>> Collected 2 forecasts: [0.99, 0.99]\n", " >>> Collected 2 forecasts: [0.2, 0.99]\n", - " >>> Collected 2 forecasts: [0.95, 0.25]\n", - " >>> Collected 2 forecasts: [0.9, 0.9]\n", - " >>> Collected 2 forecasts: [0.9, 0.65]\n", + " >>> Collected 2 forecasts: [0.3, 0.15]\n", + " >>> Collected 2 forecasts: [0.95, 0.9]\n", + " >>> Collected 2 forecasts: [0.9, 0.75]\n", " >>> Collected 2 forecasts: [0.35, 0.6]\n", " >>> Collected 2 forecasts: [0.9, 0.85]\n", " >>> Collected 2 forecasts: [0.05, 0.1]\n", - " >>> Collected 2 forecasts: [0.3, 0.3]\n", - " >>> Collected 2 forecasts: [0.75, 0.8]\n", - " >>> Collected 2 forecasts: [0.2, 0.15]\n", - " >>> Collected 2 forecasts: [0.1, 0.3]\n", - " >>> Collected 2 forecasts: [0.05, 0.05]\n", + " >>> Collected 2 forecasts: [0.2, 0.25]\n", + " >>> Collected 2 forecasts: [0.75, 0.7]\n", + " >>> Collected 2 forecasts: [0.3, 0.15]\n", + " >>> Collected 2 forecasts: [0.15, 0.3]\n", + " >>> Collected 2 forecasts: [0.1, 0.15]\n", " >>> Collected 2 forecasts: [0.1, 0.15]\n", - " >>> Collected 2 forecasts: [0.15, 0.05]\n", + " >>> Collected 2 forecasts: [0.1, 0.1]\n", " >>> Collected 2 forecasts: [0.8, 0.9]\n", " >>> Collected 2 forecasts: [0.9, 0.9]\n", - " >>> Collected 2 forecasts: [0.9, 0.2]\n", - " >>> Collected 2 forecasts: [0.9, 0.85]\n", - " >>> Collected 2 forecasts: [0.85, 0.75]\n", - " >>> Collected 2 forecasts: [0.1, 0.07]\n", - " >>> 
Collected 3 forecasts: [0.15, 0.1, 0.07]\n", - " >>> Collected 3 forecasts: [0.35, 0.6, 0.62]\n", - " >>> Collected 3 forecasts: [0.95, 0.9, 0.82]\n", - " >>> Collected 3 forecasts: [0.75, 0.75, 0.85]\n", - " >>> Collected 3 forecasts: [0.1, 0.05, nan]\n", + " >>> Collected 2 forecasts: [0.9, 0.4]\n", + " >>> Collected 2 forecasts: [0.95, 0.8]\n", + " >>> Collected 2 forecasts: [0.85, 0.8]\n", + " >>> Collected 2 forecasts: [0.05, 0.05]\n", + " >>> Collected 3 forecasts: [0.05, 0.1, 0.07]\n", + " >>> Collected 3 forecasts: [0.35, 0.7, 0.62]\n", + " >>> Collected 3 forecasts: [0.9, 0.9, 0.82]\n", + " >>> Collected 3 forecasts: [0.85, 0.85, 0.85]\n", + " >>> Collected 3 forecasts: [0.05, 0.05, nan]\n", + " >>> Collected 3 forecasts: [0.8, 0.6, nan]\n", " >>> Collected 3 forecasts: [0.7, 0.6, nan]\n", - " >>> Collected 3 forecasts: [0.7, 0.35, nan]\n", " >>> Collected 3 forecasts: [0.05, 0.05, nan]\n", - " >>> Collected 3 forecasts: [0.15, 0.05, nan]\n", - " >>> Collected 3 forecasts: [0.2, 0.25, 0.25]\n", + " >>> Collected 3 forecasts: [0.05, 0.05, nan]\n", + " >>> Collected 3 forecasts: [0.1, 0.2, 0.25]\n", " >>> Collected 3 forecasts: [0.2, 0.15, nan]\n", - " >>> Collected 3 forecasts: [0.7, 0.8, nan]\n", - " >>> Collected 3 forecasts: [0.25, 0.35, 0.108]\n", - " >>> Collected 3 forecasts: [0.1, 0.15, 0.16]\n", - " >>> Collected 3 forecasts: [0.05, 0.1, 0.95]\n", + " >>> Collected 3 forecasts: [0.6, 0.85, nan]\n", + " >>> Collected 3 forecasts: [0.15, 0.5, 0.108]\n", + " >>> Collected 3 forecasts: [0.25, 0.3, 0.16]\n", + " >>> Collected 3 forecasts: [0.05, 0.05, 0.95]\n", " >>> Collected 3 forecasts: [0.15, 0.25, 0.15]\n", " >>> Collected 3 forecasts: [0.95, 0.95, 0.05]\n", - " >>> Collected 3 forecasts: [0.1, 0.3, 0.125]\n", - " >>> Collected 3 forecasts: [0.05, 0.05, 0.034]\n", - " >>> Collected 3 forecasts: [0.1, 0.02, 0.03]\n", - " >>> Collected 3 forecasts: [0.1, 0.3, 0.35]\n", - " >>> Collected 3 forecasts: [0.25, 0.3, 0.35]\n", - " >>> Collected 3 forecasts: 
[0.15, 0.15, 0.115]\n", + " >>> Collected 3 forecasts: [0.15, 0.35, 0.125]\n", + " >>> Collected 3 forecasts: [0.02, 0.05, 0.034]\n", + " >>> Collected 3 forecasts: [0.05, 0.05, 0.03]\n", + " >>> Collected 3 forecasts: [0.1, 0.4, 0.35]\n", + " >>> Collected 3 forecasts: [0.25, 0.35, 0.35]\n", + " >>> Collected 3 forecasts: [0.2, 0.2, 0.115]\n", " >>> Collected 3 forecasts: [0.98, 0.97, 0.97]\n", - " >>> Collected 3 forecasts: [0.35, 0.4, 0.285]\n", - " >>> Collected 3 forecasts: [0.35, 0.25, 0.3833333333333333]\n", - " >>> Collected 3 forecasts: [0.85, 0.7, 0.17]\n", - " >>> Collected 3 forecasts: [0.01, 0.02, 0.12]\n", + " >>> Collected 3 forecasts: [0.7, 0.4, 0.285]\n", + " >>> Collected 3 forecasts: [0.25, 0.4, 0.3833333333333333]\n", + " >>> Collected 3 forecasts: [0.9, 0.7, 0.17]\n", + " >>> Collected 3 forecasts: [0.25, 0.02, 0.12]\n", " >>> Collected 3 forecasts: [0.85, 0.75, 0.875]\n", - " >>> Collected 3 forecasts: [0.99, 0.85, 0.99]\n", + " >>> Collected 3 forecasts: [0.99, 0.99, 0.99]\n", " >>> Collected 3 forecasts: [0.2, 0.99, 0.9233333333333332]\n", - " >>> Collected 3 forecasts: [0.95, 0.25, 0.14]\n", - " >>> Collected 3 forecasts: [0.9, 0.9, 0.8340000000000001]\n", - " >>> Collected 3 forecasts: [0.9, 0.65, 0.7666666666666667]\n", + " >>> Collected 3 forecasts: [0.3, 0.15, 0.4166666666666666]\n", + " >>> Collected 3 forecasts: [0.95, 0.9, 0.8340000000000001]\n", + " >>> Collected 3 forecasts: [0.9, 0.75, 0.7666666666666667]\n", " >>> Collected 3 forecasts: [0.35, 0.6, 0.875]\n", " >>> Collected 3 forecasts: [0.9, 0.85, 0.84]\n", " >>> Collected 3 forecasts: [0.05, 0.1, 0.026]\n", - " >>> Collected 3 forecasts: [0.3, 0.3, 0.16]\n", - " >>> Collected 3 forecasts: [0.75, 0.8, 0.67]\n", - " >>> Collected 3 forecasts: [0.2, 0.15, nan]\n", - " >>> Collected 3 forecasts: [0.1, 0.3, 0.3925]\n", - " >>> Collected 3 forecasts: [0.05, 0.05, 0.086]\n", + " >>> Collected 3 forecasts: [0.2, 0.25, 0.16]\n", + " >>> Collected 3 forecasts: [0.75, 0.7, 0.67]\n", + " 
>>> Collected 3 forecasts: [0.3, 0.15, nan]\n", + " >>> Collected 3 forecasts: [0.15, 0.3, 0.3925]\n", + " >>> Collected 3 forecasts: [0.1, 0.15, 0.086]\n", " >>> Collected 3 forecasts: [0.1, 0.15, 0.285]\n", - " >>> Collected 3 forecasts: [0.15, 0.05, 0.02]\n", + " >>> Collected 3 forecasts: [0.1, 0.1, 0.02]\n", " >>> Collected 3 forecasts: [0.8, 0.9, nan]\n", " >>> Collected 3 forecasts: [0.9, 0.9, 0.95]\n", - " >>> Collected 3 forecasts: [0.9, 0.2, nan]\n", - " >>> Collected 3 forecasts: [0.9, 0.85, nan]\n", - " >>> Collected 3 forecasts: [0.85, 0.75, 0.85]\n", - " >>> Collected 3 forecasts: [0.1, 0.07, 0.05]\n", - " >>> Collected 4 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999]\n", - " >>> Collected 4 forecasts: [0.35, 0.6, 0.62, 0.7]\n", - " >>> Collected 4 forecasts: [0.95, 0.9, 0.82, 0.794]\n", - " >>> Collected 4 forecasts: [0.75, 0.75, 0.85, 0.884]\n", - " >>> Collected 4 forecasts: [0.1, 0.05, nan, nan]\n", + " >>> Collected 3 forecasts: [0.9, 0.4, nan]\n", + " >>> Collected 3 forecasts: [0.95, 0.8, nan]\n", + " >>> Collected 3 forecasts: [0.85, 0.8, 0.85]\n", + " >>> Collected 3 forecasts: [0.05, 0.05, 0.05]\n", + " >>> Collected 4 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999]\n", + " >>> Collected 4 forecasts: [0.35, 0.7, 0.62, 0.7]\n", + " >>> Collected 4 forecasts: [0.9, 0.9, 0.82, 0.794]\n", + " >>> Collected 4 forecasts: [0.85, 0.85, 0.85, 0.884]\n", + " >>> Collected 4 forecasts: [0.05, 0.05, nan, nan]\n", + " >>> Collected 4 forecasts: [0.8, 0.6, nan, nan]\n", " >>> Collected 4 forecasts: [0.7, 0.6, nan, nan]\n", - " >>> Collected 4 forecasts: [0.7, 0.35, nan, nan]\n", " >>> Collected 4 forecasts: [0.05, 0.05, nan, nan]\n", - " >>> Collected 4 forecasts: [0.15, 0.05, nan, nan]\n", - " >>> Collected 4 forecasts: [0.2, 0.25, 0.25, nan]\n", + " >>> Collected 4 forecasts: [0.05, 0.05, nan, nan]\n", + " >>> Collected 4 forecasts: [0.1, 0.2, 0.25, nan]\n", " >>> Collected 4 forecasts: [0.2, 0.15, nan, 0.242]\n", - " >>> Collected 4 forecasts: [0.7, 
0.8, nan, 0.936]\n", - " >>> Collected 4 forecasts: [0.25, 0.35, 0.108, 0.264]\n", - " >>> Collected 4 forecasts: [0.1, 0.15, 0.16, 0.652]\n", - " >>> Collected 4 forecasts: [0.05, 0.1, 0.95, 0.052]\n", - " >>> Collected 4 forecasts: [0.15, 0.25, 0.15, 0.12]\n", - " >>> Collected 4 forecasts: [0.95, 0.95, 0.05, 0.918]\n", - " >>> Collected 4 forecasts: [0.1, 0.3, 0.125, 0.212]\n", - " >>> Collected 4 forecasts: [0.05, 0.05, 0.034, nan]\n", - " >>> Collected 4 forecasts: [0.1, 0.02, 0.03, 0.072]\n", - " >>> Collected 4 forecasts: [0.1, 0.3, 0.35, 0.226]\n", - " >>> Collected 4 forecasts: [0.25, 0.3, 0.35, 0.5]\n", - " >>> Collected 4 forecasts: [0.15, 0.15, 0.115, 0.102]\n", + " >>> Collected 4 forecasts: [0.6, 0.85, nan, 0.936]\n", + " >>> Collected 4 forecasts: [0.15, 0.5, 0.108, 0.264]\n", + " >>> Collected 4 forecasts: [0.25, 0.3, 0.16, 0.652]\n", + " >>> Collected 4 forecasts: [0.05, 0.05, 0.95, 0.052]\n", + " >>> Collected 4 forecasts: [0.15, 0.25, 0.15, 0.144]\n", + " >>> Collected 4 forecasts: [0.95, 0.95, 0.05, 0.866]\n", + " >>> Collected 4 forecasts: [0.15, 0.35, 0.125, 0.212]\n", + " >>> Collected 4 forecasts: [0.02, 0.05, 0.034, nan]\n", + " >>> Collected 4 forecasts: [0.05, 0.05, 0.03, 0.072]\n", + " >>> Collected 4 forecasts: [0.1, 0.4, 0.35, 0.226]\n", + " >>> Collected 4 forecasts: [0.25, 0.35, 0.35, 0.5]\n", + " >>> Collected 4 forecasts: [0.2, 0.2, 0.115, 0.102]\n", " >>> Collected 4 forecasts: [0.98, 0.97, 0.97, 0.932]\n", - " >>> Collected 4 forecasts: [0.35, 0.4, 0.285, 0.34]\n", - " >>> Collected 4 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42]\n", - " >>> Collected 4 forecasts: [0.85, 0.7, 0.17, 0.236]\n", - " >>> Collected 4 forecasts: [0.01, 0.02, 0.12, 0.29]\n", + " >>> Collected 4 forecasts: [0.7, 0.4, 0.285, 0.34]\n", + " >>> Collected 4 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42]\n", + " >>> Collected 4 forecasts: [0.9, 0.7, 0.17, 0.236]\n", + " >>> Collected 4 forecasts: [0.25, 0.02, 0.12, 0.29]\n", " >>> Collected 4 forecasts: 
[0.85, 0.75, 0.875, 0.92]\n", - " >>> Collected 4 forecasts: [0.99, 0.85, 0.99, 0.99]\n", + " >>> Collected 4 forecasts: [0.99, 0.99, 0.99, 0.99]\n", " >>> Collected 4 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954]\n", - " >>> Collected 4 forecasts: [0.95, 0.25, 0.14, 0.2]\n", - " >>> Collected 4 forecasts: [0.9, 0.9, 0.8340000000000001, nan]\n", - " >>> Collected 4 forecasts: [0.9, 0.65, 0.7666666666666667, nan]\n", + " >>> Collected 4 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2]\n", + " >>> Collected 4 forecasts: [0.95, 0.9, 0.8340000000000001, nan]\n", + " >>> Collected 4 forecasts: [0.9, 0.75, 0.7666666666666667, nan]\n", " >>> Collected 4 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999]\n", " >>> Collected 4 forecasts: [0.9, 0.85, 0.84, 0.86]\n", " >>> Collected 4 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999]\n", - " >>> Collected 4 forecasts: [0.3, 0.3, 0.16, nan]\n", - " >>> Collected 4 forecasts: [0.75, 0.8, 0.67, nan]\n", - " >>> Collected 4 forecasts: [0.2, 0.15, nan, nan]\n", - " >>> Collected 4 forecasts: [0.1, 0.3, 0.3925, nan]\n", - " >>> Collected 4 forecasts: [0.05, 0.05, 0.086, nan]\n", + " >>> Collected 4 forecasts: [0.2, 0.25, 0.16, nan]\n", + " >>> Collected 4 forecasts: [0.75, 0.7, 0.67, nan]\n", + " >>> Collected 4 forecasts: [0.3, 0.15, nan, nan]\n", + " >>> Collected 4 forecasts: [0.15, 0.3, 0.3925, nan]\n", + " >>> Collected 4 forecasts: [0.1, 0.15, 0.086, nan]\n", " >>> Collected 4 forecasts: [0.1, 0.15, 0.285, nan]\n", - " >>> Collected 4 forecasts: [0.15, 0.05, 0.02, nan]\n", + " >>> Collected 4 forecasts: [0.1, 0.1, 0.02, nan]\n", " >>> Collected 4 forecasts: [0.8, 0.9, nan, nan]\n", " >>> Collected 4 forecasts: [0.9, 0.9, 0.95, 0.905]\n", - " >>> Collected 4 forecasts: [0.9, 0.2, nan, nan]\n", - " >>> Collected 4 forecasts: [0.9, 0.85, nan, nan]\n", - " >>> Collected 4 forecasts: [0.85, 0.75, 0.85, 0.71]\n", - " >>> Collected 4 forecasts: [0.1, 0.07, 0.05, 0.02]\n", - " >>> Collected 5 forecasts: [0.15, 0.1, 0.07, 
0.0559999999999999, nan]\n", - " >>> Collected 5 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676]\n", - " >>> Collected 5 forecasts: [0.95, 0.9, 0.82, 0.794, nan]\n", - " >>> Collected 5 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76]\n", - " >>> Collected 5 forecasts: [0.1, 0.05, nan, nan, nan]\n", + " >>> Collected 4 forecasts: [0.9, 0.4, nan, nan]\n", + " >>> Collected 4 forecasts: [0.95, 0.8, nan, nan]\n", + " >>> Collected 4 forecasts: [0.85, 0.8, 0.85, 0.71]\n", + " >>> Collected 4 forecasts: [0.05, 0.05, 0.05, 0.02]\n", + " >>> Collected 5 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan]\n", + " >>> Collected 5 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676]\n", + " >>> Collected 5 forecasts: [0.9, 0.9, 0.82, 0.794, nan]\n", + " >>> Collected 5 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76]\n", + " >>> Collected 5 forecasts: [0.05, 0.05, nan, nan, nan]\n", + " >>> Collected 5 forecasts: [0.8, 0.6, nan, nan, nan]\n", " >>> Collected 5 forecasts: [0.7, 0.6, nan, nan, nan]\n", - " >>> Collected 5 forecasts: [0.7, 0.35, nan, nan, nan]\n", " >>> Collected 5 forecasts: [0.05, 0.05, nan, nan, nan]\n", - " >>> Collected 5 forecasts: [0.15, 0.05, nan, nan, nan]\n", - " >>> Collected 5 forecasts: [0.2, 0.25, 0.25, nan, nan]\n", + " >>> Collected 5 forecasts: [0.05, 0.05, nan, nan, nan]\n", + " >>> Collected 5 forecasts: [0.1, 0.2, 0.25, nan, nan]\n", " >>> Collected 5 forecasts: [0.2, 0.15, nan, 0.242, nan]\n", - " >>> Collected 5 forecasts: [0.7, 0.8, nan, 0.936, nan]\n", - " >>> Collected 5 forecasts: [0.25, 0.35, 0.108, 0.264, nan]\n", - " >>> Collected 5 forecasts: [0.1, 0.15, 0.16, 0.652, nan]\n", - " >>> Collected 5 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999]\n", - " >>> Collected 5 forecasts: [0.15, 0.25, 0.15, 0.12, 0.05]\n", - " >>> Collected 5 forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925]\n", - " >>> Collected 5 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085]\n", - " >>> Collected 5 forecasts: [0.05, 0.05, 0.034, nan, 0.0925]\n", - " >>> Collected 5 forecasts: 
[0.1, 0.02, 0.03, 0.072, 0.1]\n", - " >>> Collected 5 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999]\n", - " >>> Collected 5 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375]\n", - " >>> Collected 5 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425]\n", + " >>> Collected 5 forecasts: [0.6, 0.85, nan, 0.936, nan]\n", + " >>> Collected 5 forecasts: [0.15, 0.5, 0.108, 0.264, nan]\n", + " >>> Collected 5 forecasts: [0.25, 0.3, 0.16, 0.652, nan]\n", + " >>> Collected 5 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999]\n", + " >>> Collected 5 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05]\n", + " >>> Collected 5 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925]\n", + " >>> Collected 5 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085]\n", + " >>> Collected 5 forecasts: [0.02, 0.05, 0.034, nan, 0.0925]\n", + " >>> Collected 5 forecasts: [0.05, 0.05, 0.03, 0.072, 0.1]\n", + " >>> Collected 5 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999]\n", + " >>> Collected 5 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375]\n", + " >>> Collected 5 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425]\n", " >>> Collected 5 forecasts: [0.98, 0.97, 0.97, 0.932, 0.9475]\n", - " >>> Collected 5 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2]\n", - " >>> Collected 5 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4]\n", - " >>> Collected 5 forecasts: [0.85, 0.7, 0.17, 0.236, nan]\n", - " >>> Collected 5 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06]\n", + " >>> Collected 5 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2]\n", + " >>> Collected 5 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4]\n", + " >>> Collected 5 forecasts: [0.9, 0.7, 0.17, 0.236, nan]\n", + " >>> Collected 5 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06]\n", " >>> Collected 5 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999]\n", - " >>> Collected 5 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95]\n", + " >>> Collected 5 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95]\n", " >>> Collected 5 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002]\n", - " 
>>> Collected 5 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336]\n", - " >>> Collected 5 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan]\n", - " >>> Collected 5 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan]\n", + " >>> Collected 5 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336]\n", + " >>> Collected 5 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan]\n", + " >>> Collected 5 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan]\n", " >>> Collected 5 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999]\n", " >>> Collected 5 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999]\n", " >>> Collected 5 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05]\n", - " >>> Collected 5 forecasts: [0.3, 0.3, 0.16, nan, 0.05]\n", - " >>> Collected 5 forecasts: [0.75, 0.8, 0.67, nan, 0.76]\n", - " >>> Collected 5 forecasts: [0.2, 0.15, nan, nan, 0.2]\n", - " >>> Collected 5 forecasts: [0.1, 0.3, 0.3925, nan, 0.38]\n", - " >>> Collected 5 forecasts: [0.05, 0.05, 0.086, nan, 0.12]\n", + " >>> Collected 5 forecasts: [0.2, 0.25, 0.16, nan, 0.05]\n", + " >>> Collected 5 forecasts: [0.75, 0.7, 0.67, nan, 0.76]\n", + " >>> Collected 5 forecasts: [0.3, 0.15, nan, nan, 0.2]\n", + " >>> Collected 5 forecasts: [0.15, 0.3, 0.3925, nan, 0.38]\n", + " >>> Collected 5 forecasts: [0.1, 0.15, 0.086, nan, 0.12]\n", " >>> Collected 5 forecasts: [0.1, 0.15, 0.285, nan, 0.096]\n", - " >>> Collected 5 forecasts: [0.15, 0.05, 0.02, nan, 0.098]\n", + " >>> Collected 5 forecasts: [0.1, 0.1, 0.02, nan, 0.098]\n", " >>> Collected 5 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999]\n", " >>> Collected 5 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78]\n", - " >>> Collected 5 forecasts: [0.9, 0.2, nan, nan, 0.05]\n", - " >>> Collected 5 forecasts: [0.9, 0.85, nan, nan, 0.744]\n", - " >>> Collected 5 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55]\n", - " >>> Collected 5 forecasts: [0.1, 0.07, 0.05, 0.02, 0.052]\n", - " >>> Collected 6 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999, nan, 
0.175]\n", - " >>> Collected 6 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676, 0.5]\n", - " >>> Collected 6 forecasts: [0.95, 0.9, 0.82, 0.794, nan, 0.75]\n", - " >>> Collected 6 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76, 0.85]\n", - " >>> Collected 6 forecasts: [0.1, 0.05, nan, nan, nan, 0.15]\n", - " >>> Collected 6 forecasts: [0.7, 0.6, nan, nan, nan, 0.7]\n", - " >>> Collected 6 forecasts: [0.7, 0.35, nan, nan, nan, 0.65]\n", + " >>> Collected 5 forecasts: [0.9, 0.4, nan, nan, 0.05]\n", + " >>> Collected 5 forecasts: [0.95, 0.8, nan, nan, 0.744]\n", + " >>> Collected 5 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55]\n", + " >>> Collected 5 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052]\n", + " >>> Collected 6 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan, 0.175]\n", + " >>> Collected 6 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676, 0.5]\n", + " >>> Collected 6 forecasts: [0.9, 0.9, 0.82, 0.794, nan, 0.75]\n", + " >>> Collected 6 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76, 0.85]\n", + " >>> Collected 6 forecasts: [0.05, 0.05, nan, nan, nan, 0.15]\n", + " >>> Collected 6 forecasts: [0.8, 0.6, nan, nan, nan, 0.7]\n", + " >>> Collected 6 forecasts: [0.7, 0.6, nan, nan, nan, 0.65]\n", + " >>> Collected 6 forecasts: [0.05, 0.05, nan, nan, nan, 0.15]\n", " >>> Collected 6 forecasts: [0.05, 0.05, nan, nan, nan, 0.15]\n", - " >>> Collected 6 forecasts: [0.15, 0.05, nan, nan, nan, 0.15]\n", - " >>> Collected 6 forecasts: [0.2, 0.25, 0.25, nan, nan, 0.225]\n", + " >>> Collected 6 forecasts: [0.1, 0.2, 0.25, nan, nan, 0.225]\n", " >>> Collected 6 forecasts: [0.2, 0.15, nan, 0.242, nan, 0.275]\n", - " >>> Collected 6 forecasts: [0.7, 0.8, nan, 0.936, nan, 0.85]\n", - " >>> Collected 6 forecasts: [0.25, 0.35, 0.108, 0.264, nan, 0.2]\n", - " >>> Collected 6 forecasts: [0.1, 0.15, 0.16, 0.652, nan, 0.275]\n", - " >>> Collected 6 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999, 0.125]\n", - " >>> Collected 6 forecasts: [0.15, 0.25, 0.15, 0.12, 0.05, 0.15]\n", - " >>> Collected 6 
forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925, 0.85]\n", - " >>> Collected 6 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085, 0.725]\n", - " >>> Collected 6 forecasts: [0.05, 0.05, 0.034, nan, 0.0925, 0.125]\n", - " >>> Collected 6 forecasts: [0.1, 0.02, 0.03, 0.072, 0.1, 0.075]\n", - " >>> Collected 6 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999, 0.275]\n", - " >>> Collected 6 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375, 0.35]\n", - " >>> Collected 6 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425, 0.275]\n", + " >>> Collected 6 forecasts: [0.6, 0.85, nan, 0.936, nan, 0.85]\n", + " >>> Collected 6 forecasts: [0.15, 0.5, 0.108, 0.264, nan, 0.2]\n", + " >>> Collected 6 forecasts: [0.25, 0.3, 0.16, 0.652, nan, 0.275]\n", + " >>> Collected 6 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999, 0.125]\n", + " >>> Collected 6 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05, 0.15]\n", + " >>> Collected 6 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925, 0.85]\n", + " >>> Collected 6 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085, 0.725]\n", + " >>> Collected 6 forecasts: [0.02, 0.05, 0.034, nan, 0.0925, 0.125]\n", + " >>> Collected 6 forecasts: [0.05, 0.05, 0.03, 0.072, 0.1, 0.075]\n", + " >>> Collected 6 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999, 0.275]\n", + " >>> Collected 6 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375, 0.35]\n", + " >>> Collected 6 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425, 0.275]\n", " >>> Collected 6 forecasts: [0.98, 0.97, 0.97, 0.932, 0.9475, 0.5]\n", - " >>> Collected 6 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2, 0.35]\n", - " >>> Collected 6 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4, 0.35]\n", - " >>> Collected 6 forecasts: [0.85, 0.7, 0.17, 0.236, nan, 0.3]\n", - " >>> Collected 6 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06, 0.05]\n", + " >>> Collected 6 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2, 0.35]\n", + " >>> Collected 6 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4, 0.35]\n", + " >>> Collected 6 forecasts: [0.9, 0.7, 0.17, 0.236, 
nan, 0.3]\n", + " >>> Collected 6 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06, 0.05]\n", " >>> Collected 6 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75]\n", - " >>> Collected 6 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95, 0.5]\n", + " >>> Collected 6 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95, 0.5]\n", " >>> Collected 6 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5]\n", - " >>> Collected 6 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336, 0.325]\n", - " >>> Collected 6 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan, nan]\n", - " >>> Collected 6 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan, nan]\n", + " >>> Collected 6 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336, 0.325]\n", + " >>> Collected 6 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan, nan]\n", + " >>> Collected 6 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan, nan]\n", " >>> Collected 6 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75]\n", " >>> Collected 6 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75]\n", " >>> Collected 6 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085]\n", - " >>> Collected 6 forecasts: [0.3, 0.3, 0.16, nan, 0.05, 0.225]\n", - " >>> Collected 6 forecasts: [0.75, 0.8, 0.67, nan, 0.76, 0.725]\n", - " >>> Collected 6 forecasts: [0.2, 0.15, nan, nan, 0.2, 0.2]\n", - " >>> Collected 6 forecasts: [0.1, 0.3, 0.3925, nan, 0.38, 0.675]\n", - " >>> Collected 6 forecasts: [0.05, 0.05, 0.086, nan, 0.12, 0.1]\n", + " >>> Collected 6 forecasts: [0.2, 0.25, 0.16, nan, 0.05, 0.225]\n", + " >>> Collected 6 forecasts: [0.75, 0.7, 0.67, nan, 0.76, 0.725]\n", + " >>> Collected 6 forecasts: [0.3, 0.15, nan, nan, 0.2, 0.2]\n", + " >>> Collected 6 forecasts: [0.15, 0.3, 0.3925, nan, 0.38, 0.675]\n", + " >>> Collected 6 forecasts: [0.1, 0.15, 0.086, nan, 0.12, 0.1]\n", " >>> Collected 6 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15]\n", - " >>> Collected 6 forecasts: [0.15, 0.05, 0.02, nan, 0.098, 
0.05]\n", + " >>> Collected 6 forecasts: [0.1, 0.1, 0.02, nan, 0.098, 0.05]\n", " >>> Collected 6 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935]\n", " >>> Collected 6 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78, 0.935]\n", - " >>> Collected 6 forecasts: [0.9, 0.2, nan, nan, 0.05, 0.055]\n", - " >>> Collected 6 forecasts: [0.9, 0.85, nan, nan, 0.744, 0.8]\n", - " >>> Collected 6 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55, 0.475]\n", - " >>> Collected 6 forecasts: [0.1, 0.07, 0.05, 0.02, 0.052, 0.04]\n", - " >>> Collected 7 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.28]\n", - " >>> Collected 7 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676, 0.5, 0.65]\n", - " >>> Collected 7 forecasts: [0.95, 0.9, 0.82, 0.794, nan, 0.75, 0.88]\n", - " >>> Collected 7 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76, 0.85, 0.8]\n", - " >>> Collected 7 forecasts: [0.1, 0.05, nan, nan, nan, 0.15, 0.15]\n", - " >>> Collected 7 forecasts: [0.7, 0.6, nan, nan, nan, 0.7, 0.75]\n", - " >>> Collected 7 forecasts: [0.7, 0.35, nan, nan, nan, 0.65, 0.78]\n", + " >>> Collected 6 forecasts: [0.9, 0.4, nan, nan, 0.05, 0.055]\n", + " >>> Collected 6 forecasts: [0.95, 0.8, nan, nan, 0.744, 0.8]\n", + " >>> Collected 6 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55, 0.475]\n", + " >>> Collected 6 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052, 0.04]\n", + " >>> Collected 7 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.27]\n", + " >>> Collected 7 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676, 0.5, 0.3]\n", + " >>> Collected 7 forecasts: [0.9, 0.9, 0.82, 0.794, nan, 0.75, 0.88]\n", + " >>> Collected 7 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76, 0.85, 0.75]\n", + " >>> Collected 7 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15]\n", + " >>> Collected 7 forecasts: [0.8, 0.6, nan, nan, nan, 0.7, 0.75]\n", + " >>> Collected 7 forecasts: [0.7, 0.6, nan, nan, nan, 0.65, 0.78]\n", + " >>> Collected 7 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15]\n", " >>> Collected 7 forecasts: [0.05, 
0.05, nan, nan, nan, 0.15, 0.1]\n", - " >>> Collected 7 forecasts: [0.15, 0.05, nan, nan, nan, 0.15, 0.15]\n", - " >>> Collected 7 forecasts: [0.2, 0.25, 0.25, nan, nan, 0.225, 0.18]\n", + " >>> Collected 7 forecasts: [0.1, 0.2, 0.25, nan, nan, 0.225, 0.18]\n", " >>> Collected 7 forecasts: [0.2, 0.15, nan, 0.242, nan, 0.275, 0.2]\n", - " >>> Collected 7 forecasts: [0.7, 0.8, nan, 0.936, nan, 0.85, nan]\n", - " >>> Collected 7 forecasts: [0.25, 0.35, 0.108, 0.264, nan, 0.2, 0.35]\n", - " >>> Collected 7 forecasts: [0.1, 0.15, 0.16, 0.652, nan, 0.275, 0.15]\n", - " >>> Collected 7 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999, 0.125, 0.05]\n", - " >>> Collected 7 forecasts: [0.15, 0.25, 0.15, 0.12, 0.05, 0.15, 0.1]\n", - " >>> Collected 7 forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925, 0.85, 0.9]\n", - " >>> Collected 7 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085, 0.725, 0.15]\n", - " >>> Collected 7 forecasts: [0.05, 0.05, 0.034, nan, 0.0925, 0.125, nan]\n", - " >>> Collected 7 forecasts: [0.1, 0.02, 0.03, 0.072, 0.1, 0.075, 0.1]\n", - " >>> Collected 7 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999, 0.275, 0.27]\n", - " >>> Collected 7 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375, 0.35, 0.65]\n", - " >>> Collected 7 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425, 0.275, nan]\n", + " >>> Collected 7 forecasts: [0.6, 0.85, nan, 0.936, nan, 0.85, nan]\n", + " >>> Collected 7 forecasts: [0.15, 0.5, 0.108, 0.264, nan, 0.2, 0.35]\n", + " >>> Collected 7 forecasts: [0.25, 0.3, 0.16, 0.652, nan, 0.275, 0.15]\n", + " >>> Collected 7 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999, 0.125, 0.02]\n", + " >>> Collected 7 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05, 0.15, 0.1]\n", + " >>> Collected 7 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925, 0.85, 0.9]\n", + " >>> Collected 7 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085, 0.725, 0.2]\n", + " >>> Collected 7 forecasts: [0.02, 0.05, 0.034, nan, 0.0925, 0.125, nan]\n", + " >>> Collected 7 forecasts: [0.05, 0.05, 0.03, 
0.072, 0.1, 0.075, 0.15]\n", + " >>> Collected 7 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999, 0.275, 0.15]\n", + " >>> Collected 7 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375, 0.35, 0.38]\n", + " >>> Collected 7 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425, 0.275, nan]\n", " >>> Collected 7 forecasts: [0.98, 0.97, 0.97, 0.932, 0.9475, 0.5, nan]\n", - " >>> Collected 7 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2, 0.35, nan]\n", - " >>> Collected 7 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4, 0.35, 0.65]\n", - " >>> Collected 7 forecasts: [0.85, 0.7, 0.17, 0.236, nan, 0.3, 0.1]\n", - " >>> Collected 7 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06, 0.05, nan]\n", - " >>> Collected 7 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.65]\n", - " >>> Collected 7 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95, 0.5, 0.99]\n", + " >>> Collected 7 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2, 0.35, nan]\n", + " >>> Collected 7 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4, 0.35, 0.28]\n", + " >>> Collected 7 forecasts: [0.9, 0.7, 0.17, 0.236, nan, 0.3, 0.35]\n", + " >>> Collected 7 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06, 0.05, nan]\n", + " >>> Collected 7 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.7]\n", + " >>> Collected 7 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95, 0.5, 0.99]\n", " >>> Collected 7 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98]\n", - " >>> Collected 7 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336, 0.325, 0.2]\n", - " >>> Collected 7 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan, nan, 0.15]\n", - " >>> Collected 7 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan, nan, 0.55]\n", + " >>> Collected 7 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336, 0.325, 0.2]\n", + " >>> Collected 7 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan, nan, 0.38]\n", + " >>> Collected 7 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan, nan, 0.65]\n", " >>> Collected 7 forecasts: [0.35, 0.6, 
0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 0.27]\n", - " >>> Collected 7 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.9]\n", - " >>> Collected 7 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.1]\n", - " >>> Collected 7 forecasts: [0.3, 0.3, 0.16, nan, 0.05, 0.225, 0.35]\n", - " >>> Collected 7 forecasts: [0.75, 0.8, 0.67, nan, 0.76, 0.725, 0.78]\n", - " >>> Collected 7 forecasts: [0.2, 0.15, nan, nan, 0.2, 0.2, 0.15]\n", - " >>> Collected 7 forecasts: [0.1, 0.3, 0.3925, nan, 0.38, 0.675, 0.15]\n", - " >>> Collected 7 forecasts: [0.05, 0.05, 0.086, nan, 0.12, 0.1, 0.05]\n", - " >>> Collected 7 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.05]\n", - " >>> Collected 7 forecasts: [0.15, 0.05, 0.02, nan, 0.098, 0.05, 0.05]\n", - " >>> Collected 7 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.75]\n", + " >>> Collected 7 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.85]\n", + " >>> Collected 7 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.05]\n", + " >>> Collected 7 forecasts: [0.2, 0.25, 0.16, nan, 0.05, 0.225, 0.9]\n", + " >>> Collected 7 forecasts: [0.75, 0.7, 0.67, nan, 0.76, 0.725, 0.78]\n", + " >>> Collected 7 forecasts: [0.3, 0.15, nan, nan, 0.2, 0.2, 0.2]\n", + " >>> Collected 7 forecasts: [0.15, 0.3, 0.3925, nan, 0.38, 0.675, 0.75]\n", + " >>> Collected 7 forecasts: [0.1, 0.15, 0.086, nan, 0.12, 0.1, 0.1]\n", + " >>> Collected 7 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.07]\n", + " >>> Collected 7 forecasts: [0.1, 0.1, 0.02, nan, 0.098, 0.05, 0.1]\n", + " >>> Collected 7 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.85]\n", " >>> Collected 7 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78, 0.935, 0.95]\n", - " >>> Collected 7 forecasts: [0.9, 0.2, nan, nan, 0.05, 0.055, 0.65]\n", - " >>> Collected 7 forecasts: [0.9, 0.85, nan, nan, 0.744, 0.8, 0.75]\n", - " >>> Collected 7 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55, 0.475, 0.9]\n", - " >>> Collected 7 
forecasts: [0.1, 0.07, 0.05, 0.02, 0.052, 0.04, 0.02]\n", - " >>> Collected 8 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.28, nan]\n", - " >>> Collected 8 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676, 0.5, 0.65, nan]\n", - " >>> Collected 8 forecasts: [0.95, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan]\n", - " >>> Collected 8 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76, 0.85, 0.8, nan]\n", - " >>> Collected 8 forecasts: [0.1, 0.05, nan, nan, nan, 0.15, 0.15, nan]\n", - " >>> Collected 8 forecasts: [0.7, 0.6, nan, nan, nan, 0.7, 0.75, nan]\n", - " >>> Collected 8 forecasts: [0.7, 0.35, nan, nan, nan, 0.65, 0.78, nan]\n", + " >>> Collected 7 forecasts: [0.9, 0.4, nan, nan, 0.05, 0.055, 0.65]\n", + " >>> Collected 7 forecasts: [0.95, 0.8, nan, nan, 0.744, 0.8, 0.75]\n", + " >>> Collected 7 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55, 0.475, 0.1]\n", + " >>> Collected 7 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052, 0.04, 0.02]\n", + " >>> Collected 8 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.27, nan]\n", + " >>> Collected 8 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676, 0.5, 0.3, nan]\n", + " >>> Collected 8 forecasts: [0.9, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan]\n", + " >>> Collected 8 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76, 0.85, 0.75, nan]\n", + " >>> Collected 8 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan]\n", + " >>> Collected 8 forecasts: [0.8, 0.6, nan, nan, nan, 0.7, 0.75, nan]\n", + " >>> Collected 8 forecasts: [0.7, 0.6, nan, nan, nan, 0.65, 0.78, nan]\n", + " >>> Collected 8 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan]\n", " >>> Collected 8 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.1, nan]\n", - " >>> Collected 8 forecasts: [0.15, 0.05, nan, nan, nan, 0.15, 0.15, nan]\n", - " >>> Collected 8 forecasts: [0.2, 0.25, 0.25, nan, nan, 0.225, 0.18, nan]\n", + " >>> Collected 8 forecasts: [0.1, 0.2, 0.25, nan, nan, 0.225, 0.18, nan]\n", " >>> Collected 8 forecasts: [0.2, 0.15, nan, 0.242, nan, 0.275, 0.2, nan]\n", 
- " >>> Collected 8 forecasts: [0.7, 0.8, nan, 0.936, nan, 0.85, nan, nan]\n", - " >>> Collected 8 forecasts: [0.25, 0.35, 0.108, 0.264, nan, 0.2, 0.35, nan]\n", - " >>> Collected 8 forecasts: [0.1, 0.15, 0.16, 0.652, nan, 0.275, 0.15, nan]\n", - " >>> Collected 8 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999, 0.125, 0.05, nan]\n", - " >>> Collected 8 forecasts: [0.15, 0.25, 0.15, 0.12, 0.05, 0.15, 0.1, nan]\n", - " >>> Collected 8 forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925, 0.85, 0.9, nan]\n", - " >>> Collected 8 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085, 0.725, 0.15, nan]\n", - " >>> Collected 8 forecasts: [0.05, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan]\n", - " >>> Collected 8 forecasts: [0.1, 0.02, 0.03, 0.072, 0.1, 0.075, 0.1, 0.124]\n", - " >>> Collected 8 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999, 0.275, 0.27, 0.6765]\n", - " >>> Collected 8 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375, 0.35, 0.65, 0.55]\n", - " >>> Collected 8 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425, 0.275, nan, 0.195]\n", + " >>> Collected 8 forecasts: [0.6, 0.85, nan, 0.936, nan, 0.85, nan, nan]\n", + " >>> Collected 8 forecasts: [0.15, 0.5, 0.108, 0.264, nan, 0.2, 0.35, nan]\n", + " >>> Collected 8 forecasts: [0.25, 0.3, 0.16, 0.652, nan, 0.275, 0.15, nan]\n", + " >>> Collected 8 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999, 0.125, 0.02, nan]\n", + " >>> Collected 8 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05, 0.15, 0.1, nan]\n", + " >>> Collected 8 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925, 0.85, 0.9, nan]\n", + " >>> Collected 8 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085, 0.725, 0.2, nan]\n", + " >>> Collected 8 forecasts: [0.02, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan]\n", + " >>> Collected 8 forecasts: [0.05, 0.05, 0.03, 0.072, 0.1, 0.075, 0.15, 0.124]\n", + " >>> Collected 8 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999, 0.275, 0.15, 0.6765]\n", + " >>> Collected 8 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375, 0.35, 0.38, 0.55]\n", + " >>> 
Collected 8 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425, 0.275, nan, 0.195]\n", " >>> Collected 8 forecasts: [0.98, 0.97, 0.97, 0.932, 0.9475, 0.5, nan, 0.95]\n", - " >>> Collected 8 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375]\n", - " >>> Collected 8 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4, 0.35, 0.65, 0.513]\n", - " >>> Collected 8 forecasts: [0.85, 0.7, 0.17, 0.236, nan, 0.3, 0.1, 0.6485000000000001]\n", - " >>> Collected 8 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345]\n", - " >>> Collected 8 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.65, 0.85]\n", - " >>> Collected 8 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95, 0.5, 0.99, nan]\n", + " >>> Collected 8 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375]\n", + " >>> Collected 8 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4, 0.35, 0.28, 0.513]\n", + " >>> Collected 8 forecasts: [0.9, 0.7, 0.17, 0.236, nan, 0.3, 0.35, 0.6485000000000001]\n", + " >>> Collected 8 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345]\n", + " >>> Collected 8 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.7, 0.85]\n", + " >>> Collected 8 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95, 0.5, 0.99, nan]\n", " >>> Collected 8 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98, 0.95]\n", - " >>> Collected 8 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336, 0.325, 0.2, 0.34]\n", - " >>> Collected 8 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan, nan, 0.15, nan]\n", - " >>> Collected 8 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan, nan, 0.55, nan]\n", + " >>> Collected 8 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336, 0.325, 0.2, 0.34]\n", + " >>> Collected 8 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan, nan, 0.38, nan]\n", + " >>> Collected 8 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan, nan, 0.65, nan]\n", " >>> Collected 8 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 
0.27, 0.847]\n", - " >>> Collected 8 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.9, 0.8620000000000001]\n", - " >>> Collected 8 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.1, 0.1615]\n", - " >>> Collected 8 forecasts: [0.3, 0.3, 0.16, nan, 0.05, 0.225, 0.35, 0.55]\n", - " >>> Collected 8 forecasts: [0.75, 0.8, 0.67, nan, 0.76, 0.725, 0.78, 0.85]\n", - " >>> Collected 8 forecasts: [0.2, 0.15, nan, nan, 0.2, 0.2, 0.15, 0.223]\n", - " >>> Collected 8 forecasts: [0.1, 0.3, 0.3925, nan, 0.38, 0.675, 0.15, 0.58]\n", - " >>> Collected 8 forecasts: [0.05, 0.05, 0.086, nan, 0.12, 0.1, 0.05, 0.1109999999999999]\n", - " >>> Collected 8 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.05, 0.125]\n", - " >>> Collected 8 forecasts: [0.15, 0.05, 0.02, nan, 0.098, 0.05, 0.05, 0.073]\n", - " >>> Collected 8 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.75, 0.94]\n", + " >>> Collected 8 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.85, 0.8620000000000001]\n", + " >>> Collected 8 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.05, 0.1615]\n", + " >>> Collected 8 forecasts: [0.2, 0.25, 0.16, nan, 0.05, 0.225, 0.9, 0.55]\n", + " >>> Collected 8 forecasts: [0.75, 0.7, 0.67, nan, 0.76, 0.725, 0.78, 0.85]\n", + " >>> Collected 8 forecasts: [0.3, 0.15, nan, nan, 0.2, 0.2, 0.2, 0.223]\n", + " >>> Collected 8 forecasts: [0.15, 0.3, 0.3925, nan, 0.38, 0.675, 0.75, 0.58]\n", + " >>> Collected 8 forecasts: [0.1, 0.15, 0.086, nan, 0.12, 0.1, 0.1, 0.1109999999999999]\n", + " >>> Collected 8 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.07, 0.125]\n", + " >>> Collected 8 forecasts: [0.1, 0.1, 0.02, nan, 0.098, 0.05, 0.1, 0.073]\n", + " >>> Collected 8 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.85, 0.94]\n", " >>> Collected 8 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78, 0.935, 0.95, 0.785]\n", - " >>> Collected 8 forecasts: [0.9, 0.2, nan, nan, 0.05, 0.055, 0.65, 0.067]\n", - " >>> Collected 
8 forecasts: [0.9, 0.85, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001]\n", - " >>> Collected 8 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55, 0.475, 0.9, 0.708]\n", - " >>> Collected 8 forecasts: [0.1, 0.07, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042]\n", - " >>> Collected 9 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.28, nan, 0.15]\n", - " >>> Collected 9 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676, 0.5, 0.65, nan, 0.35]\n", - " >>> Collected 9 forecasts: [0.95, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan, 0.8]\n", - " >>> Collected 9 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76, 0.85, 0.8, nan, 0.85]\n", - " >>> Collected 9 forecasts: [0.1, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.05]\n", - " >>> Collected 9 forecasts: [0.7, 0.6, nan, nan, nan, 0.7, 0.75, nan, 0.35]\n", - " >>> Collected 9 forecasts: [0.7, 0.35, nan, nan, nan, 0.65, 0.78, nan, 0.75]\n", + " >>> Collected 8 forecasts: [0.9, 0.4, nan, nan, 0.05, 0.055, 0.65, 0.067]\n", + " >>> Collected 8 forecasts: [0.95, 0.8, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001]\n", + " >>> Collected 8 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55, 0.475, 0.1, 0.708]\n", + " >>> Collected 8 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042]\n", + " >>> Collected 9 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.27, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676, 0.5, 0.3, nan, 0.75]\n", + " >>> Collected 9 forecasts: [0.9, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan, 0.8]\n", + " >>> Collected 9 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76, 0.85, 0.75, nan, 0.9]\n", + " >>> Collected 9 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.8, 0.6, nan, nan, nan, 0.7, 0.75, nan, 0.35]\n", + " >>> Collected 9 forecasts: [0.7, 0.6, nan, nan, nan, 0.65, 0.78, nan, 0.85]\n", + " >>> Collected 9 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.15]\n", " >>> Collected 9 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.1, nan, 
0.05]\n", - " >>> Collected 9 forecasts: [0.15, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.05]\n", - " >>> Collected 9 forecasts: [0.2, 0.25, 0.25, nan, nan, 0.225, 0.18, nan, 0.25]\n", + " >>> Collected 9 forecasts: [0.1, 0.2, 0.25, nan, nan, 0.225, 0.18, nan, 0.2]\n", " >>> Collected 9 forecasts: [0.2, 0.15, nan, 0.242, nan, 0.275, 0.2, nan, 0.25]\n", - " >>> Collected 9 forecasts: [0.7, 0.8, nan, 0.936, nan, 0.85, nan, nan, 0.95]\n", - " >>> Collected 9 forecasts: [0.25, 0.35, 0.108, 0.264, nan, 0.2, 0.35, nan, 0.25]\n", - " >>> Collected 9 forecasts: [0.1, 0.15, 0.16, 0.652, nan, 0.275, 0.15, nan, 0.25]\n", - " >>> Collected 9 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999, 0.125, 0.05, nan, 0.05]\n", - " >>> Collected 9 forecasts: [0.15, 0.25, 0.15, 0.12, 0.05, 0.15, 0.1, nan, 0.15]\n", - " >>> Collected 9 forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925, 0.85, 0.9, nan, 0.9]\n", - " >>> Collected 9 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085, 0.725, 0.15, nan, 0.15]\n", - " >>> Collected 9 forecasts: [0.05, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan, 0.05]\n", - " >>> Collected 9 forecasts: [0.1, 0.02, 0.03, 0.072, 0.1, 0.075, 0.1, 0.124, 0.15]\n", - " >>> Collected 9 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999, 0.275, 0.27, 0.6765, 0.25]\n", - " >>> Collected 9 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375, 0.35, 0.65, 0.55, 0.4]\n", - " >>> Collected 9 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425, 0.275, nan, 0.195, 0.15]\n", + " >>> Collected 9 forecasts: [0.6, 0.85, nan, 0.936, nan, 0.85, nan, nan, 0.95]\n", + " >>> Collected 9 forecasts: [0.15, 0.5, 0.108, 0.264, nan, 0.2, 0.35, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.25, 0.3, 0.16, 0.652, nan, 0.275, 0.15, nan, 0.25]\n", + " >>> Collected 9 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999, 0.125, 0.02, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05, 0.15, 0.1, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925, 0.85, 0.9, nan, 
0.85]\n", + " >>> Collected 9 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085, 0.725, 0.2, nan, 0.15]\n", + " >>> Collected 9 forecasts: [0.02, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan, 0.05]\n", + " >>> Collected 9 forecasts: [0.05, 0.05, 0.03, 0.072, 0.1, 0.075, 0.15, 0.124, 0.15]\n", + " >>> Collected 9 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999, 0.275, 0.15, 0.6765, 0.25]\n", + " >>> Collected 9 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375, 0.35, 0.38, 0.55, 0.4]\n", + " >>> Collected 9 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425, 0.275, nan, 0.195, 0.25]\n", " >>> Collected 9 forecasts: [0.98, 0.97, 0.97, 0.932, 0.9475, 0.5, nan, 0.95, 0.92]\n", - " >>> Collected 9 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375, 0.35]\n", - " >>> Collected 9 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4, 0.35, 0.65, 0.513, 0.65]\n", - " >>> Collected 9 forecasts: [0.85, 0.7, 0.17, 0.236, nan, 0.3, 0.1, 0.6485000000000001, 0.75]\n", - " >>> Collected 9 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345, 0.05]\n", - " >>> Collected 9 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.65, 0.85, 0.85]\n", - " >>> Collected 9 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95, 0.5, 0.99, nan, 0.99]\n", - " >>> Collected 9 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98, 0.95, 0.95]\n", - " >>> Collected 9 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336, 0.325, 0.2, 0.34, 0.25]\n", - " >>> Collected 9 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan, nan, 0.15, nan, 0.85]\n", - " >>> Collected 9 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan, nan, 0.55, nan, 0.85]\n", - " >>> Collected 9 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 0.27, 0.847, 0.25]\n", - " >>> Collected 9 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.9, 0.8620000000000001, 0.85]\n", - " >>> Collected 9 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.1, 0.1615, 0.15]\n", - " >>> 
Collected 9 forecasts: [0.3, 0.3, 0.16, nan, 0.05, 0.225, 0.35, 0.55, 0.25]\n", - " >>> Collected 9 forecasts: [0.75, 0.8, 0.67, nan, 0.76, 0.725, 0.78, 0.85, 0.85]\n", - " >>> Collected 9 forecasts: [0.2, 0.15, nan, nan, 0.2, 0.2, 0.15, 0.223, 0.35]\n", - " >>> Collected 9 forecasts: [0.1, 0.3, 0.3925, nan, 0.38, 0.675, 0.15, 0.58, 0.35]\n", - " >>> Collected 9 forecasts: [0.05, 0.05, 0.086, nan, 0.12, 0.1, 0.05, 0.1109999999999999, 0.15]\n", - " >>> Collected 9 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.05, 0.125, 0.15]\n", - " >>> Collected 9 forecasts: [0.15, 0.05, 0.02, nan, 0.098, 0.05, 0.05, 0.073, 0.15]\n", - " >>> Collected 9 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.75, 0.94, 0.85]\n", + " >>> Collected 9 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375, 0.35]\n", + " >>> Collected 9 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4, 0.35, 0.28, 0.513, 0.65]\n", + " >>> Collected 9 forecasts: [0.9, 0.7, 0.17, 0.236, nan, 0.3, 0.35, 0.6485000000000001, 0.35]\n", + " >>> Collected 9 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345, 0.05]\n", + " >>> Collected 9 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.7, 0.85, 0.75]\n", + " >>> Collected 9 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95, 0.5, 0.99, nan, 0.99]\n", + " >>> Collected 9 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98, 0.95, 0.98]\n", + " >>> Collected 9 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336, 0.325, 0.2, 0.34, 0.25]\n", + " >>> Collected 9 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan, nan, 0.38, nan, 0.85]\n", + " >>> Collected 9 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan, nan, 0.65, nan, 0.85]\n", + " >>> Collected 9 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 0.27, 0.847, 0.35]\n", + " >>> Collected 9 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.85, 0.8620000000000001, 0.85]\n", + " >>> Collected 9 forecasts: [0.05, 
0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.05, 0.1615, 0.15]\n", + " >>> Collected 9 forecasts: [0.2, 0.25, 0.16, nan, 0.05, 0.225, 0.9, 0.55, 0.35]\n", + " >>> Collected 9 forecasts: [0.75, 0.7, 0.67, nan, 0.76, 0.725, 0.78, 0.85, 0.85]\n", + " >>> Collected 9 forecasts: [0.3, 0.15, nan, nan, 0.2, 0.2, 0.2, 0.223, 0.65]\n", + " >>> Collected 9 forecasts: [0.15, 0.3, 0.3925, nan, 0.38, 0.675, 0.75, 0.58, 0.25]\n", + " >>> Collected 9 forecasts: [0.1, 0.15, 0.086, nan, 0.12, 0.1, 0.1, 0.1109999999999999, 0.15]\n", + " >>> Collected 9 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.07, 0.125, 0.15]\n", + " >>> Collected 9 forecasts: [0.1, 0.1, 0.02, nan, 0.098, 0.05, 0.1, 0.073, 0.15]\n", + " >>> Collected 9 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.85, 0.94, 0.85]\n", " >>> Collected 9 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78, 0.935, 0.95, 0.785, 0.9]\n", - " >>> Collected 9 forecasts: [0.9, 0.2, nan, nan, 0.05, 0.055, 0.65, 0.067, 0.8]\n", - " >>> Collected 9 forecasts: [0.9, 0.85, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001, 0.9]\n", - " >>> Collected 9 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55, 0.475, 0.9, 0.708, 0.85]\n", - " >>> Collected 9 forecasts: [0.1, 0.07, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042, 0.05]\n", - " >>> Collected 10 forecasts: [0.15, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.28, nan, 0.15, nan]\n", - " >>> Collected 10 forecasts: [0.35, 0.6, 0.62, 0.7, 0.324676, 0.5, 0.65, nan, 0.35, nan]\n", - " >>> Collected 10 forecasts: [0.95, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan, 0.8, 0.638]\n", - " >>> Collected 10 forecasts: [0.75, 0.75, 0.85, 0.884, 0.76, 0.85, 0.8, nan, 0.85, 0.546]\n", - " >>> Collected 10 forecasts: [0.1, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.05, 0.127]\n", - " >>> Collected 10 forecasts: [0.7, 0.6, nan, nan, nan, 0.7, 0.75, nan, 0.35, 0.319]\n", - " >>> Collected 10 forecasts: [0.7, 0.35, nan, nan, nan, 0.65, 0.78, nan, 0.75, nan]\n", + " >>> Collected 9 forecasts: [0.9, 0.4, nan, nan, 0.05, 0.055, 
0.65, 0.067, 0.8]\n", + " >>> Collected 9 forecasts: [0.95, 0.8, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001, 0.9]\n", + " >>> Collected 9 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55, 0.475, 0.1, 0.708, 0.85]\n", + " >>> Collected 9 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042, 0.05]\n", + " >>> Collected 10 forecasts: [0.05, 0.1, 0.07, 0.0559999999999999, nan, 0.175, 0.27, nan, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.35, 0.7, 0.62, 0.7, 0.324676, 0.5, 0.3, nan, 0.75, nan]\n", + " >>> Collected 10 forecasts: [0.9, 0.9, 0.82, 0.794, nan, 0.75, 0.88, nan, 0.8, 0.638]\n", + " >>> Collected 10 forecasts: [0.85, 0.85, 0.85, 0.884, 0.76, 0.85, 0.75, nan, 0.9, 0.546]\n", + " >>> Collected 10 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.15, 0.127]\n", + " >>> Collected 10 forecasts: [0.8, 0.6, nan, nan, nan, 0.7, 0.75, nan, 0.35, 0.319]\n", + " >>> Collected 10 forecasts: [0.7, 0.6, nan, nan, nan, 0.65, 0.78, nan, 0.85, nan]\n", + " >>> Collected 10 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.15, nan]\n", " >>> Collected 10 forecasts: [0.05, 0.05, nan, nan, nan, 0.15, 0.1, nan, 0.05, nan]\n", - " >>> Collected 10 forecasts: [0.15, 0.05, nan, nan, nan, 0.15, 0.15, nan, 0.05, nan]\n", - " >>> Collected 10 forecasts: [0.2, 0.25, 0.25, nan, nan, 0.225, 0.18, nan, 0.25, 0.1939999999999999]\n", + " >>> Collected 10 forecasts: [0.1, 0.2, 0.25, nan, nan, 0.225, 0.18, nan, 0.2, 0.1939999999999999]\n", " >>> Collected 10 forecasts: [0.2, 0.15, nan, 0.242, nan, 0.275, 0.2, nan, 0.25, 0.281]\n", - " >>> Collected 10 forecasts: [0.7, 0.8, nan, 0.936, nan, 0.85, nan, nan, 0.95, 0.946]\n", - " >>> Collected 10 forecasts: [0.25, 0.35, 0.108, 0.264, nan, 0.2, 0.35, nan, 0.25, nan]\n", - " >>> Collected 10 forecasts: [0.1, 0.15, 0.16, 0.652, nan, 0.275, 0.15, nan, 0.25, nan]\n", - " >>> Collected 10 forecasts: [0.05, 0.1, 0.95, 0.052, 0.0699999999999999, 0.125, 0.05, nan, 0.05, nan]\n", - " >>> Collected 10 forecasts: [0.15, 0.25, 0.15, 
0.12, 0.05, 0.15, 0.1, nan, 0.15, 0.154]\n", - " >>> Collected 10 forecasts: [0.95, 0.95, 0.05, 0.918, 0.8925, 0.85, 0.9, nan, 0.9, 0.85]\n", - " >>> Collected 10 forecasts: [0.1, 0.3, 0.125, 0.212, 0.085, 0.725, 0.15, nan, 0.15, 0.408]\n", - " >>> Collected 10 forecasts: [0.05, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan, 0.05, 0.132]\n", - " >>> Collected 10 forecasts: [0.1, 0.02, 0.03, 0.072, 0.1, 0.075, 0.1, 0.124, 0.15, 0.063]\n", - " >>> Collected 10 forecasts: [0.1, 0.3, 0.35, 0.226, 0.1149999999999999, 0.275, 0.27, 0.6765, 0.25, 0.289]\n", - " >>> Collected 10 forecasts: [0.25, 0.3, 0.35, 0.5, 0.1375, 0.35, 0.65, 0.55, 0.4, 0.293]\n", - " >>> Collected 10 forecasts: [0.15, 0.15, 0.115, 0.102, 0.1425, 0.275, nan, 0.195, 0.15, 0.201]\n", + " >>> Collected 10 forecasts: [0.6, 0.85, nan, 0.936, nan, 0.85, nan, nan, 0.95, 0.946]\n", + " >>> Collected 10 forecasts: [0.15, 0.5, 0.108, 0.264, nan, 0.2, 0.35, nan, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.25, 0.3, 0.16, 0.652, nan, 0.275, 0.15, nan, 0.25, nan]\n", + " >>> Collected 10 forecasts: [0.05, 0.05, 0.95, 0.052, 0.0699999999999999, 0.125, 0.02, nan, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.15, 0.25, 0.15, 0.144, 0.05, 0.15, 0.1, nan, 0.15, 0.154]\n", + " >>> Collected 10 forecasts: [0.95, 0.95, 0.05, 0.866, 0.8925, 0.85, 0.9, nan, 0.85, 0.85]\n", + " >>> Collected 10 forecasts: [0.15, 0.35, 0.125, 0.212, 0.085, 0.725, 0.2, nan, 0.15, 0.408]\n", + " >>> Collected 10 forecasts: [0.02, 0.05, 0.034, nan, 0.0925, 0.125, nan, nan, 0.05, 0.132]\n", + " >>> Collected 10 forecasts: [0.05, 0.05, 0.03, 0.072, 0.1, 0.075, 0.15, 0.124, 0.15, 0.063]\n", + " >>> Collected 10 forecasts: [0.1, 0.4, 0.35, 0.226, 0.1149999999999999, 0.275, 0.15, 0.6765, 0.25, 0.289]\n", + " >>> Collected 10 forecasts: [0.25, 0.35, 0.35, 0.5, 0.1375, 0.35, 0.38, 0.55, 0.4, 0.293]\n", + " >>> Collected 10 forecasts: [0.2, 0.2, 0.115, 0.102, 0.1425, 0.275, nan, 0.195, 0.25, 0.201]\n", " >>> Collected 10 forecasts: [0.98, 0.97, 
0.97, 0.932, 0.9475, 0.5, nan, 0.95, 0.92, 0.955]\n", - " >>> Collected 10 forecasts: [0.35, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375, 0.35, 0.126]\n", - " >>> Collected 10 forecasts: [0.35, 0.25, 0.3833333333333333, 0.42, 0.4, 0.35, 0.65, 0.513, 0.65, 0.425]\n", - " >>> Collected 10 forecasts: [0.85, 0.7, 0.17, 0.236, nan, 0.3, 0.1, 0.6485000000000001, 0.75, 0.155]\n", - " >>> Collected 10 forecasts: [0.01, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345, 0.05, 0.161]\n", - " >>> Collected 10 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.65, 0.85, 0.85, 0.6659999999999999]\n", - " >>> Collected 10 forecasts: [0.99, 0.85, 0.99, 0.99, 0.95, 0.5, 0.99, nan, 0.99, 0.959]\n", - " >>> Collected 10 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98, 0.95, 0.95, 0.7759999999999999]\n", - " >>> Collected 10 forecasts: [0.95, 0.25, 0.14, 0.2, 0.336, 0.325, 0.2, 0.34, 0.25, 0.408]\n", - " >>> Collected 10 forecasts: [0.9, 0.9, 0.8340000000000001, nan, nan, nan, 0.15, nan, 0.85, nan]\n", - " >>> Collected 10 forecasts: [0.9, 0.65, 0.7666666666666667, nan, nan, nan, 0.55, nan, 0.85, nan]\n", - " >>> Collected 10 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 0.27, 0.847, 0.25, nan]\n", - " >>> Collected 10 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.9, 0.8620000000000001, 0.85, nan]\n", - " >>> Collected 10 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.1, 0.1615, 0.15, nan]\n", - " >>> Collected 10 forecasts: [0.3, 0.3, 0.16, nan, 0.05, 0.225, 0.35, 0.55, 0.25, nan]\n", - " >>> Collected 10 forecasts: [0.75, 0.8, 0.67, nan, 0.76, 0.725, 0.78, 0.85, 0.85, nan]\n", - " >>> Collected 10 forecasts: [0.2, 0.15, nan, nan, 0.2, 0.2, 0.15, 0.223, 0.35, 0.088]\n", - " >>> Collected 10 forecasts: [0.1, 0.3, 0.3925, nan, 0.38, 0.675, 0.15, 0.58, 0.35, 0.574]\n", - " >>> Collected 10 forecasts: [0.05, 0.05, 0.086, nan, 0.12, 0.1, 0.05, 0.1109999999999999, 0.15, nan]\n", - " >>> 
Collected 10 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.05, 0.125, 0.15, nan]\n", - " >>> Collected 10 forecasts: [0.15, 0.05, 0.02, nan, 0.098, 0.05, 0.05, 0.073, 0.15, 0.086]\n", - " >>> Collected 10 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.75, 0.94, 0.85, 0.8220000000000001]\n", + " >>> Collected 10 forecasts: [0.7, 0.4, 0.285, 0.34, 0.2, 0.35, nan, 0.4375, 0.35, 0.126]\n", + " >>> Collected 10 forecasts: [0.25, 0.4, 0.3833333333333333, 0.42, 0.4, 0.35, 0.28, 0.513, 0.65, 0.425]\n", + " >>> Collected 10 forecasts: [0.9, 0.7, 0.17, 0.236, nan, 0.3, 0.35, 0.6485000000000001, 0.35, 0.155]\n", + " >>> Collected 10 forecasts: [0.25, 0.02, 0.12, 0.29, 0.06, 0.05, nan, 0.345, 0.05, 0.161]\n", + " >>> Collected 10 forecasts: [0.85, 0.75, 0.875, 0.92, 0.6599999999999999, 0.75, 0.7, 0.85, 0.75, 0.6659999999999999]\n", + " >>> Collected 10 forecasts: [0.99, 0.99, 0.99, 0.99, 0.95, 0.5, 0.99, nan, 0.99, 0.959]\n", + " >>> Collected 10 forecasts: [0.2, 0.99, 0.9233333333333332, 0.954, 0.9280000000000002, 0.5, 0.98, 0.95, 0.98, 0.7759999999999999]\n", + " >>> Collected 10 forecasts: [0.3, 0.15, 0.4166666666666666, 0.2, 0.336, 0.325, 0.2, 0.34, 0.25, 0.408]\n", + " >>> Collected 10 forecasts: [0.95, 0.9, 0.8340000000000001, nan, nan, nan, 0.38, nan, 0.85, nan]\n", + " >>> Collected 10 forecasts: [0.9, 0.75, 0.7666666666666667, nan, nan, nan, 0.65, nan, 0.85, nan]\n", + " >>> Collected 10 forecasts: [0.35, 0.6, 0.875, 0.7759999999999999, 0.2299999999999999, 0.75, 0.27, 0.847, 0.35, nan]\n", + " >>> Collected 10 forecasts: [0.9, 0.85, 0.84, 0.86, 0.8019999999999999, 0.75, 0.85, 0.8620000000000001, 0.85, nan]\n", + " >>> Collected 10 forecasts: [0.05, 0.1, 0.026, 0.0559999999999999, 0.05, 0.085, 0.05, 0.1615, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.2, 0.25, 0.16, nan, 0.05, 0.225, 0.9, 0.55, 0.35, nan]\n", + " >>> Collected 10 forecasts: [0.75, 0.7, 0.67, nan, 0.76, 0.725, 0.78, 0.85, 0.85, nan]\n", + " >>> Collected 10 forecasts: [0.3, 0.15, 
nan, nan, 0.2, 0.2, 0.2, 0.223, 0.65, 0.088]\n", + " >>> Collected 10 forecasts: [0.15, 0.3, 0.3925, nan, 0.38, 0.675, 0.75, 0.58, 0.25, 0.574]\n", + " >>> Collected 10 forecasts: [0.1, 0.15, 0.086, nan, 0.12, 0.1, 0.1, 0.1109999999999999, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.1, 0.15, 0.285, nan, 0.096, 0.15, 0.07, 0.125, 0.15, nan]\n", + " >>> Collected 10 forecasts: [0.1, 0.1, 0.02, nan, 0.098, 0.05, 0.1, 0.073, 0.15, 0.086]\n", + " >>> Collected 10 forecasts: [0.8, 0.9, nan, nan, 0.5599999999999999, 0.935, 0.85, 0.94, 0.85, 0.8220000000000001]\n", " >>> Collected 10 forecasts: [0.9, 0.9, 0.95, 0.905, 0.78, 0.935, 0.95, 0.785, 0.9, 0.762]\n", - " >>> Collected 10 forecasts: [0.9, 0.2, nan, nan, 0.05, 0.055, 0.65, 0.067, 0.8, 0.126]\n", - " >>> Collected 10 forecasts: [0.9, 0.85, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001, 0.9, 0.828]\n", - " >>> Collected 10 forecasts: [0.85, 0.75, 0.85, 0.71, 0.55, 0.475, 0.9, 0.708, 0.85, 0.132]\n", - " >>> Collected 10 forecasts: [0.1, 0.07, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042, 0.05, 0.27]\n" + " >>> Collected 10 forecasts: [0.9, 0.4, nan, nan, 0.05, 0.055, 0.65, 0.067, 0.8, 0.126]\n", + " >>> Collected 10 forecasts: [0.95, 0.8, nan, nan, 0.744, 0.8, 0.75, 0.7240000000000001, 0.9, 0.828]\n", + " >>> Collected 10 forecasts: [0.85, 0.8, 0.85, 0.71, 0.55, 0.475, 0.1, 0.708, 0.85, 0.132]\n", + " >>> Collected 10 forecasts: [0.05, 0.05, 0.05, 0.02, 0.052, 0.04, 0.02, 0.042, 0.05, 0.27]\n" ] } ], @@ -10416,7 +11610,7 @@ "\n", "df_bot_team_forecasts = pd.merge(\n", " df_bot_forecasts,\n", - " df_pro_bot_resolved_questions[['bot_question_id', 'pro_question_id', 'question_weight', 'resolution', 'type', 'options', 'range_min', 'range_max']],\n", + " df_pro_bot_resolved_questions[['bot_question_id', 'pro_question_id', 'question_weight', 'resolution', 'type', 'options', 'range_min', 'range_max', 'open_lower_bound', 'open_upper_bound']],\n", " on='bot_question_id',\n", " how='left'\n", ")\n", @@ -10424,7 +11618,7 @@ "# 
KEEP ONLY ROWS WHERE PRO_QUESTION_ID IS NA\n", "df_bot_team_forecasts = df_bot_team_forecasts[~df_bot_team_forecasts['pro_question_id'].isna()]\n", "\n", - "columns_to_keep = ['bot_question_id', 'question_weight', 'resolution', 'type', 'options', 'range_min', 'range_max'] + top_10_bots\n", + "columns_to_keep = ['bot_question_id', 'question_weight', 'resolution', 'type', 'options', 'range_min', 'range_max', 'open_lower_bound', 'open_upper_bound'] + top_10_bots\n", "\n", "# Filter the DataFrame to keep only the specified columns\n", "df_bot_team_forecasts = df_bot_team_forecasts[columns_to_keep]\n", @@ -10440,7 +11634,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -10450,7 +11644,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -10488,45 +11682,45 @@ " multiple_choice\n", " [0, 1, 2-3, 4-6, >6]\n", " 0\n", - " [0.02,0.7,0.2,0.07,0.01]\n", - " 0.017463\n", - " 0.085\n", + " [0.01,0.7,0.2,0.07,0.02]\n", + " [0.012462871287128714, 0.0001, 0.0001, 0.0001,...\n", + " [0.057462871287128715, 0.0001, 0.0001, 0.0001,...\n", " \n", " \n", " 1\n", " numeric\n", " NaN\n", " 86.82\n", - " [0.05,0.0506666667,0.0513333333,0.052,0.052666...\n", - " [0.037750000000000006, 0.038250620225000004, 0...\n", - " [0.0402, 0.040750496180000005, 0.04130456232, ...\n", + " [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05...\n", + " [0.05, 0.0505982539, 0.0511965078, 0.051794761...\n", + " [0.05, 0.0506082725, 0.051216545, 0.0518248175...\n", " \n", " \n", " 2\n", " binary\n", " NaN\n", " no\n", - " 0.15\n", + " 0.05\n", + " 0.063\n", " 0.085\n", - " 0.125\n", " \n", " \n", " 3\n", " multiple_choice\n", " [0-4, 5-9, >9]\n", " 5-9\n", - " [0.2,0.6,0.2]\n", - " 0.6\n", - " 0.5125\n", + " [0.15,0.65,0.2]\n", + " [0.0001, 0.5125, 0.0001]\n", + " [0.0001, 0.45, 0.0001]\n", " \n", " \n", " 4\n", " numeric\n", " NaN\n", " 119.2\n", - " 
[0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", - " [0.0, 0.00161112178, 0.0032277004800000003, 0....\n", - " [0.0, 0.0017712494571428573, 0.0035463967, 0.0...\n", + " [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,...\n", + " [0.0, 0.0018181818, 0.0036363636, 0.0054545455...\n", + " [0.0, 0.0018431373, 0.0036862745, 0.0055294118...\n", " \n", " \n", " ...\n", @@ -10552,16 +11746,16 @@ " NaN\n", " no\n", " 0.9\n", - " 0.2\n", - " 0.1335\n", + " 0.4\n", + " 0.2335\n", " \n", " \n", " 355\n", " binary\n", " NaN\n", " yes\n", - " 0.9\n", - " 0.85\n", + " 0.95\n", + " 0.8\n", " 0.775\n", " \n", " \n", @@ -10570,16 +11764,16 @@ " NaN\n", " no\n", " 0.85\n", - " 0.75\n", - " 0.73\n", + " 0.8\n", + " 0.709\n", " \n", " \n", " 364\n", " binary\n", " NaN\n", " no\n", - " 0.1\n", - " 0.052\n", + " 0.05\n", + " 0.05\n", " 0.046\n", " \n", " \n", @@ -10602,48 +11796,48 @@ "364 binary NaN no \n", "\n", " metac-o1-preview \\\n", - "0 [0.02,0.7,0.2,0.07,0.01] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.15 \n", - "3 [0.2,0.6,0.2] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", + "0 [0.01,0.7,0.2,0.07,0.02] \n", + "1 [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... \n", + "2 0.05 \n", + "3 [0.15,0.65,0.2] \n", + "4 [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,... \n", ".. ... \n", "342 0.9 \n", "351 0.9 \n", - "355 0.9 \n", + "355 0.95 \n", "361 0.85 \n", - "364 0.1 \n", + "364 0.05 \n", "\n", " median_forecast_5_bots \\\n", - "0 0.017463 \n", - "1 [0.037750000000000006, 0.038250620225000004, 0... \n", - "2 0.085 \n", - "3 0.6 \n", - "4 [0.0, 0.00161112178, 0.0032277004800000003, 0.... \n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", ".. ... 
\n", "342 0.9 \n", - "351 0.2 \n", - "355 0.85 \n", - "361 0.75 \n", - "364 0.052 \n", + "351 0.4 \n", + "355 0.8 \n", + "361 0.8 \n", + "364 0.05 \n", "\n", " median_forecast_8_bots \n", - "0 0.085 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.125 \n", - "3 0.5125 \n", - "4 [0.0, 0.0017712494571428573, 0.0035463967, 0.0... \n", + "0 [0.057462871287128715, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0506082725, 0.051216545, 0.0518248175... \n", + "2 0.085 \n", + "3 [0.0001, 0.45, 0.0001] \n", + "4 [0.0, 0.0018431373, 0.0036862745, 0.0055294118... \n", ".. ... \n", "342 0.9025 \n", - "351 0.1335 \n", + "351 0.2335 \n", "355 0.775 \n", - "361 0.73 \n", + "361 0.709 \n", "364 0.046 \n", "\n", "[99 rows x 6 columns]" ] }, - "execution_count": 59, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -10654,7 +11848,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 59, "metadata": {}, "outputs": [ { @@ -10674,7 +11868,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 60, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -10683,6 +11877,22 @@ "outputId": "7327c204-c501-4dfb-bdfb-176606c96dc4" }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less 
than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n", + " >>> Error calculating baseline score for question 34454 — skipping: Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue\n" + ] + }, { "data": { "text/html": [ @@ -10712,52 +11922,52 @@ " \n", " 0\n", " 1\n", - " 16.68\n", + " 1252.60\n", " \n", " \n", " 1\n", " 2\n", - " 26.29\n", + " 2269.15\n", " \n", " \n", " 2\n", " 3\n", - " 28.21\n", + " 2400.04\n", " \n", " \n", " 3\n", " 4\n", - " 26.98\n", + " 2413.81\n", " \n", " \n", " 4\n", " 5\n", - " 27.65\n", + " 2591.97\n", " \n", " \n", " 5\n", " 6\n", - " 26.39\n", + " 2483.23\n", " \n", " \n", " 6\n", " 7\n", - " 26.89\n", + " 2478.69\n", " \n", " \n", " 7\n", " 8\n", - " 27.15\n", + " 2536.53\n", " \n", " \n", " 8\n", " 9\n", - " 27.29\n", + " 2388.76\n", " \n", " \n", " 9\n", " 10\n", - " 26.71\n", + " 2370.53\n", " \n", " \n", "\n", @@ -10765,19 +11975,19 @@ ], "text/plain": [ " Bot_Team_Size Weighted_Baseline_Score_for_Bot_Team_Median\n", - "0 1 16.68\n", - "1 2 26.29\n", - 
"2 3 28.21\n", - "3 4 26.98\n", - "4 5 27.65\n", - "5 6 26.39\n", - "6 7 26.89\n", - "7 8 27.15\n", - "8 9 27.29\n", - "9 10 26.71" + "0 1 1252.60\n", + "1 2 2269.15\n", + "2 3 2400.04\n", + "3 4 2413.81\n", + "4 5 2591.97\n", + "5 6 2483.23\n", + "6 7 2478.69\n", + "7 8 2536.53\n", + "8 9 2388.76\n", + "9 10 2370.53" ] }, - "execution_count": 61, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -10808,16 +12018,16 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['metac-o1-preview', 'metac-o1', 'pgodzinai']" + "['metac-o1-preview', 'metac-o1', 'pgodzinai', 'GreeneiBot2', 'manticAI']" ] }, - "execution_count": 62, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -10831,7 +12041,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 62, "metadata": {}, "outputs": [ { @@ -10840,7 +12050,7 @@ "(424, 47)" ] }, - "execution_count": 63, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -10851,26 +12061,311 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 63, "metadata": {}, "outputs": [], "source": [ - "# Merge bot_team_forecasts with df_top_bot_forecasts, just get type and options columns from bot_team_forecasts, merge on bot_question_id\n", - "df_bot_forecasts = pd.merge(\n", - " df_bot_forecasts,\n", - " df_bot_team_forecasts[['bot_question_id', 'type', 'options', 'resolution']],\n", - " on='bot_question_id',\n", - " how='left'\n", - ")\n", - "\n", - "# And make bot_question_id, type and options the first columns\n", - "df_bot_forecasts = df_bot_forecasts[['bot_question_id', 'type', 'options', 'resolution'] + [col for col in df_bot_forecasts.columns if col not in ['bot_question_id', 'type', 'options']]]" + "# Merge bot_team_forecasts with df_top_bot_forecasts, just get type and options columns from bot_team_forecasts, merge on bot_question_id\n", + 
"df_bot_forecasts = pd.merge(\n", + " df_bot_forecasts,\n", + " df_bot_team_forecasts[['bot_question_id', 'type', 'options', 'resolution']],\n", + " on='bot_question_id',\n", + " how='left'\n", + ")\n", + "\n", + "# And make bot_question_id, type and options the first columns\n", + "df_bot_forecasts = df_bot_forecasts[['bot_question_id', 'type', 'options', 'resolution'] + [col for col in df_bot_forecasts.columns if col not in ['bot_question_id', 'type', 'options']]]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bot_question_idquestion_weightresolutiontypeoptionsrange_minrange_maxopen_lower_boundopen_upper_boundmetac-o1-preview...median_forecast_1_botsmedian_forecast_2_botsmedian_forecast_3_botsmedian_forecast_4_botsmedian_forecast_5_botsmedian_forecast_6_botsmedian_forecast_7_botsmedian_forecast_8_botsmedian_forecast_9_botsmedian_forecast_10_bots
0312621.00multiple_choice[0, 1, 2-3, 4-6, >6]NaNNaNFalseFalse[0.01,0.7,0.2,0.07,0.02]...[0.01, 0.0001, 0.0001, 0.0001, 0.0001][0.13, 0.0001, 0.0001, 0.0001, 0.0001][0.014925742574257425, 0.0001, 0.0001, 0.0001,...[0.012462871287128714, 0.0001, 0.0001, 0.0001,...[0.012462871287128714, 0.0001, 0.0001, 0.0001,...[0.014925742574257425, 0.0001, 0.0001, 0.0001,...[0.057462871287128715, 0.0001, 0.0001, 0.0001,...[0.057462871287128715, 0.0001, 0.0001, 0.0001,...[0.01623640201331385, 0.0001, 0.0001, 0.0001, ...[0.01623640201331385, 0.0001, 0.0001, 0.0001, ...
1312631.086.82numericNaN60.0100.0TrueTrue[0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05......[0.05, 0.051, 0.052, 0.053, 0.054, 0.055, 0.05...[0.05, 0.05079411765, 0.0515882353, 0.05238235...[0.05, 0.0505882353, 0.0511764706, 0.051764705...[0.05, 0.0505982539, 0.0511965078, 0.051794761...[0.05, 0.0505982539, 0.0511965078, 0.051794761...[0.05, 0.0506082725, 0.051216545, 0.0518248175...[0.05, 0.0506082725, 0.051216545, 0.0518248175...[0.05, 0.0506082725, 0.051216545, 0.0518248175...[0.05, 0.0506374696, 0.051274939150000004, 0.0...[0.05, 0.0506374696, 0.051274939150000004, 0.0...
2312641.0nobinaryNaNNaNNaNFalseFalse0.05...0.050.0750.070.0630.0630.070.0850.0850.10.1
3312741.05-9multiple_choice[0-4, 5-9, >9]NaNNaNNaNNaN[0.15,0.65,0.2]...[0.0001, 0.65, 0.0001][0.0001, 0.55, 0.0001][0.0001, 0.5125, 0.0001][0.0001, 0.5662499999999999, 0.0001][0.0001, 0.5125, 0.0001][0.0001, 0.48124999999999996, 0.0001][0.0001, 0.45, 0.0001][0.0001, 0.45, 0.0001][0.0001, 0.48124999999999996, 0.0001][0.0001, 0.45, 0.0001]
4312751.0119.2numericNaN0.0400.0FalseFalse[0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,......[0.0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.024,...[0.0, 0.00366666665, 0.00733333335, 0.011, 0.0...[0.0, 0.0033333333, 0.0066666667, 0.01, 0.0133...[0.0, 0.00257575755, 0.00515151515, 0.00772727...[0.0, 0.0018181818, 0.0036363636, 0.0054545455...[0.0, 0.00183065955, 0.00366131905, 0.00549197...[0.0, 0.0018431373, 0.0036862745, 0.0055294118...[0.0, 0.0018431373, 0.0036862745, 0.0055294118...[0.0, 0.002254902, 0.0045098039, 0.0067647059,...[0.0, 0.0018431373, 0.0036862745, 0.0055294118...
\n", + "

5 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " bot_question_id question_weight resolution type \\\n", + "0 31262 1.0 0 multiple_choice \n", + "1 31263 1.0 86.82 numeric \n", + "2 31264 1.0 no binary \n", + "3 31274 1.0 5-9 multiple_choice \n", + "4 31275 1.0 119.2 numeric \n", + "\n", + " options range_min range_max open_lower_bound \\\n", + "0 [0, 1, 2-3, 4-6, >6] NaN NaN False \n", + "1 NaN 60.0 100.0 True \n", + "2 NaN NaN NaN False \n", + "3 [0-4, 5-9, >9] NaN NaN NaN \n", + "4 NaN 0.0 400.0 False \n", + "\n", + " open_upper_bound metac-o1-preview ... \\\n", + "0 False [0.01,0.7,0.2,0.07,0.02] ... \n", + "1 True [0.05,0.051,0.052,0.053,0.054,0.055,0.056,0.05... ... \n", + "2 False 0.05 ... \n", + "3 NaN [0.15,0.65,0.2] ... \n", + "4 False [0.0,0.004,0.008,0.012,0.016,0.02,0.024,0.028,... ... \n", + "\n", + " median_forecast_1_bots \\\n", + "0 [0.01, 0.0001, 0.0001, 0.0001, 0.0001] \n", + "1 [0.05, 0.051, 0.052, 0.053, 0.054, 0.055, 0.05... \n", + "2 0.05 \n", + "3 [0.0001, 0.65, 0.0001] \n", + "4 [0.0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.024,... \n", + "\n", + " median_forecast_2_bots \\\n", + "0 [0.13, 0.0001, 0.0001, 0.0001, 0.0001] \n", + "1 [0.05, 0.05079411765, 0.0515882353, 0.05238235... \n", + "2 0.075 \n", + "3 [0.0001, 0.55, 0.0001] \n", + "4 [0.0, 0.00366666665, 0.00733333335, 0.011, 0.0... \n", + "\n", + " median_forecast_3_bots \\\n", + "0 [0.014925742574257425, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505882353, 0.0511764706, 0.051764705... \n", + "2 0.07 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0033333333, 0.0066666667, 0.01, 0.0133... \n", + "\n", + " median_forecast_4_bots \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5662499999999999, 0.0001] \n", + "4 [0.0, 0.00257575755, 0.00515151515, 0.00772727... \n", + "\n", + " median_forecast_5_bots \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... 
\n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", + "\n", + " median_forecast_6_bots \\\n", + "0 [0.014925742574257425, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0506082725, 0.051216545, 0.0518248175... \n", + "2 0.07 \n", + "3 [0.0001, 0.48124999999999996, 0.0001] \n", + "4 [0.0, 0.00183065955, 0.00366131905, 0.00549197... \n", + "\n", + " median_forecast_7_bots \\\n", + "0 [0.057462871287128715, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0506082725, 0.051216545, 0.0518248175... \n", + "2 0.085 \n", + "3 [0.0001, 0.45, 0.0001] \n", + "4 [0.0, 0.0018431373, 0.0036862745, 0.0055294118... \n", + "\n", + " median_forecast_8_bots \\\n", + "0 [0.057462871287128715, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0506082725, 0.051216545, 0.0518248175... \n", + "2 0.085 \n", + "3 [0.0001, 0.45, 0.0001] \n", + "4 [0.0, 0.0018431373, 0.0036862745, 0.0055294118... \n", + "\n", + " median_forecast_9_bots \\\n", + "0 [0.01623640201331385, 0.0001, 0.0001, 0.0001, ... \n", + "1 [0.05, 0.0506374696, 0.051274939150000004, 0.0... \n", + "2 0.1 \n", + "3 [0.0001, 0.48124999999999996, 0.0001] \n", + "4 [0.0, 0.002254902, 0.0045098039, 0.0067647059,... \n", + "\n", + " median_forecast_10_bots \n", + "0 [0.01623640201331385, 0.0001, 0.0001, 0.0001, ... \n", + "1 [0.05, 0.0506374696, 0.051274939150000004, 0.0... \n", + "2 0.1 \n", + "3 [0.0001, 0.45, 0.0001] \n", + "4 [0.0, 0.0018431373, 0.0036862745, 0.0055294118... 
\n", + "\n", + "[5 rows x 29 columns]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_bot_team_forecasts.head()" ] }, { "cell_type": "code", "execution_count": 65, - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z3TTBVWoZVzU", + "outputId": "0eb32f2c-09c6-4a15-e81a-bee353b1bccf" + }, "outputs": [ { "data": { @@ -10894,283 +12389,339 @@ " \n", " \n", " bot_question_id\n", - " question_weight\n", + " title\n", " resolution\n", + " scheduled_close_time\n", + " actual_close_time\n", " type\n", " options\n", " range_min\n", " range_max\n", - " metac-o1-preview\n", - " metac-o1\n", - " pgodzinai\n", - " ...\n", - " median_forecast_1_bots\n", - " median_forecast_2_bots\n", - " median_forecast_3_bots\n", - " median_forecast_4_bots\n", - " median_forecast_5_bots\n", - " median_forecast_6_bots\n", - " median_forecast_7_bots\n", - " median_forecast_8_bots\n", - " median_forecast_9_bots\n", - " median_forecast_10_bots\n", + " open_upper_bound\n", + " open_lower_bound\n", + " pro_question_id\n", + " question_weight\n", + " bot_team_median\n", + " pro_median\n", " \n", " \n", " \n", " \n", " 0\n", " 31262\n", - " 1.0\n", + " For Q1 2025, how many banks will be listed on ...\n", " 0\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", " multiple_choice\n", " [0, 1, 2-3, 4-6, >6]\n", " NaN\n", " NaN\n", - " [0.02,0.7,0.2,0.07,0.01]\n", - " [0.45,0.3,0.15,0.05,0.05]\n", - " [0.014925742574257425,0.5137871287128712,0.334...\n", - " ...\n", - " 0.02\n", - " 0.235\n", - " 0.02\n", - " 0.017463\n", - " 0.017463\n", - " 0.02\n", - " 0.085\n", - " 0.085\n", - " 0.15\n", - " 0.15\n", + " False\n", + " False\n", + " 31268\n", + " 1.0\n", + " [0.012462871287128714, 0.0001, 0.0001, 0.0001,...\n", + " [0.001,0.62,0.35,0.019,0.01]\n", " \n", " \n", " 1\n", " 31263\n", - " 1.0\n", + " What percentage of the vote will Alexander Luk...\n", " 86.82\n", + " 2025-01-20 
03:27:00\n", + " 2025-01-20 03:27:00\n", " numeric\n", " NaN\n", " 60.0\n", " 100.0\n", - " [0.05,0.0506666667,0.0513333333,0.052,0.052666...\n", - " [0.05,0.0506666667,0.0513333333,0.052,0.052666...\n", - " [0.001,0.001060875,0.0011396,0.0012863125,0.00...\n", - " ...\n", - " [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05...\n", - " [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05...\n", - " [0.03366666666666667, 0.0341314028, 0.03460208...\n", - " [0.037750000000000006, 0.038250620225000004, 0...\n", - " [0.037750000000000006, 0.038250620225000004, 0...\n", - " [0.0402, 0.040750496180000005, 0.04130456232, ...\n", - " [0.0402, 0.040750496180000005, 0.04130456232, ...\n", - " [0.0402, 0.040750496180000005, 0.04130456232, ...\n", - " [0.041833333333333333, 0.042403191266666675, 0...\n", - " [0.041833333333333333, 0.042403191266666675, 0...\n", + " True\n", + " True\n", + " 31269\n", + " 1.0\n", + " [0.05, 0.0505982539, 0.0511965078, 0.051794761...\n", + " [0.0013749738,0.0014499743,0.001526641,0.00160...\n", " \n", " \n", " 2\n", " 31264\n", - " 1.0\n", + " Will the bubble in the Magnificent Seven pop b...\n", " no\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " 0.15\n", - " 0.1\n", - " 0.07\n", - " ...\n", - " 0.15\n", - " 0.125\n", - " 0.1\n", - " 0.085\n", - " 0.085\n", - " 0.1\n", - " 0.125\n", - " 0.125\n", - " 0.15\n", - " 0.15\n", + " False\n", + " False\n", + " 31270\n", + " 1.0\n", + " 0.063\n", + " 0.013\n", " \n", " \n", " 3\n", " 31274\n", - " 1.0\n", + " How many arms sales globally will the US State...\n", " 5-9\n", + " 2025-01-21 11:42:00\n", + " 2025-01-21 11:42:00\n", " multiple_choice\n", " [0-4, 5-9, >9]\n", " NaN\n", " NaN\n", - " [0.2,0.6,0.2]\n", - " [0.25,0.6,0.15]\n", - " [0.27499999999999997,0.5125,0.21249999999999997]\n", - " ...\n", - " 0.6\n", - " 0.6\n", - " 0.6\n", - " 0.6\n", - " 0.6\n", - " 0.55625\n", - " 0.5125\n", - " 0.5125\n", - " 0.53125\n", - " 0.5125\n", + " NaN\n", + " 
NaN\n", + " 31280\n", + " 1.0\n", + " [0.0001, 0.5125, 0.0001]\n", + " [0.16,0.44,0.4]\n", " \n", " \n", " 4\n", " 31275\n", - " 1.0\n", + " How much will it rain in Brasília, Brazil in F...\n", " 119.2\n", + " 2025-01-21 11:42:00\n", + " 2025-01-21 11:42:00\n", " numeric\n", " NaN\n", " 0.0\n", " 400.0\n", - " [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", - " [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0...\n", - " [0.0,0.0001141583,0.0002446967,0.0003862688,0....\n", - " ...\n", - " [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0...\n", - " [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0...\n", - " [0.0, 0.0017047194333333333, 0.0034148989, 0.0...\n", - " [0.0, 0.001733085025, 0.003470265075, 0.005210...\n", - " [0.0, 0.00161112178, 0.0032277004800000003, 0....\n", - " [0.0, 0.0016497910333333336, 0.003304129483333...\n", - " [0.0, 0.0017712494571428573, 0.0035463967, 0.0...\n", - " [0.0, 0.0017712494571428573, 0.0035463967, 0.0...\n", - " [0.0, 0.0019069861375000002, 0.003817382825, 0...\n", - " [0.0, 0.0018408706777777778, 0.003684772944444...\n", + " False\n", + " False\n", + " 31281\n", + " 1.0\n", + " [0.0, 0.0018181818, 0.0036363636, 0.0054545455...\n", + " [0.0,0.0005044914,0.0010323506,0.0015847475,0....\n", " \n", " \n", "\n", - "

5 rows × 27 columns

\n", "" ], "text/plain": [ - " bot_question_id question_weight resolution type \\\n", - "0 31262 1.0 0 multiple_choice \n", - "1 31263 1.0 86.82 numeric \n", - "2 31264 1.0 no binary \n", - "3 31274 1.0 5-9 multiple_choice \n", - "4 31275 1.0 119.2 numeric \n", - "\n", - " options range_min range_max \\\n", - "0 [0, 1, 2-3, 4-6, >6] NaN NaN \n", - "1 NaN 60.0 100.0 \n", - "2 NaN NaN NaN \n", - "3 [0-4, 5-9, >9] NaN NaN \n", - "4 NaN 0.0 400.0 \n", - "\n", - " metac-o1-preview \\\n", - "0 [0.02,0.7,0.2,0.07,0.01] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.15 \n", - "3 [0.2,0.6,0.2] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", - "\n", - " metac-o1 \\\n", - "0 [0.45,0.3,0.15,0.05,0.05] \n", - "1 [0.05,0.0506666667,0.0513333333,0.052,0.052666... \n", - "2 0.1 \n", - "3 [0.25,0.6,0.15] \n", - "4 [0.0,0.0025,0.005,0.0075,0.01,0.0125,0.015,0.0... \n", - "\n", - " pgodzinai ... \\\n", - "0 [0.014925742574257425,0.5137871287128712,0.334... ... \n", - "1 [0.001,0.001060875,0.0011396,0.0012863125,0.00... ... \n", - "2 0.07 ... \n", - "3 [0.27499999999999997,0.5125,0.21249999999999997] ... \n", - "4 [0.0,0.0001141583,0.0002446967,0.0003862688,0.... ... \n", - "\n", - " median_forecast_1_bots \\\n", - "0 0.02 \n", - "1 [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05... \n", - "2 0.15 \n", - "3 0.6 \n", - "4 [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0... \n", - "\n", - " median_forecast_2_bots \\\n", - "0 0.235 \n", - "1 [0.05, 0.0506666667, 0.0513333333, 0.052, 0.05... \n", - "2 0.125 \n", - "3 0.6 \n", - "4 [0.0, 0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0... \n", - "\n", - " median_forecast_3_bots \\\n", - "0 0.02 \n", - "1 [0.03366666666666667, 0.0341314028, 0.03460208... \n", - "2 0.1 \n", - "3 0.6 \n", - "4 [0.0, 0.0017047194333333333, 0.0034148989, 0.0... \n", - "\n", - " median_forecast_4_bots \\\n", - "0 0.017463 \n", - "1 [0.037750000000000006, 0.038250620225000004, 0... 
\n", - "2 0.085 \n", - "3 0.6 \n", - "4 [0.0, 0.001733085025, 0.003470265075, 0.005210... \n", - "\n", - " median_forecast_5_bots \\\n", - "0 0.017463 \n", - "1 [0.037750000000000006, 0.038250620225000004, 0... \n", - "2 0.085 \n", - "3 0.6 \n", - "4 [0.0, 0.00161112178, 0.0032277004800000003, 0.... \n", - "\n", - " median_forecast_6_bots \\\n", - "0 0.02 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.1 \n", - "3 0.55625 \n", - "4 [0.0, 0.0016497910333333336, 0.003304129483333... \n", - "\n", - " median_forecast_7_bots \\\n", - "0 0.085 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.125 \n", - "3 0.5125 \n", - "4 [0.0, 0.0017712494571428573, 0.0035463967, 0.0... \n", + " bot_question_id title \\\n", + "0 31262 For Q1 2025, how many banks will be listed on ... \n", + "1 31263 What percentage of the vote will Alexander Luk... \n", + "2 31264 Will the bubble in the Magnificent Seven pop b... \n", + "3 31274 How many arms sales globally will the US State... \n", + "4 31275 How much will it rain in Brasília, Brazil in F... \n", "\n", - " median_forecast_8_bots \\\n", - "0 0.085 \n", - "1 [0.0402, 0.040750496180000005, 0.04130456232, ... \n", - "2 0.125 \n", - "3 0.5125 \n", - "4 [0.0, 0.0017712494571428573, 0.0035463967, 0.0... 
\n", + " resolution scheduled_close_time actual_close_time type \\\n", + "0 0 2025-01-20 03:27:00 2025-01-20 03:27:00 multiple_choice \n", + "1 86.82 2025-01-20 03:27:00 2025-01-20 03:27:00 numeric \n", + "2 no 2025-01-20 03:27:00 2025-01-20 03:27:00 binary \n", + "3 5-9 2025-01-21 11:42:00 2025-01-21 11:42:00 multiple_choice \n", + "4 119.2 2025-01-21 11:42:00 2025-01-21 11:42:00 numeric \n", + "\n", + " options range_min range_max open_upper_bound \\\n", + "0 [0, 1, 2-3, 4-6, >6] NaN NaN False \n", + "1 NaN 60.0 100.0 True \n", + "2 NaN NaN NaN False \n", + "3 [0-4, 5-9, >9] NaN NaN NaN \n", + "4 NaN 0.0 400.0 False \n", + "\n", + " open_lower_bound pro_question_id question_weight \\\n", + "0 False 31268 1.0 \n", + "1 True 31269 1.0 \n", + "2 False 31270 1.0 \n", + "3 NaN 31280 1.0 \n", + "4 False 31281 1.0 \n", + "\n", + " bot_team_median \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", + "\n", + " pro_median \n", + "0 [0.001,0.62,0.35,0.019,0.01] \n", + "1 [0.0013749738,0.0014499743,0.001526641,0.00160... \n", + "2 0.013 \n", + "3 [0.16,0.44,0.4] \n", + "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bot_question_idtitleresolutionscheduled_close_timeactual_close_timetypeoptionsrange_minrange_maxopen_upper_boundopen_lower_boundpro_question_idquestion_weightbot_team_medianpro_median
34235345Will the US Citizenship and Immigration Servic...yes2025-03-12 22:00:002025-03-12 22:00:00binaryNaNNaNNaNFalseFalse353801.000.90.95
35135354Will the United States impose any new tariffs ...no2025-03-13 03:00:002025-03-13 03:00:00binaryNaNNaNNaNFalseFalse353811.000.40.05
35535358Will ChatGPT rank in the top 10 global website...yes2025-03-13 03:00:002025-03-13 03:00:00binaryNaNNaNNaNFalseFalse353851.000.80.97
36135364Will Doge's Agency Efficiency Leaderboard have...no2025-03-14 23:00:002025-03-14 23:00:00binaryNaNNaNNaNFalseFalse353860.850.80.666
36435367Will the Project 2025 Tracker spreadsheet mark...no2025-03-14 23:00:002025-03-14 23:00:00binaryNaNNaNNaNFalseFalse353870.850.050.03
\n", + "
" + ], + "text/plain": [ + " bot_question_id title \\\n", + "342 35345 Will the US Citizenship and Immigration Servic... \n", + "351 35354 Will the United States impose any new tariffs ... \n", + "355 35358 Will ChatGPT rank in the top 10 global website... \n", + "361 35364 Will Doge's Agency Efficiency Leaderboard have... \n", + "364 35367 Will the Project 2025 Tracker spreadsheet mark... \n", "\n", - "[5 rows x 27 columns]" + " resolution scheduled_close_time actual_close_time type options \\\n", + "342 yes 2025-03-12 22:00:00 2025-03-12 22:00:00 binary NaN \n", + "351 no 2025-03-13 03:00:00 2025-03-13 03:00:00 binary NaN \n", + "355 yes 2025-03-13 03:00:00 2025-03-13 03:00:00 binary NaN \n", + "361 no 2025-03-14 23:00:00 2025-03-14 23:00:00 binary NaN \n", + "364 no 2025-03-14 23:00:00 2025-03-14 23:00:00 binary NaN \n", + "\n", + " range_min range_max open_upper_bound open_lower_bound pro_question_id \\\n", + "342 NaN NaN False False 35380 \n", + "351 NaN NaN False False 35381 \n", + "355 NaN NaN False False 35385 \n", + "361 NaN NaN False False 35386 \n", + "364 NaN NaN False False 35387 \n", + "\n", + " question_weight bot_team_median pro_median \n", + "342 1.00 0.9 0.95 \n", + "351 1.00 0.4 0.05 \n", + "355 1.00 0.8 0.97 \n", + "361 0.85 0.8 0.666 \n", + "364 0.85 0.05 0.03 " ] }, - "execution_count": 65, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/molly/metaculus/aib-analysis/refactored_notebook/scoring.py:38: RuntimeWarning: invalid value encountered in scalar divide\n", + " peer_score = np.log(forecast_for_resolution / geometric_mean)\n" + ] } ], - "source": [ - "df_bot_team_forecasts.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Z3TTBVWoZVzU", - "outputId": "0eb32f2c-09c6-4a15-e81a-bee353b1bccf" - }, - "outputs": [], "source": [ "# 
@title Weighted team-vs-pro\n", "\n", @@ -11198,26 +12749,28 @@ " how='left'\n", ")\n", "\n", - "# Copy with union (not just overlapping questions)\n", + "# Copy with union (not just questions at the intersection)\n", "df_top_bot_pro_forecasts_all = df_top_bot_pro_forecasts.copy()\n", "\n", "# Filter to only those rows where pro_median is not NA\n", "df_top_bot_pro_forecasts = df_top_bot_pro_forecasts.dropna(subset=['pro_median'])\n", "\n", + "display_head_and_tail(df_top_bot_pro_forecasts)\n", + "\n", "# Add the head_to_head column\n", - "df_top_bot_pro_forecasts['head_to_head'] = df_top_bot_pro_forecasts.apply(calculate_head_to_head, args=('bot_team_median', 'pro_median'), axis=1)" + "df_top_bot_pro_forecasts['head_to_head'] = df_top_bot_pro_forecasts.apply(calculate_weighted_h2h_score_between_two_forecast_columns, args=('bot_team_median', 'pro_median'), axis=1)" ] }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 66, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Weighted Total Score: -14.9893\n" + "Weighted Total Score: -0.1312\n" ] } ], @@ -11227,7 +12780,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 67, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -11239,7 +12792,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -11251,7 +12804,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The average of 'head_to_head' is: -14.97\n" + "The average of 'head_to_head' is: -0.14\n" ] } ], @@ -11261,7 +12814,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 68, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -11307,28 +12860,289 @@ " \n", " \n", " head_to_head\n", - " -1424.0\n", - " 93.1\n", - " -15.3\n", - " 90.635958\n", - " 9.393462\n", - " -1.628277\n", - " 1.985277\n", - " 3.4\n", - " -33.9\n", - " 0.053441\n", - " 0.106882\n", + " -12.5\n", + " 92.1\n", + " -0.1\n", + " 0.669453\n", + " 0.069757\n", + " -1.939479\n", + " 1.98555\n", + " 0.0\n", + " -0.3\n", + " 0.027769\n", + " 0.055537\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " W_score W_count W_ave W_stdev std_err t_stat t_crit \\\n", + "head_to_head -12.5 92.1 -0.1 0.669453 0.069757 -1.939479 1.98555 \n", + "\n", + " upper_bound lower_bound cdf p_value \n", + "head_to_head 0.0 -0.3 0.027769 0.055537 " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_bot_team_h2h = calculate_t_test(df_top_bot_pro_forecasts, ['head_to_head'])\n", + "\n", + "df_bot_team_h2h" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0I0myCHpl7FT", + "outputId": "bcc45b9a-f328-4f0c-ef98-a7620af7e358" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top 5:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlebot_team_medianpro_medianresolutionhead_to_head
279What will Kalshi's rank in the iPhone Top Free...[0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.05][0.02,0.01,0.015,0.015,0.05,0.89]Not in top 50-2.9
121How many movies will be new on Netflix's top 1...[0.0001, 0.0001, 0.0001, 0.125][0.005,0.017,0.157,0.821]3 or more-1.9
47What will be Donald Trump's net worth, accordi...[0.16999999999999998, 0.0001, 0.0001, 0.0001, ...[0.6,0.2,0.1,0.075,0.025]0-$6 billion, inclusive-1.3
232How many movies will be new on Netflix's top 1...[0.0001, 0.0001, 0.0001, 0.2963039014373716][0.002,0.008,0.09,0.9]3 or more-1.1
247Will the 500th richest person on Bloomberg's B...0.7666670.333no-1.1
\n", + "
" + ], + "text/plain": [ + " title \\\n", + "279 What will Kalshi's rank in the iPhone Top Free... \n", + "121 How many movies will be new on Netflix's top 1... \n", + "47 What will be Donald Trump's net worth, accordi... \n", + "232 How many movies will be new on Netflix's top 1... \n", + "247 Will the 500th richest person on Bloomberg's B... \n", + "\n", + " bot_team_median \\\n", + "279 [0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.05] \n", + "121 [0.0001, 0.0001, 0.0001, 0.125] \n", + "47 [0.16999999999999998, 0.0001, 0.0001, 0.0001, ... \n", + "232 [0.0001, 0.0001, 0.0001, 0.2963039014373716] \n", + "247 0.766667 \n", + "\n", + " pro_median resolution head_to_head \n", + "279 [0.02,0.01,0.015,0.015,0.05,0.89] Not in top 50 -2.9 \n", + "121 [0.005,0.017,0.157,0.821] 3 or more -1.9 \n", + "47 [0.6,0.2,0.1,0.075,0.025] 0-$6 billion, inclusive -1.3 \n", + "232 [0.002,0.008,0.09,0.9] 3 or more -1.1 \n", + "247 0.333 no -1.1 " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.set_option('display.max_colwidth', 50)\n", + "\n", + "df_sorted = df_top_bot_pro_forecasts.sort_values(by='head_to_head')\n", + "df_sorted['head_to_head'] = df_sorted['head_to_head'].round(1)\n", + "#df_sorted['resolution'] = df_sorted['resolution'].map({1: 'yes', 0: 'no'})\n", + "\n", + "df_top5 = df_sorted.head(5)\n", + "df_bottom5 = df_sorted.tail(5)\n", + "\n", + "print(\"Top 5:\")\n", + "\n", + "df_top5[['title', 'bot_team_median', 'pro_median', 'resolution', 'head_to_head']]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Bottom 5:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
titlebot_team_medianpro_medianresolutionhead_to_head
0For Q1 2025, how many banks will be listed on ...[0.012462871287128714, 0.0001, 0.0001, 0.0001,...[0.001,0.62,0.35,0.019,0.01]02.5
189What will the highest rank of metac-GPT4o or m...[0.0, 0.0369946063, 0.07475, 0.10485, 0.1198, ...[0.0,5.19918e-05,0.0001040776,0.0001562618,0.0...34.02.8
151How many earthquakes of magnitude ≥ 4 will hap...[0.0, 0.0035714286, 0.0071428571, 0.0107142857...[0.0,0.0158237002,0.0235315723,0.0279864362,0....0.0NaN
211Will Nikola Corporation file for bankruptcy be...0.990.999annulledNaN
214Will the state of Rhode Island have any recrea...0.9280.95annulledNaN
\n", "
" ], "text/plain": [ - " W_score W_count W_ave W_stdev std_err t_stat \\\n", - "head_to_head -1424.0 93.1 -15.3 90.635958 9.393462 -1.628277 \n", + " title \\\n", + "0 For Q1 2025, how many banks will be listed on ... \n", + "189 What will the highest rank of metac-GPT4o or m... \n", + "151 How many earthquakes of magnitude ≥ 4 will hap... \n", + "211 Will Nikola Corporation file for bankruptcy be... \n", + "214 Will the state of Rhode Island have any recrea... \n", + "\n", + " bot_team_median \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "189 [0.0, 0.0369946063, 0.07475, 0.10485, 0.1198, ... \n", + "151 [0.0, 0.0035714286, 0.0071428571, 0.0107142857... \n", + "211 0.99 \n", + "214 0.928 \n", + "\n", + " pro_median resolution \\\n", + "0 [0.001,0.62,0.35,0.019,0.01] 0 \n", + "189 [0.0,5.19918e-05,0.0001040776,0.0001562618,0.0... 34.0 \n", + "151 [0.0,0.0158237002,0.0235315723,0.0279864362,0.... 0.0 \n", + "211 0.999 annulled \n", + "214 0.95 annulled \n", "\n", - " t_crit upper_bound lower_bound cdf p_value \n", - "head_to_head 1.985277 3.4 -33.9 0.053441 0.106882 " + " head_to_head \n", + "0 2.5 \n", + "189 2.8 \n", + "151 NaN \n", + "211 NaN \n", + "214 NaN " ] }, "execution_count": 70, @@ -11337,29 +13151,56 @@ } ], "source": [ - "df_bot_team_h2h = calculate_t_test(df_top_bot_pro_forecasts, ['head_to_head'])\n", + "print(\"\\nBottom 5:\")\n", "\n", - "df_bot_team_h2h" + "df_bottom5[['title', 'bot_team_median', 'pro_median', 'resolution', 'head_to_head']]" ] }, { "cell_type": "code", - "execution_count": 73, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0I0myCHpl7FT", - "outputId": "bcc45b9a-f328-4f0c-ef98-a7620af7e358" - }, + "execution_count": 71, + "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Top 5:\n" - ] - }, + "data": { + "text/plain": [ + "bot_question_id Int64\n", + "title object\n", + "resolution float64\n", + "scheduled_close_time 
datetime64[ns]\n", + "actual_close_time datetime64[ns]\n", + "type object\n", + "options object\n", + "range_min float64\n", + "range_max float64\n", + "open_upper_bound object\n", + "open_lower_bound object\n", + "pro_question_id Int64\n", + "question_weight float64\n", + "bot_team_median object\n", + "pro_median object\n", + "head_to_head float64\n", + "weighted_score float64\n", + "dtype: object" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Cast df_top_bot_pro_forecasts['resolution'] as string - idk why this is necessary but it is\n", + "df_top_bot_pro_forecasts['resolution'] = df_top_bot_pro_forecasts['resolution'].astype(pd.StringDtype())\n", + "df_top_bot_pro_forecasts['resolution'] = df_top_bot_pro_forecasts['resolution'].map({'yes': 1, 'no': 0})\n", + "df_top_bot_pro_forecasts.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -11381,121 +13222,317 @@ " \n", " \n", " \n", + " bot_question_id\n", " title\n", + " resolution\n", + " scheduled_close_time\n", + " actual_close_time\n", + " type\n", + " options\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", + " pro_question_id\n", + " question_weight\n", " bot_team_median\n", " pro_median\n", - " resolution\n", " head_to_head\n", + " weighted_score\n", " \n", " \n", " \n", " \n", - " 279\n", - " What will Kalshi's rank in the iPhone Top Free...\n", - " 0.03\n", - " [0.02,0.01,0.015,0.015,0.05,0.89]\n", - " Not in top 50\n", - " -339.0\n", + " 0\n", + " 31262\n", + " For Q1 2025, how many banks will be listed on ...\n", + " NaN\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", + " multiple_choice\n", + " [0, 1, 2-3, 4-6, >6]\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " 31268\n", + " 1.0\n", + " [0.012462871287128714, 0.0001, 0.0001, 0.0001,...\n", + " [0.001,0.62,0.35,0.019,0.01]\n", + " 2.522754\n", 
+ " 2.522754\n", " \n", " \n", - " 121\n", - " How many movies will be new on Netflix's top 1...\n", - " 0.1\n", - " [0.005,0.017,0.157,0.821]\n", - " 3 or more\n", - " -210.5\n", + " 1\n", + " 31263\n", + " What percentage of the vote will Alexander Luk...\n", + " NaN\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", + " numeric\n", + " NaN\n", + " 60.0\n", + " 100.0\n", + " True\n", + " True\n", + " 31269\n", + " 1.0\n", + " [0.05, 0.0505982539, 0.0511965078, 0.051794761...\n", + " [0.0013749738,0.0014499743,0.001526641,0.00160...\n", + " -0.158842\n", + " -0.158842\n", " \n", " \n", - " 335\n", - " How many cubic meters of water produced and su...\n", - " [0.12255555556666668, 0.1304049507, 0.13838334...\n", - " [0.0346238299,0.0364286012,0.0383259676,0.0403...\n", - " 130027.0\n", - " -158.7\n", + " 2\n", + " 31264\n", + " Will the bubble in the Magnificent Seven pop b...\n", + " 0.0\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", + " binary\n", + " NaN\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " 31270\n", + " 1.0\n", + " 0.063\n", + " 0.013\n", + " -0.051987\n", + " -0.051987\n", " \n", " \n", - " 12\n", - " What will be the monthly cargo volumes at the ...\n", - " [0.03366666666666667, 0.034913915633333334, 0....\n", - " [0.001714054,0.0017985406,0.0018846914,0.00197...\n", - " 720283.0\n", - " -130.3\n", + " 3\n", + " 31274\n", + " How many arms sales globally will the US State...\n", + " NaN\n", + " 2025-01-21 11:42:00\n", + " 2025-01-21 11:42:00\n", + " multiple_choice\n", + " [0-4, 5-9, >9]\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 31280\n", + " 1.0\n", + " [0.0001, 0.5125, 0.0001]\n", + " [0.16,0.44,0.4]\n", + " 0.152526\n", + " 0.152526\n", " \n", " \n", - " 71\n", - " Will OpenAI, Anthropic, or Perplexity run an a...\n", - " 0.15\n", - " 0.55\n", - " yes\n", - " -129.9\n", + " 4\n", + " 31275\n", + " How much will it rain in Brasília, Brazil in F...\n", + " NaN\n", + " 2025-01-21 11:42:00\n", + " 
2025-01-21 11:42:00\n", + " numeric\n", + " NaN\n", + " 0.0\n", + " 400.0\n", + " False\n", + " False\n", + " 31281\n", + " 1.0\n", + " [0.0, 0.0018181818, 0.0036363636, 0.0054545455...\n", + " [0.0,0.0005044914,0.0010323506,0.0015847475,0....\n", + " 0.132210\n", + " 0.132210\n", " \n", " \n", "\n", "" ], "text/plain": [ - " title \\\n", - "279 What will Kalshi's rank in the iPhone Top Free... \n", - "121 How many movies will be new on Netflix's top 1... \n", - "335 How many cubic meters of water produced and su... \n", - "12 What will be the monthly cargo volumes at the ... \n", - "71 Will OpenAI, Anthropic, or Perplexity run an a... \n", + " bot_question_id title \\\n", + "0 31262 For Q1 2025, how many banks will be listed on ... \n", + "1 31263 What percentage of the vote will Alexander Luk... \n", + "2 31264 Will the bubble in the Magnificent Seven pop b... \n", + "3 31274 How many arms sales globally will the US State... \n", + "4 31275 How much will it rain in Brasília, Brazil in F... \n", "\n", - " bot_team_median \\\n", - "279 0.03 \n", - "121 0.1 \n", - "335 [0.12255555556666668, 0.1304049507, 0.13838334... \n", - "12 [0.03366666666666667, 0.034913915633333334, 0.... \n", - "71 0.15 \n", - "\n", - " pro_median resolution \\\n", - "279 [0.02,0.01,0.015,0.015,0.05,0.89] Not in top 50 \n", - "121 [0.005,0.017,0.157,0.821] 3 or more \n", - "335 [0.0346238299,0.0364286012,0.0383259676,0.0403... 130027.0 \n", - "12 [0.001714054,0.0017985406,0.0018846914,0.00197... 
720283.0 \n", - "71 0.55 yes \n", + " resolution scheduled_close_time actual_close_time type \\\n", + "0 NaN 2025-01-20 03:27:00 2025-01-20 03:27:00 multiple_choice \n", + "1 NaN 2025-01-20 03:27:00 2025-01-20 03:27:00 numeric \n", + "2 0.0 2025-01-20 03:27:00 2025-01-20 03:27:00 binary \n", + "3 NaN 2025-01-21 11:42:00 2025-01-21 11:42:00 multiple_choice \n", + "4 NaN 2025-01-21 11:42:00 2025-01-21 11:42:00 numeric \n", "\n", - " head_to_head \n", - "279 -339.0 \n", - "121 -210.5 \n", - "335 -158.7 \n", - "12 -130.3 \n", - "71 -129.9 " + " options range_min range_max open_upper_bound \\\n", + "0 [0, 1, 2-3, 4-6, >6] NaN NaN False \n", + "1 NaN 60.0 100.0 True \n", + "2 NaN NaN NaN False \n", + "3 [0-4, 5-9, >9] NaN NaN NaN \n", + "4 NaN 0.0 400.0 False \n", + "\n", + " open_lower_bound pro_question_id question_weight \\\n", + "0 False 31268 1.0 \n", + "1 True 31269 1.0 \n", + "2 False 31270 1.0 \n", + "3 NaN 31280 1.0 \n", + "4 False 31281 1.0 \n", + "\n", + " bot_team_median \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", + "\n", + " pro_median head_to_head \\\n", + "0 [0.001,0.62,0.35,0.019,0.01] 2.522754 \n", + "1 [0.0013749738,0.0014499743,0.001526641,0.00160... -0.158842 \n", + "2 0.013 -0.051987 \n", + "3 [0.16,0.44,0.4] 0.152526 \n", + "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... 
0.132210 \n", + "\n", + " weighted_score \n", + "0 2.522754 \n", + "1 -0.158842 \n", + "2 -0.051987 \n", + "3 0.152526 \n", + "4 0.132210 " ] }, - "execution_count": 73, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.set_option('display.max_colwidth', 50)\n", + "df_top_bot_pro_forecasts.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rows in calibration df: 48\n" + ] + } + ], + "source": [ + "# Make binary-only df_top_bot_pro_forecasts for calibration curves etc\n", + "df_top_bot_pro_forecasts_binary = df_top_bot_pro_forecasts[\n", + " (df_top_bot_pro_forecasts['type'] == 'binary') &\n", + " (df_top_bot_pro_forecasts['resolution'].notna())\n", + "].copy()\n", + "print(f\"Rows in calibration df: {len(df_top_bot_pro_forecasts_binary)}\")\n", "\n", - "df_sorted = df_top_bot_pro_forecasts.sort_values(by='head_to_head')\n", - "df_sorted['head_to_head'] = df_sorted['head_to_head'].round(1)\n", - "#df_sorted['resolution'] = df_sorted['resolution'].map({1: 'yes', 0: 'no'})\n", "\n", - "df_top5 = df_sorted.head(5)\n", - "df_bottom5 = df_sorted.tail(5)\n", + "df_top_bot_pro_forecasts_all_binary = df_top_bot_pro_forecasts_all[df_top_bot_pro_forecasts_all['type'] == 'binary'].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 807 + }, + "id": "BjNQ4IND6Ct7", + "outputId": "c0ec1316-ef4e-4bd1-875d-148b65ba0114" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of pro forecasts: 48\n" + ] + } + ], + "source": [ + "# Set up the plot\n", + "plt.figure(figsize=(10, 8))\n", + "plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly calibrated')\n", + "\n", + "# Plot calibration curves for bot_team_median and pro_median\n", + "plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'bot_team_median', 'Bot Team Median', 'blue')\n", + "plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'pro_median', 'Pro Median', 'red')\n", + "\n", + "# Customize the plot\n", + "plt.xlabel('Assigned Probability', fontsize=12)\n", + "plt.ylabel('Fraction that Resolved \\'Yes\\'', fontsize=12)\n", + "plt.title(f'Calibration Curve: Bot Team Median vs Pro Median\\n(only overlap: {len(df_top_bot_pro_forecasts_binary)} questions)', fontsize=14)\n", + "plt.legend(fontsize=10)\n", + "plt.grid(True, alpha=0.3)\n", "\n", - "print(\"Top 5:\")\n", + "# Set axis limits\n", + "plt.xlim(0, 1)\n", + "plt.ylim(0, 1)\n", "\n", - "df_top5[['title', 'bot_team_median', 'pro_median', 'resolution', 'head_to_head']]" + "# Show the plot\n", + "plt.tight_layout()\n", + "plt.show()\n", + "print(f\"Number of pro forecasts: {len(df_top_bot_pro_forecasts_binary)}\")" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# Map resolution to 0 and 1\n", + "df_top_bot_pro_forecasts_all_binary['resolution'] = df_top_bot_pro_forecasts_all_binary['resolution'].map({'yes': 1, 'no': 0})\n", + "df_top_bot_pro_forecasts_all_binary = df_top_bot_pro_forecasts_all_binary[\n", + " df_top_bot_pro_forecasts_all_binary['resolution'].notna()\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 76, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Bottom 5:\n" + "2 0.0\n", + "5 1.0\n", + "8 1.0\n", + "10 
1.0\n", + "13 1.0\n", + " ... \n", + "417 0.0\n", + "418 0.0\n", + "419 0.0\n", + "420 1.0\n", + "421 0.0\n", + "Name: resolution, Length: 236, dtype: float64\n" ] - }, + } + ], + "source": [ + "print(df_top_bot_pro_forecasts_all_binary['resolution'])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -11517,140 +13554,396 @@ " \n", " \n", " \n", + " bot_question_id\n", " title\n", + " resolution\n", + " scheduled_close_time\n", + " actual_close_time\n", + " type\n", + " options\n", + " range_min\n", + " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", + " pro_question_id\n", + " question_weight\n", " bot_team_median\n", " pro_median\n", - " resolution\n", - " head_to_head\n", " \n", " \n", " \n", " \n", - " 170\n", - " In its March update, will Similarweb report de...\n", - " 0.7\n", - " 0.144\n", - " yes\n", - " 158.1\n", + " 2\n", + " 31264\n", + " Will the bubble in the Magnificent Seven pop b...\n", + " 0.0\n", + " 2025-01-20 03:27:00\n", + " 2025-01-20 03:27:00\n", + " binary\n", + " NaN\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " 31270\n", + " 1.0\n", + " 0.063\n", + " 0.013\n", " \n", " \n", - " 0\n", - " For Q1 2025, how many banks will be listed on ...\n", - " 0.02\n", - " [0.001,0.62,0.35,0.019,0.01]\n", - " 0\n", - " 299.6\n", + " 5\n", + " 31276\n", + " Will the USDA-posted recall by Pork Dynasty In...\n", + " 1.0\n", + " 2025-01-21 11:42:00\n", + " 2025-01-21 11:42:00\n", + " binary\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 31282\n", + " 1.0\n", + " 0.62\n", + " 0.45\n", " \n", " \n", - " 189\n", - " What will the highest rank of metac-GPT4o or m...\n", - " [0.0, 0.05003188076666667, 0.11135575903333333...\n", - " [0.0,5.19918e-05,0.0001040776,0.0001562618,0.0...\n", - " 34.0\n", - " 502.6\n", + " 8\n", + " 31288\n", + " Will Eric Adams be Mayor of New York City on t...\n", + " 1.0\n", + " 2025-01-22 20:19:00\n", + " 
2025-01-22 20:19:00\n", + " binary\n", + " NaN\n", + " NaN\n", + " NaN\n", + " False\n", + " False\n", + " 31294\n", + " 1.0\n", + " 0.86\n", + " 0.95\n", " \n", " \n", - " 211\n", - " Will Nikola Corporation file for bankruptcy be...\n", - " 0.99\n", - " 0.999\n", - " annulled\n", + " 10\n", + " 31318\n", + " Will the S&P 500 index go up in January 2025?\n", + " 1.0\n", + " 2025-01-23 23:23:00\n", + " 2025-01-23 23:23:00\n", + " binary\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " <NA>\n", + " 1.0\n", + " NaN\n", " NaN\n", " \n", " \n", - " 214\n", - " Will the state of Rhode Island have any recrea...\n", - " 0.923333\n", - " 0.95\n", - " annulled\n", + " 13\n", + " 31334\n", + " At the end of March 2025, will Wikipedia still...\n", + " 1.0\n", + " 2025-01-24 14:23:00\n", + " 2025-01-24 14:23:00\n", + " binary\n", + " NaN\n", + " NaN\n", " NaN\n", + " False\n", + " False\n", + " 31338\n", + " 1.0\n", + " 0.85\n", + " 0.9\n", " \n", " \n", "\n", "" ], "text/plain": [ - " title \\\n", - "170 In its March update, will Similarweb report de... \n", - "0 For Q1 2025, how many banks will be listed on ... \n", - "189 What will the highest rank of metac-GPT4o or m... \n", - "211 Will Nikola Corporation file for bankruptcy be... \n", - "214 Will the state of Rhode Island have any recrea... \n", - "\n", - " bot_team_median \\\n", - "170 0.7 \n", - "0 0.02 \n", - "189 [0.0, 0.05003188076666667, 0.11135575903333333... \n", - "211 0.99 \n", - "214 0.923333 \n", + " bot_question_id title \\\n", + "2 31264 Will the bubble in the Magnificent Seven pop b... \n", + "5 31276 Will the USDA-posted recall by Pork Dynasty In... \n", + "8 31288 Will Eric Adams be Mayor of New York City on t... \n", + "10 31318 Will the S&P 500 index go up in January 2025? \n", + "13 31334 At the end of March 2025, will Wikipedia still... 
\n", "\n", - " pro_median resolution \\\n", - "170 0.144 yes \n", - "0 [0.001,0.62,0.35,0.019,0.01] 0 \n", - "189 [0.0,5.19918e-05,0.0001040776,0.0001562618,0.0... 34.0 \n", - "211 0.999 annulled \n", - "214 0.95 annulled \n", + " resolution scheduled_close_time actual_close_time type options \\\n", + "2 0.0 2025-01-20 03:27:00 2025-01-20 03:27:00 binary NaN \n", + "5 1.0 2025-01-21 11:42:00 2025-01-21 11:42:00 binary NaN \n", + "8 1.0 2025-01-22 20:19:00 2025-01-22 20:19:00 binary NaN \n", + "10 1.0 2025-01-23 23:23:00 2025-01-23 23:23:00 binary NaN \n", + "13 1.0 2025-01-24 14:23:00 2025-01-24 14:23:00 binary NaN \n", "\n", - " head_to_head \n", - "170 158.1 \n", - "0 299.6 \n", - "189 502.6 \n", - "211 NaN \n", - "214 NaN " + " range_min range_max open_upper_bound open_lower_bound pro_question_id \\\n", + "2 NaN NaN False False 31270 \n", + "5 NaN NaN NaN NaN 31282 \n", + "8 NaN NaN False False 31294 \n", + "10 NaN NaN NaN NaN \n", + "13 NaN NaN False False 31338 \n", + "\n", + " question_weight bot_team_median pro_median \n", + "2 1.0 0.063 0.013 \n", + "5 1.0 0.62 0.45 \n", + "8 1.0 0.86 0.95 \n", + "10 1.0 NaN NaN \n", + "13 1.0 0.85 0.9 " + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_top_bot_pro_forecasts_all_binary.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "bot_question_id Int64\n", + "title object\n", + "resolution float64\n", + "scheduled_close_time datetime64[ns]\n", + "actual_close_time datetime64[ns]\n", + "type object\n", + "options object\n", + "range_min float64\n", + "range_max float64\n", + "open_upper_bound object\n", + "open_lower_bound object\n", + "pro_question_id Int64\n", + "question_weight float64\n", + "bot_team_median object\n", + "pro_median object\n", + "dtype: object" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": 
[ + "df_top_bot_pro_forecasts_all_binary.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "bot_question_id Int64\n", + "title object\n", + "resolution float64\n", + "scheduled_close_time datetime64[ns]\n", + "actual_close_time datetime64[ns]\n", + "type object\n", + "options object\n", + "range_min float64\n", + "range_max float64\n", + "open_upper_bound object\n", + "open_lower_bound object\n", + "pro_question_id Int64\n", + "question_weight float64\n", + "bot_team_median object\n", + "pro_median object\n", + "head_to_head float64\n", + "weighted_score float64\n", + "dtype: object" ] }, - "execution_count": 74, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "print(\"\\nBottom 5:\")\n", + "df_top_bot_pro_forecasts_binary.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of pro forecasts: 48\n", + "Number of bot forecasts: 236\n" + ] + } + ], + "source": [ + "# Set up the plot\n", + "plt.figure(figsize=(10, 8))\n", + "plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly calibrated')\n", "\n", - "df_bottom5[['title', 'bot_team_median', 'pro_median', 'resolution', 'head_to_head']]" + "# Plot calibration curves for bot_team_median and pro_median\n", + "plot_calibration_curve(df_top_bot_pro_forecasts_all_binary, 'bot_team_median', 'Bot Team Median', 'blue')\n", + "plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'pro_median', 'Pro Median', 'red')\n", + "\n", + "# Customize the plot\n", + "plt.xlabel('Assigned Probability', fontsize=12)\n", + "plt.ylabel('Fraction that Resolved \\'Yes\\'', fontsize=12)\n", + "plt.title(f'Calibration Curve: Bot Team Median vs Pro Median\\n(all questions)', fontsize=14)\n", + "plt.legend(fontsize=10)\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "# Set axis limits\n", + "plt.xlim(0, 1)\n", + "plt.ylim(0, 1)\n", + "\n", + "# Show the plot\n", + "plt.tight_layout()\n", + "plt.show()\n", + "print(f\"Number of pro forecasts: {len(df_top_bot_pro_forecasts_binary)}\")\n", + "print(f\"Number of bot forecasts: {len(df_top_bot_pro_forecasts_all_binary)}\")" ] }, { "cell_type": "code", - "execution_count": 75, - "metadata": {}, + "execution_count": 81, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "N26JZjCV9_jc", + "outputId": "eacb7626-54d0-47c7-8f21-48e95e709564" + }, "outputs": [ { "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA90AAASlCAYAAAC1GLqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACal0lEQVR4nOzdd3gV1d728XsnkF5oKUQQAgm9HYqIFGkSFBEEpUMogkepIoq8CiQqTQ886KGKSFMEURBF6YIFREGKqBiRIiihKJCQBAIk6/3jMfOwTYAkZNgJfD/Xta8re82amd+ePZQ7s2aNwxhjBAAAAAAA8pybqwsAAAAAAOBWRegGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYA3Hbmz58vh8Ohw4cPu7qUHGnatKmqVavm6jLyrc2bN8vhcGjz5s03fd+9e/dW2bJlb/p+AQD5H6EbAJArP/74o3r06KE77rhDnp6eCgsLU48ePfTTTz+5ujTL+PHj9eGHH7q6jBw5duyYYmJitHv3btv3lZKSopiYmGyH1IxQm/Fyd3dXcHCwHnnkEe3bt8/2em8FTZs2Ve/evSVJEydOlMPh0Nq1a7Ps+8ADDygwMFDHjh3Lk31nfH8F7ZdNAFDQEboBADm2fPly1a5dWxs3blSfPn00Y8YM9evXT5999plq166tlStXurpE6Rqhu2fPnjp//rzKlCnjkrqu5dixY4qNjb1poTs2NjbHV4aHDBmiRYsW6c0331T37t31ySefqHHjxjp+/Lhttd6Knn76aVWvXl1PPvmkzp8/77Rs2bJlWr16tSZMmKCwsDCX1QgAuHGFXF0AAKBgOXDggHr27Kly5crpiy++UFBQkLVs6NChaty4sXr06KHvv/9e4eHhLq31atzd3eXu7u7qMgqsxo0b65FHHrHeV6xYUU888YQWLlyoZ5991qW1FSSFCxfWG2+8oYYNG+qll17S+PHjJUnnzp3TsGHDdPfdd+vf//63q8sEANwgrnQDAHLk1VdfVUpKit544w2nwC1JJUqU0OzZs5WUlKRXX33Var/a/a4xMTFyOByZ2t9++23VqVNH3t7eKlasmLp06aKjR4869dm/f786duyo0NBQeXl5qVSpUurSpYsSEhIkSQ6HQ8nJyVqwYIE1HDpjWO/V7umeMWOGqlatag2XHzhwoM6ePevUJ+O+6p9++knNmjWTj4+P7rjjDr3yyiuZPsd///tfVa1aVT4+PipatKjq1q2rxYsXX/XYbt68WfXq1ZMk9enTx6p7/vz5Tv2ut++LFy9qzJgxqlOnjgIDA+Xr66vGjRtr06ZNVp/Dhw9b319sbKy1r5iYmKvWdzWNGzeW/v6FzJX++OMP9e3bVyEhIfL09FTVqlX11ltv5eo47dq1S/fff78CAgLk5+enFi1aaNu2bdesa9CgQfLz81NKSkqmZV27dlVoaKjS0tKsttWrV6tx48by9fWVv7+/2rRpox9//DHTuh9++KGqVasmLy8vVatWTStWrMjGUcpaRrD+z3/+Y92a8cILL+jkyZN644035ObmprNnz2rYsGEqXbq0PD09FRERoUmTJik9Pd1pW0uWLFGdOnXk7++vgIAAVa9eXa+99lquawMA5A1CNwAgRz7++GOVLVvWClr/1KRJE5UtW1Yff/xxrrY/btw49erVS5GRkZoyZYqGDRumjRs3qkmTJlYAvnjxoqKiorRt2zYNHjxY06dP14ABA3Tw4EGrz6JFi+Tp6anGjRtr0aJFWrRokR5//PGr7jcmJkYDBw5UWFiYJk+erI4dO2r27Nlq1aqVLl265NT3zJkzat26tWrWrKnJkyerUqVKGjlypFavXm31mTNnjoYMGaIqVapo6tSpio2NVa1atfTNN99ctYbKlSvrxRdflCQNGDDAqrtJkyY52ndiYqLefPNNNW3aVJMmTVJ
MTIxOnTqlqKgoa9h6UFCQZs6cKUl6+OGHrX116NAhx99Zxi8vihYtarWdOHFCd999tzZs2KBBgwbptddeU0REhPr166epU6fm6Dj9+OOPaty4sfbs2aNnn31Wo0eP1qFDh9S0adNrHs/OnTsrOTlZn3zyiVN7SkqKPv74Yz3yyCPWiIdFixapTZs28vPz06RJkzR69Gj99NNPatSokdMvZ9atW6eOHTvK4XBowoQJat++vfr06aMdO3bk+LhlmDBhgoKCgvT444/ru+++0/Tp0zVixAhVr15dKSkpuvfee/X222+rV69eev3119WwYUONGjVKw4cPt7axfv16de3aVUWLFtWkSZM0ceJENW3aVFu2bMl1XQCAPGIAAMims2fPGkmmXbt21+z30EMPGUkmMTHRGGNMdHS0KVOmTKZ+Y8eONVf+U3T48GHj7u5uxo0b59Rv7969plChQlb7rl27jCSzbNmya9bh6+troqOjM7XPmzfPSDKHDh0yxhhz8uRJ4+HhYVq1amXS0tKsftOmTTOSzFtvvWW13XvvvUaSWbhwodWWmppqQkNDTceOHa22du3amapVq16zvqxs377dSDLz5s3LtCy7+758+bJJTU11WvfMmTMmJCTE9O3b12o7deqUkWTGjh2brdo2bdpkHY9Tp06ZY8eOmTVr1piIiAjjcDjMt99+a/Xt16+fKVmypPnzzz+dttGlSxcTGBhoUlJSjMnmcWrfvr3x8PAwBw4csNqOHTtm/P39TZMmTTLVt2nTJmOMMenp6eaOO+5wOjbGGPPee+8ZSeaLL74wxhhz7tw5U6RIEdO/f3+nfsePHzeBgYFO7bVq1TIlS5Y0Z8+etdrWrVtnJGV5jmfX+++/bySZYsWKmXLlylnH56WXXjK+vr7ml19+cer/3HPPGXd3d3PkyBFjjDFDhw41AQEB5vLly7muAQBgD650AwCy7dy5c5Ikf3//a/bLWJ7RP7uWL1+u9PR0derUSX/++af1Cg0NVWRkpDU8OjAwUJK0du3aLIcO59SGDRt08eJFDRs2TG5u//dPY//+/RUQEJDpSqmfn5969Ohhvffw8NBdd92lgwcPWm1FihTR77//ru3bt99wfTndt7u7uzw8PCRJ6enpOn36tC5fvqy6detq586dN1xD3759FRQUpLCwMLVu3VoJCQlatGiRNTTeGKMPPvhAbdu2lTHG6buMiopSQkKCVcf1jlNaWprWrVun9u3bq1y5clZ7yZIl1a1bN3311VdKTEzMcl2Hw6FHH31Un376qZKSkqz2pUuX6o477lCjRo2kv68Snz17Vl27dnWq1d3dXfXr17fOu/j4eO3evVvR0dHWOShJ9913n6pUqXJDx7Rjx4564IEHdPr0aU2fPl3e3t7S3xOqNW7cWEWLFnWqrWXLlkpLS9MXX3xhHcfk5GStX7/+huoAAOQ9QjcAINuyG6bPnTsnh8OhEiVK5Gj7+/fvlzFGkZGRCgoKcnrt27dPJ0+elCSFh4dr+PDhevPNN1WiRAlFRUVp+vTp1v3cOfXbb79Jf08IdiUPDw+VK1fOWp6hVKlSme5FL1q0qM6cOWO9HzlypPz8/HTXXXcpMjJSAwcOzJOhvtnZtyQtWLBANWrUkJeXl4oXL66goCB98sknuT5GVxozZozWr1+vFStWqFevXkpISHD6ZcWpU6d09uxZ677/K199+vSRJOu7vN5xOnXqlFJSUjJ9N/p7OH56enqm+/2v1LlzZ50/f14fffSRJCkpKUmffvqpHn30Ues47t+/X5LUvHnzTPWuW7fOqjXjPIiMjMy0n6zqy6mMX1rUrVvXatu/f7/WrFmTqa6WLVtKVxzHJ598UhUqVND999+vUqVKqW/fvlqzZs0N1wQAuHHMXg4AyLbAwECFhYXp+++/v2a/77//XqVKlbKutmY1WZr+vop5pfT0dDkcDq1evTrL2cX9/PysnydPnqzevXtr5cqVWrdunYYMGaIJEyZo27Z
tKlWqVC4/YfZcbeZzY4z1c+XKlRUXF6dVq1ZpzZo1+uCDDzRjxgyNGTNGsbGxtu777bffVu/evdW+fXs988wzCg4Olru7uyZMmJBpsrPcqF69uhX62rdvr5SUFPXv31+NGjVS6dKlrQm+evTooejo6Cy3UaNGDcnG45Th7rvvVtmyZfXee++pW7du+vjjj3X+/Hl17tzZ6pNR76JFixQaGpppG4UKue6/S+np6brvvvuuOit8hQoVJEnBwcHavXu31q5dq9WrV2v16tWaN2+eevXqpQULFtzkqgEAVyJ0AwBypG3btpo9e7a++uora3julb788ksdPnzYaZKnokWLZpoFXFdcOcxQvnx5GWMUHh5uhYlrqV69uqpXr64XXnhBW7duVcOGDTVr1iy9/PLL0jXC/j9lPK87Li7OaQjzxYsXdejQIStg5pSvr686d+6szp076+LFi+rQoYPGjRunUaNGycvLK8t1slvztbz//vsqV66cli9f7rS9sWPH5vm+JGnixIlasWKFxo0bp1mzZikoKEj+/v5KS0vL1rG71nEKCgqSj4+P4uLiMq33888/y83NTaVLl77m9jt16qTXXntNiYmJWrp0qcqWLau7777bWl6+fHnp7+B6rXozzpOMK+NXyqq+vFC+fHklJSVl6zh6eHiobdu2atu2rdLT0/Xkk09q9uzZGj16tCIiImypDwBwfQwvBwDkyIgRI+Tj46PHH39cf/31l9Oy06dP69///rcCAgI0aNAgq718+fJKSEhwukIeHx+f6VFLHTp0kLu7u2JjY52u3OrvK7kZ+0tMTNTly5edllevXl1ubm5KTU212nx9fbMM+//UsmVLeXh46PXXX3fa79y5c5WQkKA2bdpk48g4++ex8fDwUJUqVWSMyTQb+pV8fX0lKVt1X03G1fArP8s333yjr7/+2qmfj4/PDe9Lf3+/HTt21Pz583X8+HG5u7urY8eO+uCDD/TDDz9k6n/q1Cnr5+sdJ3d3d7Vq1UorV650mkX8xIkTWrx4sRo1aqSAgIBr1te5c2elpqZqwYIFWrNmjTp16uS0PCoqSgEBARo/fnyW301GvSVLllStWrW0YMECp2H669evtx73ldc6deqkr7/+WmvXrs207OzZs9afg38eRzc3N2s0wZV/JgAANx9XugEAORIREaGFCxeqa9euql69uvr166fw8HAdPnxYc+fO1ZkzZ7RkyRKFh4db63Tp0kUjR47Uww8/rCFDhiglJUUzZ85UhQoVnCb2Kl++vF5++WWNGjVKhw8fVvv27eXv769Dhw5pxYoVGjBggEaMGKHPPvtMgwYN0qOPPqoKFSro8uXLWrRokRX2MtSpU0cbNmzQlClTFBYWpvDwcNWvXz/TZwoKCtKoUaMUGxur1q1b66GHHlJcXJxmzJihevXqOU1cll2tWrVSaGioGjZsqJCQEO3bt0/Tpk1TmzZtrjkRXfny5VWkSBHNmjVL/v7+8vX1Vf369Z2O5/U8+OCDWr58uR5++GG1adNGhw4d0qxZs1SlShWnCcW8vb1VpUoVLV26VBUqVFCxYsVUrVo1VatWLcef95lnntF7772nqVOnauLEiZo4caI2bdqk+vXrq3///qpSpYpOnz6tnTt3asOGDTp9+nS2j9PLL7+s9evXq1GjRnryySdVqFAhzZ49W6mpqVk+H/2fateurYiICD3//PNKTU11GlouSQEBAZo5c6Z69uyp2rVrq0uXLgoKCtKRI0f0ySefqGHDhpo2bZr09+O92rRpo0aNGqlv3746ffq09ZzxK49tXnnmmWf00Ucf6cEHH1Tv3r1Vp04dJScna+/evXr//fd1+PBhlShRQo899phOnz6t5s2bq1SpUvrtt9/03//+V7Vq1VLlypXzvC4AQA64evp0AEDBtHfvXtOtWzcTGhpq3NzcjCTj5eVlfvzxxyz7r1u3zlSrVs14eHiYihUrmrfffjvTI8MyfPDBB6ZRo0bG19fX+Pr
6mkqVKpmBAweauLg4Y4wxBw8eNH379jXly5c3Xl5eplixYqZZs2Zmw4YNTtv5+eefTZMmTYy3t7eRZD0+7J+PDMswbdo0U6lSJVO4cGETEhJinnjiCXPmzBmnPvfee2+Wj7j652PRZs+ebZo0aWKKFy9uPD09Tfny5c0zzzxjEhISrntsV65caapUqWIKFSrk9Piw7O47PT3djB8/3pQpU8Z4enqaf/3rX2bVqlVZPrpt69atpk6dOsbDw+O6jw/LeCTX1R7V1rRpUxMQEGA9TuvEiRNm4MCBpnTp0qZw4cImNDTUtGjRwrzxxhs5Pk47d+40UVFRxs/Pz/j4+JhmzZqZrVu3ZllfxiPDrvT8888bSSYiIuKany8qKsoEBgYaLy8vU758edO7d2+zY8cOp34ffPCBqVy5svH09DRVqlQxy5cvv+pj8XIi48/DqVOnnNrPnTtnRo0aZSIiIoyHh4cpUaKEueeee8x//vMfc/HiRWP+fuRYq1atTHBwsPHw8DB33nmnefzxx018fPwN1QQAuHEO88/xewAA5MLChQvVu3dv9ejRQwsXLnR1OQAAAPkCw8sBAHmiV69eio+P13PPPadSpUpp/Pjxri4JAADA5bjSDQAAAACATZi9HAAAAAAAmxC6AQAAAACwCaEbAAAAAACb3PITqaWnp+vYsWPy9/eXw+FwdTkAAAAAgFuAMUbnzp1TWFiY3Nyufj37lg/dx44dU+nSpV1dBgAAAADgFnT06FGVKlXqqstv+dDt7+8v/X0gAgICXF0OAAAAAOAWkJiYqNKlS1uZ82pu+dCdMaQ8ICCA0A0AAAAAyFPXu42ZidQAAAAAALAJoRsAAAAAAJsQugEAAAAAsMktf093dqWlpenSpUuuLgO5ULhwYbm7u7u6DAAAAADI5LYP3cYYHT9+XGfPnnV1KbgBRYoUUWhoKM9iBwAAAJCv3PahOyNwBwcHy8fHh9BWwBhjlJKSopMnT0qSSpYs6eqSAAAAAMByW4futLQ0K3AXL17c1eUgl7y9vSVJJ0+eVHBwMEPNAQAAAOQbt/VEahn3cPv4+Li6FNygjO+Q+/IBAAAA5Ce3dejOwJDygo/vEAAAAEB+ROgGAAAAAMAmhG4AAAAAAGxyW0+kdjX95m+/qfub27veTd0fAAAAAODm4Ep3AdS7d285HA7rVbx4cbVu3Vrff/99jrfTvn37a/a5cj9ZvWJiYm7w0wAAAADArYvQXUC1bt1a8fHxio+P18aNG1WoUCE9+OCDeb6fjH3Ex8dr6tSpCggIcGobMWJEnu8TAAAAAG4VhO4CytPTU6GhoQoNDVWtWrX03HPP6ejRozp16pTVZ+/evWrevLm8vb1VvHhxDRgwQElJSZKkmJgYLViwQCtXrrSuWm/evDnTfjL2ERoaqsDAQDkcDqe2JUuWqHLlyvLy8lKlSpU0Y8YMp/VHjhypChUqyMfHR+XKldPo0aOdHusVExOjWrVq6a233tKdd94pPz8/Pfnkk0pLS9Mrr7yi0NBQBQcHa9y4cbYeTwAAAACwA/d03wKSkpL09ttvKyIiQsWLF5ckJScnKyoqSg0aNND27dt18uRJPfbYYxo0aJDmz5+vESNGaN++fUpMTNS8efMkScWKFcvRft955x2NGTNG06ZN07/+9S/t2rVL/fv3l6+vr6KjoyVJ/v7+mj9/vsLCwrR37171799f/v7+evbZZ63tHDhwQKtXr9aaNWt04MABPfLIIzp48KAqVKigzz//XFu3blXfvn3VsmVL1a9fP0+PHQAAAADYidBdQK1atUp+fn7S3wG7ZMmSWrVqldzc/nfwwuLFi3XhwgUtXLhQvr6+kqRp06apbdu2mjRpkkJCQuTt7a3U1FSFhobmqoaxY8dq8uTJ6tChgyQpPDxcP/30k2bPnm2F7hdeeMHqX7ZsWY0YMUJLlixxCt3p6el666235O/vrypVqqhZs2aKi4vTp59+Kjc3N1WsWFGTJk3Spk2
bCN0AAAAAChRCdwHVrFkzzZw5U5J05swZzZgxQ/fff7++/fZblSlTRvv27VPNmjWtwC1JDRs2VHp6uuLi4hQSEnJD+09OTtaBAwfUr18/9e/f32q/fPmyAgMDrfdLly7V66+/rgMHDigpKUmXL19WQECA07bKli0rf39/631ISIjc3d2tXyBktJ08efKGagYAAACAm82l93R/8cUXatu2rcLCwuRwOPThhx86LTfGaMyYMSpZsqS8vb3VsmVL7d+/32X15ie+vr6KiIhQRESE6tWrpzfffFPJycmaM2fOTdl/xr3hc+bM0e7du63XDz/8oG3btkmSvv76a3Xv3l0PPPCAVq1apV27dun555/XxYsXnbZVuHBhp/cOhyPLtvT0dNs/FwAAAADkJZeG7uTkZNWsWVPTp0/Pcvkrr7yi119/XbNmzdI333wjX19fRUVF6cKFCze91vzO4XDIzc1N58+flyRVrlxZe/bsUXJystVny5Yt1nBtSfLw8FBaWlqu9hcSEqKwsDAdPHjQCv8Zr/DwcEnS1q1bVaZMGT3//POqW7euIiMj9dtvv+XJ5wUAAACAgsClw8vvv/9+3X///VkuM8Zo6tSpeuGFF9SuXTtJ0sKFCxUSEqIPP/xQXbp0ucnV5i+pqak6fvy49Pfw8mnTpikpKUlt27aVJHXv3l1jx45VdHS0YmJidOrUKQ0ePFg9e/a0hpaXLVtWa9euVVxcnIoXL67AwMBMV5ivJTY2VkOGDFFgYKBat26t1NRU7dixQ2fOnNHw4cMVGRmpI0eOaMmSJapXr54++eQTrVixwqYjAgAAAAD5T769p/vQoUM6fvy4WrZsabUFBgaqfv36+vrrr68aulNTU5Wammq9T0xMzPG+5/aul8uqb541a9aoZMmS0t8zhFeqVEnLli1T06ZNJUk+Pj5au3athg4dqnr16snHx0cdO3bUlClTrG30799fmzdvVt26dZWUlKRNmzZZ62fHY489Jh8fH7366qt65pln5Ovrq+rVq2vYsGGSpIceekhPPfWUBg0apNTUVLVp00ajR49WTExMnh8PAAAAAMiPHMYY4+oi9Pfw6BUrVqh9+/bS30OTGzZsqGPHjlnhUpI6deokh8OhpUuXZrmdmJgYxcbGZmpPSEjINIHXhQsXdOjQIYWHh8vLyyvPPxNuHr5LAAAAFEiLO7u6gvypW9Z5Lz9JTExUYGBgllnzSi69p9sOo0aNUkJCgvU6evSoq0sCAAAAANym8m3oznh29IkTJ5zaT5w4cc3nSnt6eiogIMDpBQAAAACAK+Tb0B0eHq7Q0FBt3LjRaktMTNQ333yjBg0auLQ2AAAAAACyw6UTqSUlJenXX3+13h86dEi7d+9WsWLFdOedd2rYsGF6+eWXFRkZqfDwcI0ePVphYWHWfd8AAAAAAORnLg3dO3bsULNmzaz3w4cPlyRFR0dr/vz5evbZZ5WcnKwBAwbo7NmzatSokdasWcNEWQAAAACAAsGlobtp06a61uTpDodDL774ol588cWbWhcAAAAAAHkh397TDQAAAABAQUfoBgAAAADAJoRuAAAAAABs4tJ7uvOtxZ1v7v66Lb25+wMAAAAA3BRc6S6AevfuLYfDIYfDIQ8PD0VEROjFF1/U5cuX82wfMTEx1j6u9gIAAAAAXBuhu4Bq3bq14uPjtX//fj399NOKiYnRq6++mmXfixcv5nj7I0aMUHx8vPUqVaqUXnzxRac2AAAAAMC1EboLKE9PT4WGhqpMmTJ64okn1LJlS3300UfS31fC27dvr3HjxiksLEwVK1aUJO3du1fNmzeXt7e3ihcvrgEDBigpKSnL7fv5+Sk0NNR6ubu7y9/f33p/6dIlderUSUWKFFGxYsXUrl07HT582Fp/+/btuu+++1SiRAkFBgbq3nvv1c6dO5324XA4NHv2bD344IPy8fFR5cqV9fXXX+vXX39V06ZN5evrq3vuuUcHDhyw9VgCAAAAgF0I3bc
Ib29vpyvaGzduVFxcnNavX69Vq1YpOTlZUVFRKlq0qLZv365ly5Zpw4YNGjRoUI73denSJUVFRcnf319ffvmltmzZIj8/P7Vu3dqq4dy5c4qOjtZXX32lbdu2KTIyUg888IDOnTvntK2XXnpJvXr10u7du1WpUiV169ZNjz/+uEaNGqUdO3bIGJOrGgEAAAAgP2AitQLOGKONGzdq7dq1Gjx4sNXu6+urN998Ux4eHpKkOXPm6MKFC1q4cKF8fX0lSdOmTVPbtm01adIkhYSEZHufS5cuVXp6ut58803r3u558+apSJEi2rx5s1q1aqXmzZs7rfPGG2+oSJEi+vzzz/Xggw9a7X369FGnTp0kSSNHjlSDBg00evRoRUVFSZKGDh2qPn363NAxAgAAAABXIXQXUKtWrZKfn58uXbqk9PR0devWTTExMdby6tWrW4Fbkvbt26eaNWtagVuSGjZsqPT0dMXFxeUodO/Zs0e//vqr/P39ndovXLhgDQU/ceKEXnjhBW3evFknT55UWlqaUlJSdOTIEad1atSoYf2cUUP16tWd2i5cuKDExEQFBARku0YAAAAAyA8I3QVUs2bNNHPmTHl4eCgsLEyFCjl/lVeG67yWlJSkOnXq6J133sm0LCgoSJIUHR2tv/76S6+99prKlCkjT09PNWjQINOkboULF7Z+zrhqnlVbenq6bZ8HAAAAAOxC6C6gfH19FRERke3+lStX1vz585WcnGwF8i1btsjNzc2aaC27ateuraVLlyo4OPiqV5+3bNmiGTNm6IEHHpAkHT16VH/++WeO9gMAAAAABR0Tqd0munfvLi8vL0VHR+uHH37Qpk2bNHjwYPXs2TNHQ8sztlWiRAm1a9dOX375pQ4dOqTNmzdryJAh+v333yVJkZGRWrRokfbt26dvvvlG3bt3l7e3t02fDgAAAADyJ650Z6XbUldXkOd8fHy0du1aDR06VPXq1ZOPj486duyoKVOm5GpbX3zxhUaOHKkOHTro3LlzuuOOO9SiRQvryvfcuXM1YMAA1a5dW6VLl9b48eM1YsQIGz4ZAAAAAORfDmOMcXURdkpMTFRgYKASEhIyDYW+cOGCDh06pPDwcHl5ebmsRtw4vksAAAAUSIs7u7qC/KkAXAi9Vta8EsPLAQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQuiWlp6e7ugTcIL5DAAAAAPnRbf3IMA8PD7m5uenYsWMKCgqSh4eHHA6Hq8tCDhhjdPHiRZ06dUpubm7y8PBwdUkAAAAAYLmtQ7ebm5vCw8MVHx+vY8eOuboc3AAfHx/deeedcnNj8AYAAACA/OO2Dt36+2r3nXfeqcuXLystLc3V5SAX3N3dVahQIUYpAAAAAMh3bvvQLUkOh0OFCxdW4cKFXV0KAAAAAOAWwlhcAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbFLI1QUAAAAAuPX1m7/d1SXkS3M9XF0B7MaVbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgA
AAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABskq9Dd1pamkaPHq3w8HB5e3urfPnyeumll2SMcXVpAAAAAABcVyFXF3AtkyZN0syZM7VgwQJVrVpVO3bsUJ8+fRQYGKghQ4a4ujwAAAAAAK4pX4furVu3ql27dmrTpo0kqWzZsnr33Xf17bffXnWd1NRUpaamWu8TExNvSq0AAAAAAPxTvh5efs8992jjxo365ZdfJEl79uzRV199pfvvv/+q60yYMEGBgYHWq3Tp0jexYgAAAAAA/k++vtL93HPPKTExUZUqVZK7u7vS0tI0btw4de/e/arrjBo1SsOHD7feJyYmErwBAAAAAC6Rr0P3e++9p3feeUeLFy9W1apVtXv3bg0bNkxhYWGKjo7Och1PT095enre9FoBAAAAAPinfB26n3nmGT333HPq0qWLJKl69er67bffNGHChKuGbgAAAAAA8ot8fU93SkqK3NycS3R3d1d6errLagIAAAAAILvy9ZXutm3baty4cbrzzjtVtWpV7dq1S1OmTFHfvn1dXRoAAAAAANeVr0P3f//7X40ePVpPPvmkTp48qbCwMD3++OMaM2aMq0sDAAAAAOC68nXo9vf319SpUzV16lRXlwIAAAAAQI7l63u6AQAAAAAoyAjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYJN+H7j/++EM9evRQ8eLF5e3trerVq2vHjh2uLgsAAAAAgOsq5OoCruXMmTNq2LChmjVrptWrVysoKEj79+9X0aJFXV0aAAAAAADXla9D96RJk1S6dGnNmzfPagsPD3dpTQAAAAAAZFe+Hl7+0UcfqW7dunr00UcVHBysf/3rX5ozZ84110lNTVViYqLTCwAAAAAAV8jXofvgwYOaOXOmIiMjtXbtWj3xxBMaMmSIFixYcNV1JkyYoMDAQOt
VunTpm1ozAAAAAAAZHMYY4+oirsbDw0N169bV1q1brbYhQ4Zo+/bt+vrrr7NcJzU1Vampqdb7xMRElS5dWgkJCQoICLgpdQMAAABw1m/+dleXkC/N9fiPq0vIn7otdXUF15WYmKjAwMDrZs18faW7ZMmSqlKlilNb5cqVdeTIkauu4+npqYCAAKcXAAAAAACukK9Dd8OGDRUXF+fU9ssvv6hMmTIuqwkAAAAAgOzKVeg+ePBg3leShaeeekrbtm3T+PHj9euvv2rx4sV64403NHDgwJuyfwAAAAAAbkSuQndERISaNWumt99+WxcuXMj7qv5Wr149rVixQu+++66qVauml156SVOnTlX37t1t2ycAAAAAAHklV6F7586dqlGjhoYPH67Q0FA9/vjj+vbbb/O+OkkPPvig9u7dqwsXLmjfvn3q37+/LfsBAAAAACCv5Sp016pVS6+99pqOHTumt956S/Hx8WrUqJGqVaumKVOm6NSpU3lfKQAAAAAABcwNTaRWqFAhdejQQcuWLdOkSZP066+/asSIESpdurR69eql+Pj4vKsUAAAAAIAC5oZC944dO/Tkk0+qZMmSmjJlikaMGKEDBw5o/fr1OnbsmNq1a5d3lQIAAAAAUMAUys1KU6ZM0bx58xQXF6cHHnhACxcu1AMPPCA3t//N8OHh4Zo/f77Kli2b1/UCAAAAAFBg5Cp0z5w5U3379lXv3r1VsmTJLPsEBwdr7ty5N1ofAAAAAAAFVq5C9/79+6/bx8PDQ9HR0bnZPAAAAAAAt4Rc3dM9b948LVu2LFP7smXLtGDBgryoCwAAAACAAi9XoXvChAkqUaJEpvbg4GCNHz8+L+oCAAAAAKDAy1XoPnLkiMLDwzO1lylTRkeOHMmLugAAAAAAKPByFbqDg4P1/fffZ2rfs2ePihcvnhd1AQAAAABQ4OUqdHft2lVDhgzRpk2blJaWprS0NH322WcaOnSounTpkvdVAgAAAABQAOVq9vKXXnpJhw8fVosWLVSo0P9uIj09Xb169eKebgAAAAAA/par0O3h4aGlS5fqpZde0p49e+Tt7a3q1aurTJkyeV8hAAAAAAAFVK5Cd4YKFSqoQoUKeVcNAAAAAAC3kFyF7rS0NM2fP18bN27UyZMnlZ6e7rT8s88+y6v6AAAAAAAosHIVuocOHar58+erTZs2qlatmhwOR95XBgAAAABAAZer0L1kyRK99957euCBB/K+IgAAAAAAbhG5emSYh4eHIiIi8r4aAAAAAABuIbkK3U8//bRee+01GWPyviIAAAAAAG4RuRpe/tVXX2nTpk1avXq1qlatqsKFCzstX758eV7VBwAAAABAgZWr0F2kSBE9/PDDeV8NAAAAAAC3kFyF7nnz5uV9JQAAAAAA3GJydU+3JF2+fFkbNmzQ7Nmzde7cOUnSsWPHlJSUlJf1AQAAAABQYOXqSvdvv/2m1q1b68iRI0pNTdV9990nf39/TZo0SampqZo1a1beVwoAAAAAQAGTqyvdQ4cOVd26dXXmzBl5e3tb7Q8//LA2btyYl/UBAAAAAFBg5epK95dffqmtW7fKw8PDqb1s2bL6448/8qo2AAAAAAAKtFxd6U5PT1daWlqm9t9//13+/v55URcAAAAAAAVerkJ3q1atNHXqVOu9w+FQUlKSxo4dqwceeCAv6wMAAAAAoMDK1fDyyZMnKyoqSlWqVNGFCxfUrVs37d+/XyVKlNC7776b91UCAAAAAFAA5Sp0lypVSnv27NGSJUv0/fffKykpSf369VP37t2dJlYDAAAAAOB2lqvQLUmFChVSjx498rYaAAAAAABuIbkK3QsXLrzm8l69euW2HgAAAAAAbhm5Ct1Dhw51en/p0iWlpKTIw8NDPj4+hG4AAAAAAHI7e/mZM2ecXklJSYqLi1OjRo2YSA0AAAAAgL/lKnRnJTIyUhMnTsx0FRwAAAAAgNtVnoVu/T252rFjx/JykwAAAAAAFFi5uqf
7o48+cnpvjFF8fLymTZumhg0b5lVtAAAAAAAUaLkK3e3bt3d673A4FBQUpObNm2vy5Ml5VRsAAAAAAAVarkJ3enp63lcCAAAAAMAtJk/v6QYAAAAAAP8nV1e6hw8fnu2+U6ZMyc0uAAAAAAAo8HIVunft2qVdu3bp0qVLqlixoiTpl19+kbu7u2rXrm31czgceVcpAAAAAAAFTK5Cd9u2beXv768FCxaoaNGikqQzZ86oT58+aty4sZ5++um8rhMAAAAAgAInV/d0T548WRMmTLACtyQVLVpUL7/8MrOXAwAAAADwt1yF7sTERJ06dSpT+6lTp3Tu3Lm8qAsAAAAAgAIvV6H74YcfVp8+fbR8+XL9/vvv+v333/XBBx+oX79+6tChQ95XCQAAAABAAZSre7pnzZqlESNGqFu3brp06dL/bqhQIfXr10+vvvpqXtcIAAAAAECBlKvQ7ePjoxkzZujVV1/VgQMHJEnly5eXr69vXtcHAAAAAECBlavh5Rni4+MVHx+vyMhI+fr6yhiTd5UBAAAAAFDA5Sp0//XXX2rRooUqVKigBx54QPHx8ZKkfv368bgwAAAAAAD+lqvQ/dRTT6lw4cI6cuSIfHx8rPbOnTtrzZo1eVkfAAAAAAAFVq7u6V63bp3Wrl2rUqVKObVHRkbqt99+y6vaAAAAAAAo0HJ1pTs5OdnpCneG06dPy9PTMy/qAgAAAACgwMtV6G7cuLEWLlxovXc4HEpPT9crr7yiZs2a5WV9AAAAAAAUWLkaXv7KK6+oRYsW2rFjhy5evKhnn31WP/74o06fPq0tW7bkfZUAAAAAABRAubrSXa1aNf3yyy9q1KiR2rVrp+TkZHXo0EG7du1S+fLl875KAAAAAAAKoBxf6b506ZJat26tWbNm6fnnn7enKgAAAAAAbgE5vtJduHBhff/99/ZUAwAAAADALSRXw8t79OihuXPn5n01AAAAAADcQnI1kdrly5f11ltvacOGDapTp458fX2dlk+ZMiWv6gMAAAAAoMDKUeg+ePCgypYtqx9++EG1a9eWJP3yyy9OfRwOR95WCAAAAABAAZWj0B0ZGan4+Hht2rRJktS5c2e9/vrrCgkJsas+AAAAAAAKrBzd022McXq/evVqJScn53VNAAAAAADcEnI1kVqGf4ZwAAAAAADwf3IUuh0OR6Z7trmHGwAAAACArOXonm5jjHr37i1PT09J0oULF/Tvf/870+zly5cvz9sqAQAAAAAogHIUuqOjo53e9+jRI6/rAQAAAADglpGj0D1v3jz7KgEAAAAA4BZzQxOpAQAAAACAqyN0AwAAAABgE0I3AAAAAAA2IXQDAAAAAGATQjcAAAAAADYhdAMAAAAAYBNCNwAAAAAANiF0AwAAAABgE0I3AAAAAAA2IXQDAAAAAGATQjcAAAAAADYhdAMAAAAAYBNCNwAAAAAANiF0AwAAAABgE0I3AAAAAAA2IXQDAAAAAGATQjcAAAAAADYhdAMAAAAAYBNCNwAAAAAANilQoXvixIlyOBwaNmyYq0sBAAAAAOC6Ckzo3r59u2bPnq0aNWq4uhQAAAAAALKlQITupKQkde/eXXPmzFHRokWv2Tc1NVWJiYlOLwAAAAAAXKFAhO6BAweqTZs2atmy5XX7TpgwQYGBgdardOnSN6VGAAAAAAD+Kd+H7iVLlmjnzp2aMGFCtvqPGjVKCQkJ1uvo0aO21wgAAAAAQFYKubqAazl69KiGDh2q9evXy8vLK1vreHp6ytPT0/baAAAAAAC4nnwdur/77judPHlStWvXttrS0tL0xRdfaNq0aUpNTZW7u7tLawQAAAAA4Grydehu0aKF9u7d69TWp08fVapUSSNHjiRwAwAAAADytXwduv39/VWtWjWnNl9fXxUvXjxTOwAAAAAA+U2+n0gNAAAAAICCKl9f6c7K5s2bXV0CAAAAAADZwpVuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAA
AwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAm+Tp0T5gwQfXq1ZO/v7+Cg4PVvn17xcXFubosAAAAAACyJV+H7s8//1wDBw7Utm3btH79el26dEmtWrVScnKyq0sDAAAAAOC6Crm6gGtZs2aN0/v58+crODhY3333nZo0aeKyugAAAAAAyI58Hbr/KSEhQZJUrFixq/ZJTU1Vamqq9T4xMfGm1AYAAAAAwD8VmNCdnp6uYcOGqWHDhqpWrdpV+02YMEGxsbE3tTbgdtdv/nZXl5AvzfX4j6tLyJ+6LXV1BRbO3cw4b6+C8zbfm9u7nqtLAIAs5et7uq80cOBA/fDDD1qyZMk1+40aNUoJCQnW6+jRozetRgAAAAAArlQgrnQPGjRIq1at0hdffKFSpUpds6+np6c8PT1vWm0AAAAAAFxNvg7dxhgNHjxYK1as0ObNmxUeHu7qkgAAAAAAyLZ8HboHDhyoxYsXa+XKlfL399fx48clSYGBgfL29nZ1eQAAAAAAXFO+vqd75syZSkhIUNOmTVWyZEnrtXRp/pnMBAAAAACAq8nXV7qNMa4uAQAAAACAXMvXV7oBAAAAACjICN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANiE0A0AAAAAgE0I3QAAAAAA2ITQDQAAAACATQq5ugDguhZ3dnUF+VO3pa6uAAAAAMB1cKUbAAAAAACbELoBAAAAALA
JoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsUiNA9ffp0lS1bVl5eXqpfv76+/fZbV5cEAAAAAMB15fvQvXTpUg0fPlxjx47Vzp07VbNmTUVFRenkyZOuLg0AAAAAgGsq5OoCrmfKlCnq37+/+vTpI0maNWuWPvnkE7311lt67rnnMvVPTU1Vamqq9T4hIUGSlJiYeBOrRp5KueTqCvKnfHROXzyf5OoS8qXEy5y7WeLczdc4b6+C8zbf4/96+R/nbtb4e/cqCsCf6Yy/d4wx1+znMNfr4UIXL16Uj4+P3n//fbVv395qj46O1tmzZ7Vy5cpM68TExCg2NvYmVwoAAAAAuB0dPXpUpUqVuuryfH2l+88//1RaWppCQkKc2kNCQvTzzz9nuc6oUaM0fPhw6316erpOnz6t4sWLy+Fw5HmNiYmJKl26tI4ePaqAgIA83z5gF85dFFScuyioOHdRUHHuoqCy+9w1xujcuXMKCwu7Zr98Hbpzw9PTU56enk5tRYoUsX2/AQEB/CWEAolzFwUV5y4KKs5dFFScuyio7Dx3AwMDr9snX0+kVqJECbm7u+vEiRNO7SdOnFBoaKjL6gIAAAAAIDvydej28PBQnTp1tHHjRqstPT1dGzduVIMGDVxaGwAAAAAA15Pvh5cPHz5c0dHRqlu3ru666y5NnTpVycnJ1mzmrubp6amxY8dmGtIO5HecuyioOHdRUHHuoqDi3EVBlV/O3Xw9e3mGadOm6dVXX9Xx48dVq1Ytvf7666pfv76rywIAAAAA4JoKROgGAAAAAKAgytf3dAMAAAAAUJARugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaE7G6ZPn66yZcvKy8tL9evX17fffnvN/suWLVOlSpXk5eWl6tWr69NPP71ptQJXysm5O2fOHDVu3FhFixZV0aJF1bJly+ue64Bdcvr3boYlS5bI4XCoffv2ttcIZCWn5+7Zs2c1cOBAlSxZUp6enqpQoQL/b4BL5PTcnTp1qipWrChvb2+VLl1aTz31lC5cuHDT6gW++OILtW3bVmFhYXI4HPrwww+vu87mzZtVu3ZteXp6KiIiQvPnz78ptRK6r2Pp0qUaPny4xo4dq507d6pmzZqKiorSyZMns+y/detWde3aVf369dOuXbvUvn17tW/fXj/88MNNrx23t5yeu5s3b1bXrl21adMmff311ypdurRatWqlP/7446bXjttbTs/dDIcPH9aIESPUuHHjm1YrcKWcnrsXL17Ufffdp8OHD+v9999XXFyc5syZozvuuOOm147bW07P3cWLF+u5557T2LFjtW/fPs2dO1dLly7V//t//++m147bV3JysmrWrKnp06dnq/+hQ4fUpk0bNWvWTLt379awYcP02GOPae3atbbXKoNruuuuu8zAgQOt92lpaSYsLMxMmDAhy/6dOnUybdq0cWqrX7++efzxx22vFbhSTs/df7p8+bLx9/c3CxYssLFKILPcnLuXL18299xzj3nzzTdNdHS0adeu3U2qFvg/OT13Z86cacq
VK2cuXrx4E6sEMsvpuTtw4EDTvHlzp7bhw4ebhg0b2l4rkBVJZsWKFdfs8+yzz5qqVas6tXXu3NlERUXZXJ0xXOm+hosXL+q7775Ty5YtrTY3Nze1bNlSX3/9dZbrfP311079JSkqKuqq/QE75Obc/aeUlBRdunRJxYoVs7FSwFluz90XX3xRwcHB6tev302qFHCWm3P3o48+UoMGDTRw4ECFhISoWrVqGj9+vNLS0m5i5bjd5ebcveeee/Tdd99ZQ9APHjyoTz/9VA888MBNqxvIKVfmtEK276EA+/PPP5WWlqaQkBCn9pCQEP38889ZrnP8+PEs+x8/ftzWWoEr5ebc/aeRI0cqLCws019OgJ1yc+5+9dVXmjt3rnbv3n2TqgQyy825e/DgQX322Wfq3r27Pv30U/3666968skndenSJY0dO/YmVY7bXW7O3W7duunPP/9Uo0aNZIzR5cuX9e9//5vh5cjXrpbTEhMTdf78eXl7e9u2b650A8hk4sSJWrJkiVasWCEvLy9XlwNc1blz59SzZ0/NmTNHJUqUcHU5QI6kp6crODhYb7zxhurUqaPOnTvr+eef16xZs1xdGnBNmzdv1vjx4zVjxgzt3LlTy5cv1yeffKKXXnrJ1aUB+RJXuq+hRIkScnd314kTJ5zaT5w4odDQ0CzXCQ0NzVF/wA65OXcz/Oc//9HEiRO1YcMG1ahRw+ZKAWc5PXcPHDigw4cPq23btlZbenq6JKlQoUKKi4tT+fLlb0LluN3l5u/dkiVLqnDhwnJ3d7faKleurOPHj+vixYvy8PCwvW4gN+fu6NGj1bNnTz322GOSpOrVqys5OVkDBgzQ888/Lzc3rush/7laTgsICLD1Kre40n1tHh4eqlOnjjZu3Gi1paena+PGjWrQoEGW6zRo0MCpvyStX7/+qv0BO+Tm3JWkV155RS+99JLWrFmjunXr3qRqgf+T03O3UqVK2rt3r3bv3m29HnroIWtm0tKlS9/kT4DbVW7+3m3YsKF+/fVX6xdFkvTLL7+oZMmSBG7cNLk5d1NSUjIF64xfHv3vnFZA/uPSnGb7VG0F3JIlS4ynp6eZP3+++emnn8yAAQNMkSJFzPHjx40xxvTs2dM899xzVv8tW7aYQoUKmf/85z9m3759ZuzYsaZw4cJm7969LvwUuB3l9NydOHGi8fDwMO+//76Jj4+3XufOnXPhp8DtKKfn7j8xezlcJafn7pEjR4y/v78ZNGiQiYuLM6tWrTLBwcHm5ZdfduGnwO0op+fu2LFjjb+/v3n33XfNwYMHzbp160z58uVNp06dXPgpcLs5d+6c2bVrl9m1a5eRZKZMmWJ27dplfvvtN2OMMc8995zp2bOn1f/gwYPGx8fHPPPMM2bfvn1m+vTpxt3d3axZs8b2Wgnd2fDf//7X3HnnncbDw8PcddddZtu2bdaye++910RHRzv1f++990yFChWMh4eHqVq1qvnkk09cUDWQs3O3TJkyRlKm19ixY11UPW5nOf1790qEbrhSTs/drVu3mvr16xtPT09Trlw5M27cOHP58mUXVI7bXU7O3UuXLpmYmBhTvnx54+XlZUqXLm2efPJJc+bMGRdVj9vRpk2bsvy/a8a5Gh0dbe69995M69SqVct4eHiYcuXKmXnz5t2UWh2GMSAAAAAAANiCe7oBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAsInD4dCHH37o6jIkF9bSu3dvtW/f/oa2cfjwYTkcDu3evfuqfTZv3iyHw6GzZ89KkubPn68iRYpYy2NiYlSrVq0bqgMAgNwgdAMAbntff/213N3d1aZNmzzdbnx8vO6///483aZdevfuLYfDIYfDIQ8PD0VEROjFF1/U5cuXXV1attxzzz2Kj49XYGBglstHjBihjRs3Wu/z4pcBAABkB6EbAHDbmzt
3rgYPHqwvvvhCx44dy7PthoaGytPTM8+2Z7fWrVsrPj5e+/fv19NPP62YmBi9+uqrWfa9ePHiTa/vWjw8PBQaGiqHw5Hlcj8/PxUvXvym1wUAAKEbAHBbS0pK0tKlS/XEE0+oTZs2mj9/vtPyM2fOqHv37goKCpK3t7ciIyM1b9486e/gOWjQIJUsWVJeXl4qU6aMJkyYYK37zyHdW7duVa1ateTl5aW6devqww8/dBo2nTFEeuPGjapbt658fHx0zz33KC4uzqmmlStXqnbt2vLy8lK5cuUUGxvrdEV6//79atKkiby8vFSlShWtX78+W8fC09NToaGhKlOmjJ544gm1bNlSH330kXTFleFx48YpLCxMFStWlCTt3btXzZs3l7e3t4oXL64BAwYoKSkp07ZjY2MVFBSkgIAA/fvf/3YK7WvWrFGjRo1UpEgRFS9eXA8++KAOHDiQaRs///yz7rnnHnl5ealatWr6/PPPrWX/HF7+T1cOL4+JidGCBQu0cuVK6+r+5s2b1bx5cw0aNMhpvVOnTsnDw8PpKjkAADlB6AYA3Nbee+89VapUSRUrVlSPHj301ltvyRhjLR89erR++uknrV69Wvv27dPMmTNVokQJSdLrr7+ujz76SO+9957i4uL0zjvvqGzZslnuJzExUW3btlX16tW1c+dOvfTSSxo5cmSWfZ9//nlNnjxZO3bsUKFChdS3b19r2ZdffqlevXpp6NCh+umnnzR79mzNnz9f48aNkySlp6erQ4cO8vDw0DfffKNZs2ZddT/X4+3t7RSON27cqLi4OK1fv16rVq1ScnKyoqKiVLRoUW3fvl3Lli3Thg0bMgXXjRs3at++fdq8ebPeffddLV++XLGxsdby5ORkDR8+XDt27NDGjRvl5uamhx9+WOnp6U7beeaZZ/T0009r165datCggdq2bau//vorx59rxIgR6tSpk3VlPz4+Xvfcc48ee+wxLV68WKmpqVbft99+W3fccYeaN2+e4/0AACBJMgAA3MbuueceM3XqVGOMMZcuXTIlSpQwmzZtspa3bdvW9OnTJ8t1Bw8ebJo3b27S09OzXC7JrFixwhhjzMyZM03x4sXN+fPnreVz5swxksyuXbuMMcZs2rTJSDIbNmyw+nzyySdGkrVeixYtzPjx4532s2jRIlOyZEljjDFr1641hQoVMn/88Ye1fPXq1U61ZCU6Otq0a9fOGGNMenq6Wb9+vfH09DQjRoywloeEhJjU1FRrnTfeeMMULVrUJCUlOdXr5uZmjh8/bq1XrFgxk5ycbPWZOXOm8fPzM2lpaVnWcurUKSPJ7N271xhjzKFDh4wkM3HiRKvPpUuXTKlSpcykSZOcjt2ZM2eMMcbMmzfPBAYGWv3Hjh1ratasmeXnzXD+/HlTtGhRs3TpUqutRo0aJiYm5qrHDQCA6+FKNwDgthUXF6dvv/1WXbt2lSQVKlRInTt31ty5c60+TzzxhJYsWaJatWrp2Wef1datW61lvXv31u7du1WxYkUNGTJE69atu+a+atSoIS8vL6vtrrvuyrJvjRo1rJ9LliwpSTp58qQkac+ePXrxxRfl5+dnvfr376/4+HilpKRo3759Kl26tMLCwqxtNGjQIFvHY9WqVfLz85OXl5fuv/9+de7cWTExMdby6tWry8PDw3q/b98+1axZU76+vlZbw4YNlZ6e7jQkvmbNmvLx8XGqJykpSUePHpX+Hg7ftWtXlStXTgEBAdZogSNHjjjVd+XnKFSokOrWrat9+/Zl67Nlh5eXl3r27Km33npLkrRz50798MMP6t27d57tAwBw+ynk6gIAAHCVuXPn6vLly04B1RgjT09PTZs2TYGBgbr//vv122+/6dNPP9X69evVokULDRw4UP/5z39Uu3ZtHTp0SKtXr9aGDRvUqVMntWzZUu+///4N1VW4cGHr54yJwTKGWiclJSk2NlYdOnTItN6VgT43mjVrppkzZ8rDw0NhYWEqVMj
5vwlXhuu81LZtW5UpU0Zz5sxRWFiY0tPTVa1aNZdM1vbYY4+pVq1a+v333zVv3jw1b95cZcqUuel1AABuHVzpBgDcli5fvqyFCxdq8uTJ2r17t/Xas2ePwsLC9O6771p9g4KCFB0drbfffltTp07VG2+8YS0LCAhQ586dNWfOHC1dulQffPCBTp8+nWl/FStW1N69e53uF96+fXuO665du7bi4uIUERGR6eXm5qbKlSvr6NGjio+Pt9bZtm1btrbt6+uriIgI3XnnnZkCd1YqV66sPXv2KDk52WrbsmWL3NzcrInW9PfV+fPnzzvV4+fnp9KlS+uvv/5SXFycXnjhBbVo0UKVK1fWmTNnstzflZ/j8uXL+u6771S5cuVsfbZ/8vDwUFpaWqb26tWrq27dupozZ44WL17sdD89AAC5QegGANyWVq1apTNnzqhfv36qVq2a06tjx47WEPMxY8Zo5cqV+vXXX/Xjjz9q1apVVtCbMmWK3n33Xf3888/65ZdftGzZMoWGhqpIkSKZ9tetWzelp6drwIAB2rdvn9auXav//Oc/0hVXs7NjzJgxWrhwoWJjY/Xjjz9q3759WrJkiV544QVJUsuWLVWhQgVFR0drz549+vLLL/X888/n0VFz1r17d3l5eSk6Olo//PCDNm3apMGDB6tnz54KCQmx+l28eFH9+vXTTz/9pE8//VRjx47VoEGD5ObmpqJFi6p48eJ644039Ouvv+qzzz7T8OHDs9zf9OnTtWLFCv38888aOHCgzpw5k+tQXLZsWX3//feKi4vTn3/+qUuXLlnLHnvsMU2cOFHGGD388MO52j4AABkI3QCA29LcuXPVsmVLBQYGZlrWsWNH7dixQ99//708PDw0atQo1ahRQ02aNJG7u7uWLFkiSfL399crr7yiunXrql69ejp8+LA+/fRTubll/uc1ICBAH3/8sXbv3q1atWrp+eef15gxY6QcDguPiorSqlWrtG7dOtWrV0933323/ud//scaAu3m5qYVK1bo/Pnzuuuuu/TYY49ZM5vnNR8fH61du1anT59WvXr19Mgjj6hFixaaNm2aU78WLVooMjJSTZo0UefOnfXQQw9Z94q7ublpyZIl+u6771StWjU99dRTV302+MSJEzVx4kTVrFlTX331lT766CNrJvmc6t+/vypWrKi6desqKChIW7ZssZZ17dpVhQoVUteuXW94yD4AAA5z5XNRAADATfPOO++oT58+SkhIkLe3t6vLwd8OHz6s8uXLa/v27apdu7arywEAFHBMpAYAwE2ycOFClStXTnfccYf27NmjkSNHqlOnTgTufOLSpUv666+/9MILL+juu+8mcAMA8gShGwCAm+T48eMaM2aMjh8/rpIlS+rRRx+1beg3cm7Lli1q1qyZKlSocMMz0AMAkIHh5QAAAAAA2ISJ1AAAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwBw25o/f74cDocOHz7s6lJypGnTpqpWrZqry8i3Nm/eLIfDoc2bN9/0fffu3Vtly5a96fsFAORfhG4AwA358ccf1aNHD91xxx3y9PRUWFiYevTooZ9++snVpVnGjx+vDz/80NVl5MixY8cUExOj3bt3276vlJQUxcTEZDukZoTajJe7u7uCg4P1yCOPaN++fbbXeyto2rSpevfubb0/fPiwdTw/+OCDTP1jYmLkcDj0559/5nhfMTEx/CIAAFyI0A0AyLXly5erdu3a2rhxo/r06aMZM2aoX79++uyzz1S7dm2tXLnS1SVK1wjdPXv21Pnz51WmTBmX1HUtx44dU2xs7E0L3bGxsTm+MjxkyBAtWrRIb775prp3765PPvlEjRs31vHjx22r9Xbw4osvyhjj6jIAAHm
kkKsLAAAUTAcOHFDPnj1Vrlw5ffHFFwoKCrKWDR06VI0bN1aPHj30/fffKzw83KW1Xo27u7vc3d1dXUaB1bhxYz3yyCPW+4oVK+qJJ57QwoUL9eyzz7q0toKqVq1a2r17t1asWKEOHTq4uhwAQB7gSjcAIFdeffVVpaSk6I033nAK3JJUokQJzZ49W0lJSXr11Vet9qvd75oxdPaf3n77bdWpU0fe3t4qVqyYunTpoqNHjzr12b9/vzp27KjQ0FB5eXmpVKlS6tKlixISEiRJDodDycnJWrBggTV8N2NY79Xu6Z4xY4aqVq1qDZcfOHCgzp4969Qn477qn376Sc2aNZOPj4/uuOMOvfLKK5k+x3//+19VrVpVPj4+Klq0qOrWravFixdf9dhu3rxZ9erVkyT16dPHqnv+/PlO/a6374sXL2rMmDGqU6eOAgMD5evrq8aNG2vTpk1Wn8OHD1vfX2xsrLWvmJiYq9Z3NY0bN5b+/oXMlf744w/17dtXISEh8vT0VNWqVfXWW2/l6jjt2rVL999/vwICAuTn56cWLVpo27Zt16xr0KBB8vPzU0pKSqZlXbt2VWhoqNLS0qy21atXq3HjxvL19ZW/v7/atGmjH3/8MdO6H374oapVqyYvLy9Vq1ZNK1asyMZRurYuXbqoQoUK2b7avWzZMuvPSIkSJdSjRw/98ccfN1wHACDvELoBALny8ccfq2zZslbQ+qcmTZqobNmy+vjjj3O1/XHjxqlXr16KjIzUlClTNGzYMG3cuFFNmjSxAvDFixcVFRWlbdu2afDgwZo+fboGDBiggwcPWn0WLVokT09PNW7cWIsWLdKiRYv0+OOPX3W/MTExGjhwoMLCwjR58mR17NhRs2fPVqtWrXTp0iWnvmfOnFHr1q1Vs2ZNTZ48WZUqVdLIkSO1evVqq8+cOXM0ZMgQValSRVOnTlVsbKxq1aqlb7755qo1VK5cWS+++KIkacCAAVbdTZo0ydG+ExMT9eabb6pp06aaNGmSYmJidOrUKUVFRVnD1oOCgjRz5kxJ0sMPP2ztKzdXWTN+eVG0aFGr7cSJE7r77ru1YcMGDRo0SK+99poiIiLUr18/TZ06NUfH6ccff1Tjxo21Z88ePfvssxo9erQOHTqkpk2bXvN4du7cWcnJyfrkk0+c2lNSUvTxxx/rkUcesUY8LFq0SG3atJGfn58mTZqk0aNH66efflKjRo2cfjmzbt06dezYUQ6HQxMmTFD79u3Vp08f7dixI8fH7Uru7u564YUXtGfPnuuG+Pnz56tTp05yd3fXhAkT1L9/fy1fvlyNGjXK9EsiAIALGQAAcujs2bNGkmnXrt01+z300ENGkklMTDTGGBMdHW3KlCmTqd/YsWPNlf8kHT582Li7u5tx48Y59du7d68pVKiQ1b5r1y4jySxbtuyadfj6+pro6OhM7fPmzTOSzKFDh4wxxpw8edJ4eHiYVq1ambS0NKvftGnTjCTz1ltvWW333nuvkWQWLlxotaWmpprQ0FDTsWNHq61du3amatWq16wvK9u3bzeSzLx58zIty+6+L1++bFJTU53WPXPmjAkJCTF9+/a12k6dOmUkmbFjx2artk2bNlnH49SpU+bYsWNmzZo1JiIiwjgcDvPtt99affv162dKlixp/vzzT6dtdOnSxQQGBpqUlBRjsnmc2rdvbzw8PMyBAwestmPHjhl/f3/TpEmTTPVt2rTJGGNMenq6ueOOO5yOjTHGvPfee0aS+eKLL4wxxpw7d84UKVLE9O/f36nf8ePHTWBgoFN7rVq1TMmSJc3Zs2ettnXr1hlJWZ7j13Po0CEjybz66qvm8uXLJjIy0tSsWdOkp6cbc8WfkVOnThljjLl48aIJDg421apVM+fPn7e2s2rVKiPJjBkzJsc1AADswZVuAECOnTt3TpLk7+9/zX4ZyzP6Z9fy5cuVnp6uTp066c8//7ReoaGhioyMtIZHBwYGSpLWrl2b5dDhnNq
wYYMuXryoYcOGyc3t//6J7N+/vwICAjJdKfXz81OPHj2s9x4eHrrrrrt08OBBq61IkSL6/ffftX379huuL6f7dnd3l4eHhyQpPT1dp0+f1uXLl1W3bl3t3Lnzhmvo27evgoKCFBYWptatWyshIUGLFi2yhsYbY/TBBx+obdu2MsY4fZdRUVFKSEiw6rjecUpLS9O6devUvn17lStXzmovWbKkunXrpq+++kqJiYlZrutwOPToo4/q008/VVJSktW+dOlS3XHHHWrUqJEkaf369Tp79qy6du3qVKu7u7vq169vnXfx8fHavXu3oqOjrXNQku677z5VqVLlho/rlVe7rzbr/o4dO3Ty5Ek9+eST8vLystrbtGmjSpUqZTpXAQCuQ+gGAORYdsP0uXPn5HA4VKJEiRxtf//+/TLGKDIyUkFBQU6vffv26eTJk5Kk8PBwDR8+XG+++aZKlCihqKgoTZ8+3bqfO6d+++036e8Jwa7k4eGhcuXKWcszlCpVKtO96EWLFtWZM2es9yNHjpSfn5/uuusuRUZGauDAgdqyZUuu6svpviVpwYIFqlGjhry8vFS8eHEFBQXpk08+yfUxutKYMWO0fv16rVixQr169VJCQoLTLytOnTqls2fPWvf9X/nq06ePJFnf5fWO06lTp5SSkpLpu9Hfw/HT09Mz3e9/pc6dO+v8+fP66KOPJElJSUn69NNP9eijj1rHcf/+/ZKk5s2bZ6p33bp1Vq0Z50FkZGSm/WRVX250795dERERV723+2rnqiRVqlQp07kKAHAdZi8HAORYYGCgwsLC9P3331+z3/fff69SpUpZV1uzmixNf1/FvFJ6erocDodWr16d5ezifn5+1s+TJ09W7969tXLlSq1bt05DhgzRhAkTtG3bNpUqVSqXnzB7rjbz+ZUhqXLlyoqLi9OqVau0Zs0affDBB5oxY4bGjBmj2NhYW/f99ttvq3fv3mrfvr2eeeYZBQcHW/f//nOys9yoXr26WrZsKUlq3769UlJS1L9/fzVq1EilS5dWenq6JKlHjx6Kjo7Ochs1atSQbDxOGe6++26VLVtW7733nrp166aPP/5Y58+fV+fOna0+GfUuWrRIoaGhmbZRqNDN+29TxtXujHMbAFBwEboBALnStm1bzZ49W1999ZU1PPdKX375pQ4fPqzhw4dbbUWLFs1ygqd/XpUrX768jDEKDw9XhQoVrltL9erVVb16db3wwgvaunWrGjZsqFmzZunll1+WrhH2/ynjed1xcXFOQ5gvXryoQ4cOWQEzp3x9fdW5c2d17txZFy9eVIcOHTRu3DiNGjXKaWjwlbJb87W8//77KleunJYvX+60vbFjx+b5viRp4sSJWrFihcaNG6dZs2YpKChI/v7+SktLy9axu9ZxCgoKko+Pj+Li4jKt9/PPP8vNzU2lS5e+5vY7deqk1157TYmJiVq6dKnKli2ru+++21pevnx5SVJwcPA16804TzKujF8pq/pyq0ePHnr55ZcVGxurhx56KMsa4uLi1Lx580w15MdnzwPA7Yrh5QCAXBkxYoR8fHz0+OOP66+//nJadvr0af373/9WQECABg0aZLWXL19eCQkJTlfI4+PjM83S3KFDB7m7uys2NjbT0FpjjLW/xMREXb582Wl59erV5ebmptTUVKvN19c3W7M5t2zZUh4eHnr99ded9jt37lwlJCSoTZs22Tgyzv55bDw8PFSlShUZYzLNhn4lX19fSbqhWagzroZf+Vm++eYbff311079fHx8bnhf+vv77dixo+bPn6/jx4/L3d1dHTt21AcffKAffvghU/9Tp05ZP1/vOLm7u6tVq1ZauXKl0yziJ06c0OLFi9WoUSMFBARcs77OnTsrNTVVCxYs0Jo1a9SpUyen5VFRUQoICND48eOz/G4y6i1ZsqRq1aqlBQsWOA3TX79+vX766adsHavsyLjavXv3bmtYfIa6desqODhYs2bNcjrXV69erX379uX
qXAUA2IMr3QCAXImIiNDChQvVtWtXVa9eXf369VN4eLgOHz6suXPn6syZM1qyZInCw8Otdbp06aKRI0fq4Ycf1pAhQ5SSkqKZM2eqQoUKThN7lS9fXi+//LJGjRqlw4cPq3379vL399ehQ4e0YsUKDRgwQCNGjNBnn32mQYMG6dFHH1WFChV0+fJlLVq0yAp7GerUqaMNGzZoypQpCgsLU3h4uOrXr5/pMwUFBWnUqFGKjY1V69at9dBDDykuLk4zZsxQvXr1nCYuy65WrVopNDRUDRs2VEhIiPbt26dp06apTZs215yIrnz58ipSpIhmzZolf39/+fr6qn79+k7H83oefPBBLV++XA8//LDatGmjQ4cOadasWapSpYrThGLe3t6qUqWKli5dqgoVKqhYsWKqVq2aqlWrluPP+8wzz+i9997T1KlTNXHiRE2cOFGbNm1S/fr11b9/f1WpUkWnT5/Wzp07tWHDBp0+fTrbx+nll1/W+vXr1ahRIz355JMqVKiQZs+erdTU1Cyfj/5PtWvXVkREhJ5//nmlpqY6DS2XpICAAM2cOVM9e/ZU7dq11aVLFwUFBenIkSP65JNP1LBhQ02bNk2SNGHCBLVp00aNGjVS3759dfr0aes541ce2xvVvXt3vfTSS9Yj3jIULlxYkyZNUp8+fXTvvfeqa9euOnHihF577TWVLVtWTz31VJ7VAAC4Qa6ePh0AULDt3bvXdOvWzYSGhho3NzcjyXh5eZkff/wxy/7r1q0z1apVMx4eHqZixYrm7bffzvTIsAwffPCBadSokfH19TW+vr6mUqVKZuDAgSYuLs4YY8zBgwdN3759Tfny5Y2Xl5cpVqyYadasmdmwYYPTdn7++WfTpEkT4+3tbSRZjw/75yPDMkybNs1UqlTJFC5c2ISEhJgnnnjCnDlzxqnPvffem+Ujrv75WLTZs2ebJk2amOLFixtPT09Tvnx588wzz5iEhITrHtuVK1eaKlWqmEKFCjk9Piy7+05PTzfjx483ZcqUMZ6enuZf//qXWbVqVZaPbtu6daupU6eO8fDwuO7jwzIeyXW1R7U1bdrUBAQEWI/TOnHihBk4cKApXbq0KVy4sAkNDTUtWrQwb7zxRo6P086dO01UVJTx8/MzPj4+plmzZmbr1q1Z1pfxyLArPf/880aSiYiIuObni4qKMoGBgcbLy8uUL1/e9O7d2+zYscOp3wcffGAqV65sPD09TZUqVczy5cuv+li867nykWH/lHGeXvnIsAxLly41//rXv4ynp6cpVqyY6d69u/n9999zvH8AgH0cJqspMQEAyKWFCxeqd+/e6tGjhxYuXOjqcgAAAFyK4eUAgDzVq1cvxcfH67nnnlOpUqU0fvx4V5cEAADgMlzpBgAAAADAJsxeDgAAAACATQjdAAAAAADYhNANAAAAAIBNCN0AAAAAANjklp+9PD09XceOHZO/v78cDoerywEAAAAA3AKMMTp37pzCwsLk5nb169m3fOg+duyYSpcu7eoyAAAAAAC3oKNHj6pUqVJXXX7Lh25/f3/p7wMREBDg6nIAAAAAALeAxMRElS5d2sqcV3PLh+6MIeUBAQGEbgAAAABAnrrebcxMpAYAAAAAgE0I3QAAAAAA2ITQDQAAAACATW75e7oBAAAAwFXS0tJ06dIlV5eBXChcuLDc3d1veDuEbgAAAADIY8YYHT9+XGfPnnV1KbgBRYoUUWho6HUnS7sWQjcAAAAA5LGMwB0cHCwfH58bCm24+YwxSklJ0cmTJyVJJUuWzPW2CN0AAAAAkIfS0tKswF28eHFXl4Nc8vb2liSdPHlSwcHBuR5qzkRqAAAAAJCHMu7h9vHxcXUpuEEZ3+GN3JdP6AYAAAAAGzCkvODLi++Q0A0AAAAAgE0I3QAAAAAA2ISJ1AAAAADgJuk3f/tN3d/c3vVy1L93795asGCB9b5YsWKqV6+eXnnlFdWoUSNH2zl79qw+/PDDq/a53tDtsWP
HKiYmJtv7zK+40g0AAAAAsLRu3Vrx8fGKj4/Xxo0bVahQIT344IN5vp+MfcTHx2vq1KkKCAhwahsxYkSe79MVCN0AAAAAAIunp6dCQ0MVGhqqWrVq6bnnntPRo0d16tQpq8/evXvVvHlzeXt7q3jx4howYICSkpIkSTExMVqwYIFWrlwph8Mhh8OhzZs3Z9pPxj5CQ0MVGBgoh8Ph1LZkyRJVrlxZXl5eqlSpkmbMmOG0/siRI1WhQgX5+PioXLlyGj16tNMs4zExMapVq5beeust3XnnnfLz89OTTz6ptLQ0vfLKKwoNDVVwcLDGjRtn6/FkeDkAAAAAIEtJSUl6++23FRERYT1zPDk5WVFRUWrQoIG2b9+ukydP6rHHHtOgQYM0f/58jRgxQvv27VNiYqLmzZsn/T1MPSfeeecdjRkzRtOmTdO//vUv7dq1S/3795evr6+io6MlSf7+/po/f77CwsK0d+9e9e/fX/7+/nr22Wet7Rw4cECrV6/WmjVrdODAAT3yyCM6ePCgKlSooM8//1xbt25V37591bJlS9WvXz9Pj10GQjcAAAAAwLJq1Sr5+flJfwfskiVLatWqVXJz+9+B0osXL9aFCxe0cOFC+fr6SpKmTZumtm3batKkSQoJCZG3t7dSU1MVGhqaqxrGjh2ryZMnq0OHDpKk8PBw/fTTT5o9e7YVul944QWrf9myZTVixAgtWbLEKXSnp6frrbfekr+/v6pUqaJmzZopLi5On376qdzc3FSxYkVNmjRJmzZtInQDAAAAAOzXrFkzzZw5U5J05swZzZgxQ/fff7++/fZblSlTRvv27VPNmjWtwC1JDRs2VHp6uuLi4hQSEnJD+09OTtaBAwfUr18/9e/f32q/fPmyAgMDrfdLly7V66+/rgMHDigpKUmXL19WQECA07bKli0rf39/631ISIjc3d2tXyBktJ08efKGar4WQjcAAAAAwOLr66uIiAjr/ZtvvqnAwEDNmTNHL7/8su37z7g3fM6cOZmuPru7u0uSvv76a3Xv3l2xsbGKiopSYGCglixZosmTJzv1L1y4sNN7h8ORZVt6erpNn8bFE6lNmDBB9erVk7+/v4KDg9W+fXvFxcU59blw4YIGDhyo4sWLy8/PTx07dtSJEydcVjMAAAAA3E4cDofc3Nx0/vx5SVLlypW1Z88eJScnW322bNliDdeWJA8PD6WlpeVqfyEhIQoLC9PBgwcVERHh9AoPD5ckbd26VWXKlNHzzz+vunXrKjIyUr/99luefN685tLQ/fnnn2vgwIHatm2b1q9fr0uXLqlVq1ZOX95TTz2ljz/+WMuWLdPnn3+uY8eOWeP6AQAAAAB5KzU1VcePH9fx48e1b98+DR48WElJSWrbtq0kqXv37vLy8lJ0dLR++OEHbdq0SYMHD1bPnj2toeVly5bV999/r7i4OP35559Os4pnR2xsrCZMmKDXX39dv/zyi/bu3at58+ZpypQpkqTIyEgdOXJES5Ys0YEDB/T6669rxYoVNhyNG+fS4eVr1qxxej9//nwFBwfru+++U5MmTZSQkKC5c+dq8eLFat68uSRp3rx5qly5srZt26a777470zZTU1OVmppqvU9MTLwJnwQAAAAArm9u73quLuG61qxZo5IlS0p/zxBeqVIlLVu2TE2bNpUk+fj4aO3atRo6dKjq1asnHx8fdezY0QrEktS/f39t3rxZdevWVVJSkjZt2mStnx2PPfaYfHx89Oqrr+qZZ56Rr6+vqlevrmHDhkmSHnroIT311FMaNGiQUlNT1aZNG40ePVoxMTF5fjxulMMYY1xdRIZff/1VkZGR2rt3r6pVq6bPPvtMLVq00JkzZ1SkSBGrX5kyZTRs2DA99dRTmbYRExOj2NjYTO0JCQmZbqpHAbG4s6sryJ+6LXV1BQAAAMjChQsXdOjQIYWHh8vLy8vV5eAGXOu7TExMVGBg4HWzpkuHl18pPT1dw4YNU8OGDVWtWjV
J0vHjx+Xh4eEUuPX3GP/jx49nuZ1Ro0YpISHBeh09evSm1A8AAAAAwD/lm9nLBw4cqB9++EFfffXVDW3H09NTnp6eeVYXAAAAAAC5lS+udA8aNEirVq3Spk2bVKpUKas9NDRUFy9e1NmzZ536nzhxItcPWQcAAAAA4GZxaeg2xmjQoEFasWKFPvvsM2v69wx16tRR4cKFtXHjRqstLi5OR44cUYMGDVxQMQAAAAAA2efS4eUDBw7U4sWLtXLlSvn7+1v3aQcGBsrb21uBgYHq16+fhg8frmLFiikgIECDBw9WgwYNspy5HAAAAACA/MSloXvmzJmSlGnq+Hnz5ql3796SpP/5n/+Rm5ubOnbsqNTUVEVFRWnGjBkuqRcAAAAAgJxwaejOztPKvLy8NH36dE2fPv2m1AQAAAAAQF7JFxOpAQAAAABwKyJ0AwAAAABgE0I3AAAAAAA2cek93QAAAABwW1nc+ebur9vSHHXv3bu3FixYIEkqXLiw7rzzTvXq1Uv/7//9PxUqlDfxMSYmRrGxsdfsk535vwoKrnQDAAAAACytW7dWfHy89u/fr6effloxMTF69dVXs+x78eLFHG9/xIgRio+Pt16lSpXSiy++6NR2KyF0AwAAAAAsnp6eCg0NVZkyZfTEE0+oZcuW+uijj6S/r4S3b99e48aNU1hYmCpWrChJ2rt3r5o3by5vb28VL15cAwYMUFJSUpbb9/PzU2hoqPVyd3eXv7+/9f7SpUvq1KmTihQpomLFiqldu3Y6fPiwtf727dt13333qUSJEgoMDNS9996rnTt3Ou3D4XBo9uzZevDBB+Xj46PKlSvr66+/1q+//qqmTZvK19dX99xzjw4cOGDrsRShGwAAAABwLd7e3k5XtDdu3Ki4uDitX79eq1atUnJysqKiolS0aFFt375dy5Yt04YNGzRo0KAc7+vSpUuKioqSv7+/vvzyS23ZskV+fn5q3bq1VcO5c+cUHR2tr776Stu2bVNkZKQeeOABnTt3zmlbL730knr16qXdu3erUqVK6tatmx5//HGNGjVKO3bskDEmVzXmFPd0AwAAAAAyMcZo48aNWrt2rQYPHmy1+/r66s0335SHh4ckac6cObpw4YIWLlwoX19fSdK0adPUtm1bTZo0SSEhIdne59KlS5Wenq4333xTDodDkjRv3jwVKVJEmzdvVqtWrdS8eXOndd544w0VKVJEn3/+uR588EGrvU+fPurUqZMkaeTIkWrQoIFGjx6tqKgoSdLQoUPVp0+fGzpG2UHoBgAAAABYVq1aJT8/P126dEnp6enq1q2bYmJirOXVq1e3Arck7du3TzVr1rQCtyQ1bNhQ6enpiouLy1Ho3rNnj3799Vf5+/s7tV+4cMEaCn7ixAm98MIL2rx5s06ePKm0tDSlpKToyJEjTuvUqFHD+jmjhurVqzu1XbhwQYmJiQoICMh2jTlF6AYAAAAAWJo1a6aZM2fKw8NDYWFhmWYtvzJc57WkpCTVqVNH77zzTqZlQUFBkqTo6Gj99ddfeu2111SmTBl5enqqQYMGmSZ1K1y4sPVzxlXzrNrS09Nt+zwidAMAAAAAruTr66uIiIhs969cubLmz5+v5ORkK5Bv2bJFbm5u1kRr2VW7dm0tXbpUwcHBV736vGXLFs2YMUMPPPCAJOno0aP6888/c7Sfm4mJ1AAAAAAAuda9e3d5eXkpOjpaP/zwgzZt2qTBgwerZ8+eORpanrGtEiVKqF27dvryyy916NAhbd68WUOGDNHvv/8uSYqMjNSiRYu0b98+ffPNN+revbu8vb1t+nQ3jivdAAAAAHCzdFvq6grynI+Pj9auXauhQ4eqXr168vHxUceOHTVlypRcbeuLL77QyJEj1aFDB507d0533HGHWrRoYV35njt3rgYMGKDatWurdOnSGj9+vEaMGGHDJ8sbDmOMcXURdkpMTFRgYKASEhJsvTkeNlrc2dUV5E+34F/YAAAAt4ILFy7
o0KFDCg8Pl5eXl6vLwQ241neZ3azJ8HIAAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAALDBLT5n9W0hL75DQjcAAAAA5KHChQtLklJSUlxdCm5QxneY8Z3mBs/pBgAAAIA85O7uriJFiujkyZPS38+edjgcri4LOWCMUUpKik6ePKkiRYrI3d0919sidAMAAABAHgsNDZUkK3ijYCpSpIj1XeYWoRsAAAAA8pjD4VDJkiUVHBysS5cuuboc5ELhwoVv6Ap3BkI3AAAAANjE3d09T4IbCi4mUgMAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhG4AAAAAAGxC6AYAAAAAwCaEbgAAAAAAbELoBgAAAADAJoRuAAAAAABsQugGAAAAAMAmhVxdAP5Pv/nbXV1CvjTXw9UVAAAAAEDucKUbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJsQugEAAAAAsAmhGwAAAAAAmxC6AQAAAACwCaEbAAAAAACbELoBAAAAALAJoRsAAAAAAJv8//buO0qr+kzg+DMwzgwoTVCKIogCFhAOogYsOQIJikss2ZVgA4Maa4yIbS2AFQsscXUhsgi6RkFd29oFMdZYaDbEihodEBVFRqXN3T9W33VkVOZ1fgwDn885c47vvfd932fI70z4cu97R3QDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAAB
AIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJ5BXdb7/9dvVPAgAAABuYvKJ7++23j3333Tduuumm+Prrr6t/KgAAANgA5BXds2bNil122SWGDh0aLVq0iD/84Q/x3HPPVf90AAAAUIvlFd1du3aNP//5z/Hhhx/G9ddfH6WlpbHXXntFp06dYsyYMbF48eLqnxQAAABqmZ91I7XCwsI45JBD4rbbbovLL7883nzzzRg2bFi0bt06jjrqqCgtLa2+SQEAAKCW+VnR/cILL8SJJ54YLVu2jDFjxsSwYcPirbfeikceeSQ+/PDDOPDAA6tvUgAAAKhlCvN50pgxY2LSpEkxf/786NevX9x4443Rr1+/qFPn/xp+2223jcmTJ0fbtm2re14AAACoNfKK7nHjxsXvf//7GDx4cLRs2bLSY7bccsuYOHHiz50PAAAAaq28ovuNN974yWOKiopi0KBB+bw8AAAAbBDy+kz3pEmT4rbbbltj+2233RY33HBDdcwFAAAAtV5e0X3ZZZdFs2bN1ti+5ZZbxqWXXlodcwEAAECtl1d0v/fee7Htttuusb1Nmzbx3nvvVcdcAAAAUOvlFd1bbrllvPjii2tsnzt3bjRt2rQ65gIAAIBaL6/oHjhwYPzxj3+MGTNmxOrVq2P16tXx6KOPxqmnnhq/+93vqn9KAAAAqIXyunv5RRddFAsWLIjevXtHYeH/vUR5eXkcddRRPtMNAAAA38gruouKimLq1Klx0UUXxdy5c6NevXrRuXPnaNOmTfVPCAAAALVUXtH9rQ4dOkSHDh2qbxoAAADYgOQV3atXr47JkyfH9OnT46OPPory8vIK+x999NHqmg8AAABqrbyi+9RTT43JkyfHAQccEJ06dYqCgoLqnwwAAABqubyie8qUKXHrrbdGv379qn8iAAAA2EDk9SvDioqKYvvtt6/+aQAAAGADkld0n3766fHnP/85siyr/okAAABgA5HX5eVPPvlkzJgxIx544IHYeeedY5NNNqmw/4477qiu+QAAAKDWyiu6GzduHAcffHD1TwMAAAAbkLyie9KkSdU/CQAAAGxg8vpMd0TEqlWrYtq0afGXv/wlvvjii4iI+PDDD2PZsmXVOR8AAADUWnmd6X733Xdjv/32i/feey+WL18ev/rVr6JBgwZx+eWXx/Lly2P8+PHVPykAAADUMnmd6T711FOje/fusWTJkqhXr15u+8EHHxzTp0+vzvkAAACg1srrTPcTTzwRTz/9dBQVFVXY3rZt2/jggw+qazYAAACo1fI6011eXh6rV69eY/s//vGPaNCgQXXMBQAAALVeXtH961//OsaOHZt7XFBQEMuWLYvhw4dHv379qnM+AAAAqLXyurx89OjR0bdv39hpp53i66+/jsMOOyzeeOONaNasWdxyyy3VPyUAAADUQnlF99Zbbx1z586NKVOmxIsvvhjLli2LIUOGxOGHH17hxmoAAACwMcsruiMiCgsL44gjjqjeaQAAAGADkld033jjjT+6/6ijjsp3HgAAANhg5BXdp556aoXHK1eujC+//DKKioqifv36ohsAAADyvXv5kiVLKnwtW7Ys5s+fH3vttZcbqQEAAMA38oruyrRv3z5GjRq1xllwAAAA2FhVW3THNzdX+/DDD6vzJQEAAKDWyusz3ffcc0+Fx1mWRWlpaVxzzTWx5557VtdsAAAAUKvlFd0HHXRQhccFBQWxxRZbRK9evWL06NFr/TqPP/54XHnllTFz5swoLS2NO++8s8JrZ1kWw4cPjwkTJsRnn30We+65Z4wbNy7at2+fz9gAAACwTuV1eXl5eXmFr9WrV8fChQvj5ptvjpYtW67165SVlUWXLl3i2muvrXT/FVdcEVdffXWMHz8+nn322dh0002jb9++8fXXX+c
zNgAAAKxTeZ3pri77779/7L///pXuy7Isxo4dG+edd14ceOCBEd/8fvDmzZvHXXfdFb/73e/W8bQAAABQNXlF99ChQ9f62DFjxuTzFvHOO+/EwoULo0+fPrltjRo1ij322COeeeaZH4zu5cuXx/Lly3OPly5dmtf7AwAAwM+VV3TPnj07Zs+eHStXroyOHTtGRMTrr78edevWjW7duuWOKygoyHuwhQsXRkRE8+bNK2xv3rx5bl9lLrvsshg5cmTe7wsAAADVJa/o7t+/fzRo0CBuuOGGaNKkSURELFmyJI4++ujYe++94/TTT6/uOdfaOeecU+FM/NKlS6N169Y1Ng8AAAAbr7xupDZ69Oi47LLLcsEdEdGkSZO4+OKLq3T38h/TokWLiIhYtGhRhe2LFi3K7atMcXFxNGzYsMIXAAAA1IS8onvp0qWxePHiNbYvXrw4vvjii+qYK7bddtto0aJFTJ8+vcL7Pvvss9GjR49qeQ8AAABIKa/Lyw8++OA4+uijY/To0bH77rtHRMSzzz4bZ5xxRhxyyCFr/TrLli2LN998M/f4nXfeiTlz5sTmm28e22yzTfzpT3+Kiy++ONq3bx/bbrttnH/++dGqVas1fk84AAAArI/yiu7x48fHsGHD4rDDDouVK1f+3wsVFsaQIUPiyiuvXOvXeeGFF2LffffNPf72s9iDBg2KyZMnx5lnnhllZWVx3HHHxWeffRZ77bVXPPjgg1FSUpLP2AAAALBOFWRZluX75LKysnjrrbciImK77baLTTfdtDpnqxZLly6NRo0axeeff77ef757yOTna3qE9dLEoqtqeoT102FTa3oCAADYaK1ta+b1me5vlZaWRmlpabRv3z423XTT+Bn9DgAAABucvKL7k08+id69e0eHDh2iX79+UVpaGhERQ4YMqdFfFwYAAADrk7yi+7TTTotNNtkk3nvvvahfv35u+4ABA+LBBx+szvkAAACg1srrRmoPP/xwPPTQQ7H11ltX2N6+fft49913q2s2AAAAqNXyOtNdVlZW4Qz3tz799NMoLi6ujrkAAACg1ssruvfee++48cYbc48LCgqivLw8rrjiigq/AgwAAAA2ZnldXn7FFVdE796944UXXogVK1bEmWeeGa+88kp8+umn8dRTT1X/lAAAAFAL5XWmu1OnTvH666/HXnvtFQceeGCUlZXFIYccErNnz47tttuu+qcEAACAWqjKZ7pXrlwZ++23X4wfPz7OPffcNFMBAADABqDKZ7o32WSTePHFF9NMAwAAABuQvC4vP+KII2LixInVPw0AAABsQPK6kdqqVavi+uuvj2nTpsWuu+4am266aYX9Y8aMqa75AAAAoNaqUnS//fbb0bZt23j55ZejW7duERHx+uuvVzimoKCgeicEAACAWqpK0d2+ffsoLS2NGTNmRETEgAED4uqrr47mzZunmg8AAABqrSp9pjvLsgqPH3jggSgrK6vumQAAAGCDkNeN1L71/QgHAAAA/l+VorugoGCNz2z7DDcAAABUrkqf6c6yLAYPHhzFxcUREfH111/H8ccfv8bdy++4447qnRIAAABqoSpF96BBgyo8PuKII6p7HgAAANhgVCm6J02alG4SAAAA2MD8rBupAQAAAD9MdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJFJY0wMAtd+Qyc/X9AjrpYmDd6vpEQAAqGHOdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEA
iohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEiksKYHANhg3TygpidYPx02taYnAABYZ5zpBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEims6QEAgPXIzQNqeoL102FTa3oCfoq1WzlrF2qcM90AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASKazpAQAAWHtDJj9f0yOslyYW1fQE/BRrt3ITB+9W0yOQmDPdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEhHdAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIoU1PQAA1JQhk5+v6RHWOxOLanoCANiwONMNAAAAiYhuAAAASER0AwAAQCKiGwAAABIR3QAAAJCI6AYAAIBERDcAAAAkIroBAAAgEdENAAAAiYhuAAAASER0AwAAQCKiGwAAABIR3QAAAJCI6AYAAIBERDcAAAAkIroBAAAgEdENAAAAiYhuAAAASER0AwAAQCKiGwAAABIR3QAAAJCI6AYAAIBERDcAAAAkIroBAAAgEdENAAAAiYhuAAAASER0AwAAQCKiGwAAABIR3QAAAJCI6AYAAIBERDcAAAAkIroBAAAgEdENAAAAiYhuAAAASER0AwAAQCKiGwAAABIR3QAAAJCI6AYAAIBERDcAAAAkIroBAAAgEdENAAAAiYhuAAAASER0AwAAQCKiGwAAABKpFdF97bXXRtu2baOkpCT22GOPeO6552p6JAAAAPhJ6310T506NYYOHRrDhw+PWbNmRZcuXaJv377x0Ucf1fRoAAAA8KPW++geM2ZMHHvssXH00UfHTjvtFOPHj4/69evH9ddfX9OjAQAAwI8qrOkBfsyKFSti5syZcc455+S21alTJ/r06RPPPPNMpc9Zvnx5LF++PPf4888/j4iIpUuXroOJf54VXy2r6RHWS0tXrazpEdZP69GatnYrZ+3+AGt3vWbd/gDrdr1n7f4Aa3e9Vxs6hcp9+79dlmU/elxB9lNH1KAPP/wwttpqq3j66aejR48eue1nnnlm/O1vf4tnn312jeeMGDEiRo4cuY4nBQAAYGP0/vvvx9Zbb/2D+9frM935OOecc2Lo0KG5x+Xl5fHpp59G06Z
No6CgoNrfb+nSpdG6det4//33o2HDhtX++pCKtUttZe1SW1m71FbWLrVV6rWbZVl88cUX0apVqx89br2O7mbNmkXdunVj0aJFFbYvWrQoWrRoUelziouLo7i4uMK2xo0bJ50zIqJhw4Z+CFErWbvUVtYutZW1S21l7VJbpVy7jRo1+slj1usbqRUVFcWuu+4a06dPz20rLy+P6dOnV7jcHAAAANZH6/WZ7oiIoUOHxqBBg6J79+6x++67x9ixY6OsrCyOPvromh4NAAAAftR6H90DBgyIxYsXxwUXXBALFy6Mrl27xoMPPhjNmzev6dEivrmcffjw4Wtc0g7rO2uX2srapbaydqmtrF1qq/Vl7a7Xdy8HAACA2my9/kw3AAAA1GaiGwAAABIR3QAAAJCI6AYAAIBERPdauPbaa6Nt27ZRUlISe+yxRzz33HM/evxtt90WO+ywQ5SUlETnzp3j/vvvX2ezwndVZe1OmDAh9t5772jSpEk0adIk+vTp85NrHVKp6s/db02ZMiUKCgrioIMOSj4jVKaqa/ezzz6Lk046KVq2bBnFxcXRoUMHf2+gRlR17Y4dOzY6duwY9erVi9atW8dpp50WX3/99TqbFx5//PHo379/tGrVKgoKCuKuu+76yec89thj0a1btyguLo7tt98+Jk+evE5mFd0/YerUqTF06NAYPnx4zJo1K7p06RJ9+/aNjz76qNLjn3766Rg4cGAMGTIkZs+eHQcddFAcdNBB8fLLL6/z2dm4VXXtPvbYYzFw4MCYMWNGPPPMM9G6dev49a9/HR988ME6n52NW1XX7rcWLFgQw4YNi7333nudzQrfVdW1u2LFivjVr34VCxYsiNtvvz3mz58fEyZMiK222mqdz87Grapr9+abb46zzz47hg8fHvPmzYuJEyfG1KlT41//9V/X+exsvMrKyqJLly5x7bXXrtXx77zzThxwwAGx7777xpw5c+JPf/pTHHPMMfHQQw8lnzUyftTuu++enXTSSbnHq1evzlq1apVddtlllR5/6KGHZgcccECFbXvssUf2hz/8Ifms8F1VXbvft2rVqqxBgwbZDTfckHBKWFM+a3fVqlVZz549s//8z//MBg0alB144IHraFr4f1Vdu+PGjcvatWuXrVixYh1OCWuq6to96aSTsl69elXYNnTo0GzPPfdMPitUJiKyO++880ePOfPMM7Odd965wrYBAwZkffv2TTxdljnT/SNWrFgRM2fOjD59+uS21alTJ/r06RPPPPNMpc955plnKhwfEdG3b98fPB5SyGftft+XX34ZK1eujM033zzhpFBRvmv3wgsvjC233DKGDBmyjiaFivJZu/fcc0/06NEjTjrppGjevHl06tQpLr300li9evU6nJyNXT5rt2fPnjFz5szcJehvv/123H///dGvX791NjdUVU12WmHyd6jFPv7441i9enU0b968wvbmzZvHa6+9VulzFi5cWOnxCxcuTDorfFc+a/f7zjrrrGjVqtUaP5wgpXzW7pNPPhkTJ06MOXPmrKMpYU35rN233347Hn300Tj88MPj/vvvjzfffDNOPPHEWLlyZQwfPnwdTc7GLp+1e9hhh8XHH38ce+21V2RZFqtWrYrjjz/e5eWs136o05YuXRpfffVV1KtXL9l7O9MNrGHUqFExZcqUuPPOO6OkpKSmx4Ef9MUXX8SRRx4ZEyZMiGbNmtX0OFAl5eXlseWWW8Z1110Xu+66awwYMCDOPffcGD9+fE2PBj/qsccei0svvTT+4z/+I2bNmhV33HFH3HfffXHRRRfV9GiwXnKm+0c0a9Ys6tatG4sWLaqwfdGiRdGiRYtKn9OiRYsqHQ8p5LN2v3XVVVfFqFGjYtq0abHLLrsknhQqqurafeutt2LBggXRv3//3Lby8vKIiCgsLIz58+fHdttttw4mZ2OXz8/dli1bxiabbBJ169bNbdtxxx1j4cKFsWLFiigqKko+N+Szds8///w
48sgj45hjjomIiM6dO0dZWVkcd9xxce6550adOs7rsf75oU5r2LBh0rPc4Uz3jysqKopdd901pk+fnttWXl4e06dPjx49elT6nB49elQ4PiLikUce+cHjIYV81m5ExBVXXBEXXXRRPPjgg9G9e/d1NC38v6qu3R122CFeeumlmDNnTu7rN7/5Te7OpK1bt17H3wEbq3x+7u65557x5ptv5v6hKCLi9ddfj5YtWwpu1pl81u6XX365Rlh/+49H/3dPK1j/1GinJb9VWy03ZcqUrLi4OJs8eXL26quvZscdd1zWuHHjbOHChVmWZdmRRx6ZnX322bnjn3rqqaywsDC76qqrsnnz5mXDhw/PNtlkk+yll16qwe+CjVFV1+6oUaOyoqKi7Pbbb89KS0tzX1988UUNfhdsjKq6dr/P3cupKVVdu++9917WoEGD7OSTT87mz5+f3XvvvdmWW26ZXXzxxTX4XbAxquraHT58eNagQYPslltuyd5+++3s4Ycfzrbbbrvs0EMPrcHvgo3NF198kc2ePTubPXt2FhHZmDFjstmzZ2fvvvtulmVZdvbZZ2dHHnlk7vi33347q1+/fnbGGWdk8+bNy6699tqsbt262YMPPph8VtG9Fv793/8922abbbKioqJs9913z/7+97/n9v3yl7/MBg0aVOH4W2+9NevQoUNWVFSU7bzzztl9991XA1ND1dZumzZtsohY42v48OE1ND0bs6r+3P0u0U1Nquraffrpp7M99tgjKy4uztq1a5ddcskl2apVq2pgcjZ2VVm7K1euzEaMGJFtt912WUlJSda6devsxBNPzJYsWVJD07MxmjFjRqV/d/12rQ4aNCj75S9/ucZzunbtmhUVFWXt2rXLJk2atE5mLchcAwIAAABJ+Ew3AAAAJCK6AQAAIBHRDQAAAImIbgAAAEhEdAMAAEAiohsAAAASEd0AAACQiOgGAACAREQ3ACRSUFAQd911V02PEVGDswwePDgOOuign/UaCxYsiIKCgpgzZ84PHvPYY49FQUFBfPbZZxERMXny5GjcuHFu/4gRI6Jr164/aw4AyIfoBmCj98wzz0TdunXjgAMOqNbXLS0tjf33379aXzOVwYMHR0FBQRQUFERRUVFsv/32ceGFF8aqVatqerS10rNnzygtLY1GjRpVun/YsGExffr03OPq+McAAFgbohuAjd7EiRPjlFNOiccffzw+/PDDanvdFi1aRHFxcbW9Xmr77bdflJaWxhtvvBGnn356jBgxIq688spKj12xYsU6n+/HFBUVRYsWLaKgoKDS/Ztttlk0bdp0nc8FAKIbgI3asmXLYurUqXHCCSfEAQccEJMnT66wf8mSJXH44YfHFltsEfXq1Yv27dvHpEmTIr4Jz5NPPjlatmwZJSUl0aZNm7jssstyz/3+Jd1PP/10dO3aNUpKSqJ79+5x1113Vbhs+ttLpKdPnx7du3eP+vXrR8+ePWP+/PkVZrr77rujW7duUVJSEu3atYuRI0dWOCP9xhtvxD777BMlJSWx0047xSOPPLJWfxbFxcXRokWLaNOmTZxwwgnRp0+fuOeeeyK+c2b4kksuiVatWkXHjh0jIuKll16KXr16Rb169aJp06Zx3HHHxbJly9Z47ZEjR8YWW2wRDRs2jOOPP75CtD/44IOx1157RePGjaNp06bxT//0T/HWW2+t8RqvvfZa9OzZM0pKSqJTp07xt7/9Lbfv+5eXf993Ly8fMWJE3HDDDXH33Xfnzu4/9thj0atXrzj55JMrPG/x4sVRVFRU4Sw5AFSF6AZgo3brrbfGDjvsEB07dowjjjgirr/++siyLLf//PPPj1dffTUeeOCBmDdvXowbNy6aNWsWERFXX3113HPPPXHrrbfG/Pnz469//Wu0bdu20vdZunRp9O/fPzp37hyzZs2Kiy66KM4666xKjz333HNj9OjR8cILL0RhYWH8/ve/z+174okn4qijjopTTz01Xn311fj
LX/4SkydPjksuuSQiIsrLy+OQQw6JoqKiePbZZ2P8+PE/+D4/pV69ehXiePr06TF//vx45JFH4t57742ysrLo27dvNGnSJJ5//vm47bbbYtq0aWuE6/Tp02PevHnx2GOPxS233BJ33HFHjBw5Mre/rKwshg4dGi+88EJMnz496tSpEwcffHCUl5dXeJ0zzjgjTj/99Jg9e3b06NEj+vfvH5988kmVv69hw4bFoYcemjuzX1paGj179oxjjjkmbr755li+fHnu2Jtuuim22mqr6NWrV5XfBwAiIiIDgI1Yz549s7Fjx2ZZlmUrV67MmjVrls2YMSO3v3///tnRRx9d6XNPOeWUrFevXll5eXml+yMiu/POO7Msy7Jx48ZlTZs2zb766qvc/gkTJmQRkc2ePTvLsiybMWNGFhHZtGnTcsfcd999WUTknte7d+/s0ksvrfA+//Vf/5W1bNkyy7Ise+ihh7LCwsLsgw8+yO1/4IEHKsxSmUGDBmUHHnhglmVZVl5enj3yyCNZcXFxNmzYsNz+5s2bZ8uXL88957rrrsuaNGmSLVu2rMK8derUyRYuXJh73uabb56VlZXljhk3bly22WabZatXr650lsWLF2cRkb300ktZlmXZO++8k0VENmrUqNwxK1euzLbeeuvs8ssvr/Bnt2TJkizLsmzSpElZo0aNcscPHz4869KlS6Xf77e++uqrrEmTJtnUqVNz23bZZZdsxIgRP/jnBgA/xZluADZa8+fPj+eeey4GDhwYERGFhYUxYMCAmDhxYu6YE044IaZMmRJdu3aNM888M55++uncvsGDB8ecOXOiY8eO8cc//jEefvjhH32vXXbZJUpKSnLbdt9990qP3WWXXXL/3bJly4iI+OijjyIiYu7cuXHhhRfGZpttlvs69thjo7S0NL788suYN29etG7dOlq1apV7jR49eqzVn8e9994bm222WZSUlMT+++8fAwYMiBEjRuT2d+7cOYqKinKP582bF126dIlNN900t23PPfeM8vLyCpfEd+nSJerXr19hnmXLlsX7778f8c3l8AMHDox27dpFw4YNc1cLvPfeexXm++73UVhYGN27d4958+at1fe2NkpKSuLII4+M66+/PiIiZs2aFS+//HIMHjy42t4DgI1PYU0PAAA1ZeLEibFq1aoKgZplWRQXF8c111wTjRo1iv333z/efffduP/+++ORRx6J3r17x0knnRRXXXVVdOvWLd5555144IEHYtq0aXHooYdGnz594vbbb/9Zc22yySa5//72xmDfXmq9bNmyGDlyZBxyyCFrPO+7QZ+PfffdN8aNGxdFRUXRqlWrKCys+NeE78Z1derfv3+0adMmJkyYEK1atYry8vLo1KlTjdys7ZhjjomuXbvGP/7xj5g0aVL06tUr2rRps87nAGDD4Uw3ABulVatWxY033hijR4+OOXPm5L7mzp0brVq1iltuuSV37BZbbBGDBg2Km266KcaOHRvXXXddbl/Dhg1jwIABMWHChJg6dWr893//d3z66adrvF/Hjh3jpZdeqvB54eeff77Kc3fr1i3mz58f22+//RpfderUiR133DHef//9KC0tzT3n73//+1q99qabbhrbb799bLPNNmsEd2V23HHHmDt3bpSVleW2PfXUU1GnTp3cjdbim7PzX331VYV5Nttss2jdunV88sknMX/+/DjvvPOid+/eseOOO8aSJUsqfb/vfh+rVq2KmTNnxo477rhW39v3FRUVxerVq9fY3rlz5+jevXtMmDAhbr755gqfpweAfIhuADZK9957byxZsiSGDBkSnTp1qvD129/+NneJ+QUXXBB33313vPnmm/HKK6/Evffemwu9MWPGxC233BKvvfZavP7663HbbbdFixYtonHjxmu832GHHRbl5eVx3HHHxbx58+Khhx6Kq666KuI7Z7PXxgUXXBA33nhjjBw5Ml555ZWYN29eTJkyJc4777yIiOjTp0906NA
hBg0aFHPnzo0nnngizj333Gr6U6vo8MMPj5KSkhg0aFC8/PLLMWPGjDjllFPiyCOPjObNm+eOW7FiRQwZMiReffXVuP/++2P48OFx8sknR506daJJkybRtGnTuO666+LNN9+MRx99NIYOHVrp+1177bVx5513xmuvvRYnnXRSLFmyJO8obtu2bbz44osxf/78+Pjjj2PlypW5fcccc0yMGjUqsiyLgw8+OK/XB4BviW4ANkoTJ06MPn36RKNGjdbY99vf/jZeeOGFePHFF6OoqCjOOeec2GWXXWKfffaJunXrxpQpUyIiokGDBnHFFVdE9+7dY7fddosFCxbE/fffH3XqrPl/rw0bNoz/+Z//iTlz5kTXrl3j3HPPjQsuuCCiipeF9+3bN+699954+OGHY7fddotf/OIX8W//9m+5S6Dr1KkTd955Z3z11Vex++67xzHHHJO7s3l1q1+/fjz00EPx6aefxm677Rb//M//HL17945rrrmmwnG9e/eO9u3bxz777BMDBgyI3/zmN7nPitepUyemTJkSM2fOjE6dOsVpp532g78bfNSoUTFq1Kjo0qVLPPnkk3HPPffk7iRfVccee2x07NgxunfvHltssUU89dRTuX0DBw6MwsLCGDhw4M++ZB8ACrLv/l4UAGCd+etf/xpHH310fP7551GvXr2aHodvLFiwILbbbrt4/vnno1u3bjU9DgC1nBupAcA6cuONN0a7du1iq622irlz58ZZZ50Vhx56qOBeT6xcuTI++eSTOO+88+IXv/iF4AagWohuAFhHFi5cGBdccEEsXLgwWrZsGf/yL/+S7NJvqu6pp56KfffdNzp06PCz70APAN9yeTkAAAAk4kZqAAAAkIjoBgAAgERENwAAACQiugEAACAR0Q0AAACJiG4AAABIRHQDAABAIqIbAAAAEvlfFJVZ4+i331wAAAAASUVORK5CYII=", "text/plain": [ - "bot_question_id Int64\n", - "title object\n", - "resolution float64\n", - "scheduled_close_time datetime64[ns]\n", - "actual_close_time datetime64[ns]\n", - "type object\n", - "options object\n", - "range_min float64\n", - "range_max float64\n", - "pro_question_id Int64\n", - "question_weight float64\n", - "bot_team_median object\n", - "pro_median object\n", - "head_to_head float64\n", - "weighted_score float64\n", - "dtype: object" + "
" ] }, - "execution_count": 75, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "# Cast df_top_bot_pro_forecasts['resolution'] as string - idk why this is necessary but it is\n", - "df_top_bot_pro_forecasts['resolution'] = df_top_bot_pro_forecasts['resolution'].astype(pd.StringDtype())\n", - "df_top_bot_pro_forecasts['resolution'] = df_top_bot_pro_forecasts['resolution'].map({'yes': 1, 'no': 0})\n", - "df_top_bot_pro_forecasts.dtypes" + "# Call the function with your DataFrame and column names\n", + "create_discrimination_histogram(df_top_bot_pro_forecasts,\n", + " 'bot_team_median',\n", + " 'pro_median',\n", + " 'resolution')" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 82, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4dkNBotk_4e3", + "outputId": "d393a72e-997a-4025-ca7b-6f5328436286" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bot average forecast difference (1 - 0): 0.4355\n", + "Pro average forecast difference (1 - 0): 0.5238\n", + "Difference between pro and bot differences: 0.0882\n" + ] + } + ], + "source": [ + "# Calculate average forecasts for resolved 1 and 0 for bots\n", + "bot_avg_1 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 1]['bot_team_median'].mean()\n", + "bot_avg_0 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 0]['bot_team_median'].mean()\n", + "\n", + "# Calculate average forecasts for resolved 1 and 0 for pros\n", + "pro_avg_1 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 1]['pro_median'].mean()\n", + "pro_avg_0 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 0]['pro_median'].mean()\n", + "\n", + "# Calculate the differences\n", + "bot_difference = bot_avg_1 - bot_avg_0\n", + "pro_difference = pro_avg_1 - pro_avg_0\n", + "\n", + "print(f\"Bot average forecast difference (1 - 0): 
{bot_difference:.4f}\")\n", + "print(f\"Pro average forecast difference (1 - 0): {pro_difference:.4f}\")\n", + "\n", + "# Calculate the difference between pro and bot differences\n", + "pro_bot_difference = pro_difference - bot_difference\n", + "print(f\"Difference between pro and bot differences: {pro_bot_difference:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGnXswWOx_yw", + "outputId": "35a0e2a8-5831-43cf-a006-f8e0262666ec" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Weighted number of 1 resolutions: 14.5\n", + "Weighted number of 0 resolutions: 31.35\n", + "Average 1 resolutions: 0.31624863685932386\n" + ] + } + ], + "source": [ + "# Calculate weighted number of 1 resolutions\n", + "weighted_ones = np.sum(\n", + " df_top_bot_pro_forecasts['resolution'] *\n", + " df_top_bot_pro_forecasts['question_weight']\n", + ")\n", + "\n", + "# Calculate weighted number of 0 resolutions\n", + "weighted_zeros = np.sum(\n", + " (1 - df_top_bot_pro_forecasts['resolution']) *\n", + " df_top_bot_pro_forecasts['question_weight']\n", + ")\n", + "\n", + "print(f\"Weighted number of 1 resolutions: {weighted_ones}\")\n", + "print(f\"Weighted number of 0 resolutions: {weighted_zeros}\")\n", + "\n", + "print(f\"Average 1 resolutions: {weighted_ones / (weighted_zeros + weighted_ones)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -11683,6 +13976,8 @@ " options\n", " range_min\n", " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " pro_question_id\n", " question_weight\n", " bot_team_median\n", @@ -11700,15 +13995,17 @@ " 2025-01-20 03:27:00\n", " 2025-01-20 03:27:00\n", " multiple_choice\n", - " [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"]\n", + " [0, 1, 2-3, 4-6, >6]\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31268\n", " 1.0\n", - " 0.02\n", + " [0.012462871287128714, 
0.0001, 0.0001, 0.0001,...\n", " [0.001,0.62,0.35,0.019,0.01]\n", - " 299.573227\n", - " 299.573227\n", + " 2.522754\n", + " 2.522754\n", " \n", " \n", " 1\n", @@ -11721,12 +14018,14 @@ " NaN\n", " 60.0\n", " 100.0\n", + " True\n", + " True\n", " 31269\n", " 1.0\n", - " [0.03366666666666667, 0.0341314028, 0.03460208...\n", + " [0.05, 0.0505982539, 0.0511965078, 0.051794761...\n", " [0.0013749738,0.0014499743,0.001526641,0.00160...\n", - " -57.286904\n", - " -57.286904\n", + " -0.158842\n", + " -0.158842\n", " \n", " \n", " 2\n", @@ -11739,12 +14038,14 @@ " NaN\n", " NaN\n", " NaN\n", + " False\n", + " False\n", " 31270\n", " 1.0\n", - " 0.1\n", + " 0.063\n", " 0.013\n", - " -9.227528\n", - " -9.227528\n", + " -0.051987\n", + " -0.051987\n", " \n", " \n", " 3\n", @@ -11754,15 +14055,17 @@ " 2025-01-21 11:42:00\n", " 2025-01-21 11:42:00\n", " multiple_choice\n", - " [\"0-4\",\"5-9\",\">9\"]\n", + " [0-4, 5-9, >9]\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " 31280\n", " 1.0\n", - " 0.6\n", + " [0.0001, 0.5125, 0.0001]\n", " [0.16,0.44,0.4]\n", - " 31.015493\n", - " 31.015493\n", + " 0.152526\n", + " 0.152526\n", " \n", " \n", " 4\n", @@ -11775,12 +14078,14 @@ " NaN\n", " 0.0\n", " 400.0\n", + " False\n", + " False\n", " 31281\n", " 1.0\n", - " [0.0, 0.0017047194333333333, 0.0034148989, 0.0...\n", + " [0.0, 0.0018181818, 0.0036363636, 0.0054545455...\n", " [0.0,0.0005044914,0.0010323506,0.0015847475,0....\n", - " 56.082092\n", - " 56.082092\n", + " 0.132210\n", + " 0.132210\n", " \n", " \n", "\n", @@ -11801,127 +14106,45 @@ "3 NaN 2025-01-21 11:42:00 2025-01-21 11:42:00 multiple_choice \n", "4 NaN 2025-01-21 11:42:00 2025-01-21 11:42:00 numeric \n", "\n", - " options range_min range_max pro_question_id \\\n", - "0 [\"0\",\"1\",\"2-3\",\"4-6\",\">6\"] NaN NaN 31268 \n", - "1 NaN 60.0 100.0 31269 \n", - "2 NaN NaN NaN 31270 \n", - "3 [\"0-4\",\"5-9\",\">9\"] NaN NaN 31280 \n", - "4 NaN 0.0 400.0 31281 \n", - "\n", - " question_weight bot_team_median \\\n", - "0 1.0 
0.02 \n", - "1 1.0 [0.03366666666666667, 0.0341314028, 0.03460208... \n", - "2 1.0 0.1 \n", - "3 1.0 0.6 \n", - "4 1.0 [0.0, 0.0017047194333333333, 0.0034148989, 0.0... \n", + " options range_min range_max open_upper_bound \\\n", + "0 [0, 1, 2-3, 4-6, >6] NaN NaN False \n", + "1 NaN 60.0 100.0 True \n", + "2 NaN NaN NaN False \n", + "3 [0-4, 5-9, >9] NaN NaN NaN \n", + "4 NaN 0.0 400.0 False \n", + "\n", + " open_lower_bound pro_question_id question_weight \\\n", + "0 False 31268 1.0 \n", + "1 True 31269 1.0 \n", + "2 False 31270 1.0 \n", + "3 NaN 31280 1.0 \n", + "4 False 31281 1.0 \n", + "\n", + " bot_team_median \\\n", + "0 [0.012462871287128714, 0.0001, 0.0001, 0.0001,... \n", + "1 [0.05, 0.0505982539, 0.0511965078, 0.051794761... \n", + "2 0.063 \n", + "3 [0.0001, 0.5125, 0.0001] \n", + "4 [0.0, 0.0018181818, 0.0036363636, 0.0054545455... \n", "\n", " pro_median head_to_head \\\n", - "0 [0.001,0.62,0.35,0.019,0.01] 299.573227 \n", - "1 [0.0013749738,0.0014499743,0.001526641,0.00160... -57.286904 \n", - "2 0.013 -9.227528 \n", - "3 [0.16,0.44,0.4] 31.015493 \n", - "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... 56.082092 \n", + "0 [0.001,0.62,0.35,0.019,0.01] 2.522754 \n", + "1 [0.0013749738,0.0014499743,0.001526641,0.00160... -0.158842 \n", + "2 0.013 -0.051987 \n", + "3 [0.16,0.44,0.4] 0.152526 \n", + "4 [0.0,0.0005044914,0.0010323506,0.0015847475,0.... 
0.132210 \n", "\n", " weighted_score \n", - "0 299.573227 \n", - "1 -57.286904 \n", - "2 -9.227528 \n", - "3 31.015493 \n", - "4 56.082092 " - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_top_bot_pro_forecasts.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [], - "source": [ - "# Make binary-only df_top_bot_pro_forecasts for calibration curves etc\n", - "df_top_bot_pro_forecasts_binary = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['type'] == 'binary'].copy()\n", - "\n", - "df_top_bot_pro_forecasts_all_binary = df_top_bot_pro_forecasts_all[df_top_bot_pro_forecasts_all['type'] == 'binary'].copy()" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 807 - }, - "id": "BjNQ4IND6Ct7", - "outputId": "c0ec1316-ef4e-4bd1-875d-148b65ba0114" - }, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" + "0 2.522754 \n", + "1 -0.158842 \n", + "2 -0.051987 \n", + "3 0.152526 \n", + "4 0.132210 " ] }, "metadata": {}, "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of pro forecasts: 50\n" - ] - } - ], - "source": [ - "# Set up the plot\n", - "plt.figure(figsize=(10, 8))\n", - "plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly calibrated')\n", - "\n", - "# Plot calibration curves for bot_team_median and pro_median\n", - "plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'bot_team_median', 'Bot Team Median', 'blue')\n", - "plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'pro_median', 'Pro Median', 'red')\n", - "\n", - "# Customize the plot\n", - "plt.xlabel('Assigned Probability', fontsize=12)\n", - "plt.ylabel('Fraction that Resolved \\'Yes\\'', fontsize=12)\n", - "plt.title(f'Calibration Curve: Bot Team Median vs Pro Median\\n(only overlap: {len(df_top_bot_pro_forecasts_binary)} questions)', fontsize=14)\n", - "plt.legend(fontsize=10)\n", - "plt.grid(True, alpha=0.3)\n", - "\n", - "# Set axis limits\n", - "plt.xlim(0, 1)\n", - "plt.ylim(0, 1)\n", - "\n", - "# Show the plot\n", - "plt.tight_layout()\n", - "plt.show()\n", - "print(f\"Number of pro forecasts: {len(df_top_bot_pro_forecasts_binary)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Map resolution to 0 and 1\n", - "df_top_bot_pro_forecasts_all_binary['resolution'] = df_top_bot_pro_forecasts_all_binary['resolution'].map({'yes': 1, 'no': 0})" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ { "data": { "text/html": [ @@ -11952,207 +14175,163 @@ " options\n", " range_min\n", " range_max\n", + " open_upper_bound\n", + " open_lower_bound\n", " pro_question_id\n", " question_weight\n", " bot_team_median\n", " pro_median\n", + " head_to_head\n", + " weighted_score\n", " \n", " \n", - " \n", - " \n", - " 
2\n", - " 31264\n", - " Will the bubble in the Magnificent Seven pop b...\n", - " 0.0\n", - " 2025-01-20 03:27:00\n", - " 2025-01-20 03:27:00\n", + " \n", + " \n", + " 342\n", + " 35345\n", + " Will the US Citizenship and Immigration Servic...\n", + " 1.0\n", + " 2025-03-12 22:00:00\n", + " 2025-03-12 22:00:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " 31270\n", - " 1.0\n", - " 0.1\n", - " 0.013\n", + " False\n", + " False\n", + " 35380\n", + " 1.00\n", + " 0.9\n", + " 0.95\n", + " -0.054067\n", + " -0.054067\n", " \n", " \n", - " 5\n", - " 31276\n", - " Will the USDA-posted recall by Pork Dynasty In...\n", - " 1.0\n", - " 2025-01-21 11:42:00\n", - " 2025-01-21 11:42:00\n", + " 351\n", + " 35354\n", + " Will the United States impose any new tariffs ...\n", + " 0.0\n", + " 2025-03-13 03:00:00\n", + " 2025-03-13 03:00:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " 31282\n", - " 1.0\n", - " 0.6\n", - " 0.45\n", + " False\n", + " False\n", + " 35381\n", + " 1.00\n", + " 0.4\n", + " 0.05\n", + " -0.459532\n", + " -0.459532\n", " \n", " \n", - " 8\n", - " 31288\n", - " Will Eric Adams be Mayor of New York City on t...\n", + " 355\n", + " 35358\n", + " Will ChatGPT rank in the top 10 global website...\n", " 1.0\n", - " 2025-01-22 20:19:00\n", - " 2025-01-22 20:19:00\n", + " 2025-03-13 03:00:00\n", + " 2025-03-13 03:00:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " 31294\n", - " 1.0\n", - " 0.9\n", - " 0.95\n", + " False\n", + " False\n", + " 35385\n", + " 1.00\n", + " 0.8\n", + " 0.97\n", + " -0.192684\n", + " -0.192684\n", " \n", " \n", - " 10\n", - " 31318\n", - " Will the S&P 500 index go up in January 2025?\n", - " 1.0\n", - " 2025-01-23 23:23:00\n", - " 2025-01-23 23:23:00\n", + " 361\n", + " 35364\n", + " Will Doge's Agency Efficiency Leaderboard have...\n", + " 0.0\n", + " 2025-03-14 23:00:00\n", + " 2025-03-14 23:00:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " <NA>\n", - " 1.0\n", - " NaN\n", - " NaN\n", + " False\n", + " False\n", + 
" 35386\n", + " 0.85\n", + " 0.8\n", + " 0.666\n", + " -0.435900\n", + " -0.370515\n", " \n", " \n", - " 13\n", - " 31334\n", - " At the end of March 2025, will Wikipedia still...\n", - " 1.0\n", - " 2025-01-24 14:23:00\n", - " 2025-01-24 14:23:00\n", + " 364\n", + " 35367\n", + " Will the Project 2025 Tracker spreadsheet mark...\n", + " 0.0\n", + " 2025-03-14 23:00:00\n", + " 2025-03-14 23:00:00\n", " binary\n", " NaN\n", " NaN\n", " NaN\n", - " 31338\n", - " 1.0\n", - " 0.75\n", - " 0.9\n", + " False\n", + " False\n", + " 35387\n", + " 0.85\n", + " 0.05\n", + " 0.03\n", + " -0.017709\n", + " -0.015053\n", " \n", " \n", "\n", "" ], "text/plain": [ - " bot_question_id title \\\n", - "2 31264 Will the bubble in the Magnificent Seven pop b... \n", - "5 31276 Will the USDA-posted recall by Pork Dynasty In... \n", - "8 31288 Will Eric Adams be Mayor of New York City on t... \n", - "10 31318 Will the S&P 500 index go up in January 2025? \n", - "13 31334 At the end of March 2025, will Wikipedia still... 
\n", - "\n", - " resolution scheduled_close_time actual_close_time type options \\\n", - "2 0.0 2025-01-20 03:27:00 2025-01-20 03:27:00 binary NaN \n", - "5 1.0 2025-01-21 11:42:00 2025-01-21 11:42:00 binary NaN \n", - "8 1.0 2025-01-22 20:19:00 2025-01-22 20:19:00 binary NaN \n", - "10 1.0 2025-01-23 23:23:00 2025-01-23 23:23:00 binary NaN \n", - "13 1.0 2025-01-24 14:23:00 2025-01-24 14:23:00 binary NaN \n", - "\n", - " range_min range_max pro_question_id question_weight bot_team_median \\\n", - "2 NaN NaN 31270 1.0 0.1 \n", - "5 NaN NaN 31282 1.0 0.6 \n", - "8 NaN NaN 31294 1.0 0.9 \n", - "10 NaN NaN 1.0 NaN \n", - "13 NaN NaN 31338 1.0 0.75 \n", - "\n", - " pro_median \n", - "2 0.013 \n", - "5 0.45 \n", - "8 0.95 \n", - "10 NaN \n", - "13 0.9 " - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_top_bot_pro_forecasts_all_binary.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" + " bot_question_id title \\\n", + "342 35345 Will the US Citizenship and Immigration Servic... \n", + "351 35354 Will the United States impose any new tariffs ... \n", + "355 35358 Will ChatGPT rank in the top 10 global website... \n", + "361 35364 Will Doge's Agency Efficiency Leaderboard have... \n", + "364 35367 Will the Project 2025 Tracker spreadsheet mark... \n", + "\n", + " resolution scheduled_close_time actual_close_time type options \\\n", + "342 1.0 2025-03-12 22:00:00 2025-03-12 22:00:00 binary NaN \n", + "351 0.0 2025-03-13 03:00:00 2025-03-13 03:00:00 binary NaN \n", + "355 1.0 2025-03-13 03:00:00 2025-03-13 03:00:00 binary NaN \n", + "361 0.0 2025-03-14 23:00:00 2025-03-14 23:00:00 binary NaN \n", + "364 0.0 2025-03-14 23:00:00 2025-03-14 23:00:00 binary NaN \n", + "\n", + " range_min range_max open_upper_bound open_lower_bound pro_question_id \\\n", + "342 NaN NaN False False 35380 \n", + "351 NaN NaN False False 35381 \n", + "355 NaN NaN False False 35385 \n", + "361 NaN NaN False False 35386 \n", + "364 NaN NaN False False 35387 \n", + "\n", + " question_weight bot_team_median pro_median head_to_head weighted_score \n", + "342 1.00 0.9 0.95 -0.054067 -0.054067 \n", + "351 1.00 0.4 0.05 -0.459532 -0.459532 \n", + "355 1.00 0.8 0.97 -0.192684 -0.192684 \n", + "361 0.85 0.8 0.666 -0.435900 -0.370515 \n", + "364 0.85 0.05 0.03 -0.017709 -0.015053 " ] }, "metadata": {}, "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of pro forecasts: 50\n", - "Number of bot forecasts: 241\n" - ] - } - ], - "source": [ - "# Set up the plot\n", - "plt.figure(figsize=(10, 8))\n", - "plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly calibrated')\n", - "\n", - "# Plot calibration curves for bot_team_median and pro_median\n", - "plot_calibration_curve(df_top_bot_pro_forecasts_all_binary, 'bot_team_median', 'Bot Team Median', 'blue')\n", - 
"plot_calibration_curve(df_top_bot_pro_forecasts_binary, 'pro_median', 'Pro Median', 'red')\n", - "\n", - "# Customize the plot\n", - "plt.xlabel('Assigned Probability', fontsize=12)\n", - "plt.ylabel('Fraction that Resolved \\'Yes\\'', fontsize=12)\n", - "plt.title(f'Calibration Curve: Bot Team Median vs Pro Median\\n(all questions)', fontsize=14)\n", - "plt.legend(fontsize=10)\n", - "plt.grid(True, alpha=0.3)\n", - "\n", - "# Set axis limits\n", - "plt.xlim(0, 1)\n", - "plt.ylim(0, 1)\n", - "\n", - "# Show the plot\n", - "plt.tight_layout()\n", - "plt.show()\n", - "print(f\"Number of pro forecasts: {len(df_top_bot_pro_forecasts_binary)}\")\n", - "print(f\"Number of bot forecasts: {len(df_top_bot_pro_forecasts_all_binary)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "lPPgorXB7omi", - "outputId": "24571b16-50b7-4e51-cd3d-420c15c7fe42" - }, - "outputs": [ { "ename": "ValueError", - "evalue": "The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()", + "evalue": "operands could not be broadcast together with shapes (201,) (5,) ", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[80], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Calculate confidence scores for bot_team_median and pro_median\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m bot_confidence \u001b[38;5;241m=\u001b[39m \u001b[43mcalculate_confidence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_top_bot_pro_forecasts\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbot_team_median\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf_top_bot_pro_forecasts\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresolution\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m pro_confidence \u001b[38;5;241m=\u001b[39m calculate_confidence(df_top_bot_pro_forecasts[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpro_median\u001b[39m\u001b[38;5;124m'\u001b[39m], df_top_bot_pro_forecasts[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresolution\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBot team confidence score: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbot_confidence\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.4f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/metaculus/aib-analysis/functions.py:824\u001b[0m, in \u001b[0;36mcalculate_confidence\u001b[0;34m(predictions, outcomes)\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 
814\u001b[0m \u001b[38;5;124;03mCalculates over- or under-confidence for a set of predictions.\u001b[39;00m\n\u001b[1;32m 815\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;124;03m float: Confidence score (positive for overconfidence, negative for underconfidence).\u001b[39;00m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;66;03m# Bin predictions into 10 equally spaced bins\u001b[39;00m\n\u001b[0;32m--> 824\u001b[0m bins \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcut\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbins\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 826\u001b[0m \u001b[38;5;66;03m# Calculate mean prediction and actual outcome for each bin\u001b[39;00m\n\u001b[1;32m 827\u001b[0m grouped \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprediction\u001b[39m\u001b[38;5;124m'\u001b[39m: predictions, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124moutcome\u001b[39m\u001b[38;5;124m'\u001b[39m: outcomes})\u001b[38;5;241m.\u001b[39mgroupby(bins)\n", + "Cell \u001b[0;32mIn[84], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Calculate confidence scores for bot_team_median and pro_median\u001b[39;00m\n\u001b[1;32m 2\u001b[0m display_head_and_tail(df_top_bot_pro_forecasts)\n\u001b[0;32m----> 3\u001b[0m bot_confidence \u001b[38;5;241m=\u001b[39m \u001b[43mcalculate_confidence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_top_bot_pro_forecasts\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbot_team_median\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mdf_top_bot_pro_forecasts\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresolution\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m pro_confidence \u001b[38;5;241m=\u001b[39m calculate_confidence(df_top_bot_pro_forecasts[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpro_median\u001b[39m\u001b[38;5;124m'\u001b[39m], df_top_bot_pro_forecasts[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresolution\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBot team confidence score: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbot_confidence\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.4f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/metaculus/aib-analysis/functions.py:782\u001b[0m, in \u001b[0;36mcalculate_confidence\u001b[0;34m(predictions, outcomes)\u001b[0m\n\u001b[1;32m 771\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 772\u001b[0m \u001b[38;5;124;03mCalculates over- or under-confidence for a set of predictions.\u001b[39;00m\n\u001b[1;32m 773\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;124;03m float: Confidence score (positive for overconfidence, negative for underconfidence).\u001b[39;00m\n\u001b[1;32m 780\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 781\u001b[0m \u001b[38;5;66;03m# Bin predictions into 10 equally spaced bins\u001b[39;00m\n\u001b[0;32m--> 782\u001b[0m bins \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcut\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbins\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
784\u001b[0m \u001b[38;5;66;03m# Calculate mean prediction and actual outcome for each bin\u001b[39;00m\n\u001b[1;32m 785\u001b[0m grouped \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprediction\u001b[39m\u001b[38;5;124m\"\u001b[39m: predictions, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutcome\u001b[39m\u001b[38;5;124m\"\u001b[39m: outcomes})\u001b[38;5;241m.\u001b[39mgroupby(\n\u001b[1;32m 786\u001b[0m bins\n\u001b[1;32m 787\u001b[0m )\n", "File \u001b[0;32m~/.local/lib/python3.12/site-packages/pandas/core/reshape/tile.py:246\u001b[0m, in \u001b[0;36mcut\u001b[0;34m(x, bins, right, labels, retbins, precision, include_lowest, duplicates, ordered)\u001b[0m\n\u001b[1;32m 243\u001b[0m x_idx, _ \u001b[38;5;241m=\u001b[39m _coerce_to_type(x_idx)\n\u001b[1;32m 245\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39miterable(bins):\n\u001b[0;32m--> 246\u001b[0m bins \u001b[38;5;241m=\u001b[39m \u001b[43m_nbins_to_bins\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbins\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(bins, IntervalIndex):\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bins\u001b[38;5;241m.\u001b[39mis_overlapping:\n", "File \u001b[0;32m~/.local/lib/python3.12/site-packages/pandas/core/reshape/tile.py:363\u001b[0m, in \u001b[0;36m_nbins_to_bins\u001b[0;34m(x_idx, nbins, right)\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x_idx\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot cut empty 
array\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 363\u001b[0m rng \u001b[38;5;241m=\u001b[39m (\u001b[43mx_idx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, x_idx\u001b[38;5;241m.\u001b[39mmax())\n\u001b[1;32m 364\u001b[0m mn, mx \u001b[38;5;241m=\u001b[39m rng\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_numeric_dtype(x_idx\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m (np\u001b[38;5;241m.\u001b[39misinf(mn) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39misinf(mx)):\n\u001b[1;32m 367\u001b[0m \u001b[38;5;66;03m# GH#24314\u001b[39;00m\n", "File \u001b[0;32m~/.local/lib/python3.12/site-packages/pandas/core/indexes/base.py:7467\u001b[0m, in \u001b[0;36mIndex.min\u001b[0;34m(self, axis, skipna, *args, **kwargs)\u001b[0m\n\u001b[1;32m 7464\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_multi \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values, np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[1;32m 7465\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values\u001b[38;5;241m.\u001b[39m_reduce(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmin\u001b[39m\u001b[38;5;124m\"\u001b[39m, skipna\u001b[38;5;241m=\u001b[39mskipna)\n\u001b[0;32m-> 7467\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mnanops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnanmin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskipna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipna\u001b[49m\u001b[43m)\u001b[49m\n", @@ -12160,12 
+14339,13 @@ "File \u001b[0;32m~/.local/lib/python3.12/site-packages/pandas/core/nanops.py:404\u001b[0m, in \u001b[0;36m_datetimelike_compat..new_func\u001b[0;34m(values, axis, skipna, mask, **kwargs)\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m datetimelike \u001b[38;5;129;01mand\u001b[39;00m mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 402\u001b[0m mask \u001b[38;5;241m=\u001b[39m isna(values)\n\u001b[0;32m--> 404\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskipna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipna\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m datetimelike:\n\u001b[1;32m 407\u001b[0m result \u001b[38;5;241m=\u001b[39m _wrap_results(result, orig_values\u001b[38;5;241m.\u001b[39mdtype, fill_value\u001b[38;5;241m=\u001b[39miNaT)\n", "File \u001b[0;32m~/.local/lib/python3.12/site-packages/pandas/core/nanops.py:1098\u001b[0m, in \u001b[0;36m_nanminmax..reduction\u001b[0;34m(values, axis, skipna, mask)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _na_for_min_count(values, axis)\n\u001b[1;32m 1095\u001b[0m values, mask \u001b[38;5;241m=\u001b[39m _get_values(\n\u001b[1;32m 1096\u001b[0m values, skipna, fill_value_typ\u001b[38;5;241m=\u001b[39mfill_value_typ, mask\u001b[38;5;241m=\u001b[39mmask\n\u001b[1;32m 1097\u001b[0m )\n\u001b[0;32m-> 1098\u001b[0m result \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmeth\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1099\u001b[0m result \u001b[38;5;241m=\u001b[39m _maybe_null_out(result, axis, mask, values\u001b[38;5;241m.\u001b[39mshape)\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", "File \u001b[0;32m~/.local/lib/python3.12/site-packages/numpy/_core/_methods.py:49\u001b[0m, in \u001b[0;36m_amin\u001b[0;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_amin\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 48\u001b[0m initial\u001b[38;5;241m=\u001b[39m_NoValue, where\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m---> 49\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mumr_minimum\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()" + "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (201,) (5,) " ] } ], "source": [ "# Calculate confidence scores for bot_team_median and pro_median\n", + "display_head_and_tail(df_top_bot_pro_forecasts)\n", "bot_confidence = calculate_confidence(df_top_bot_pro_forecasts['bot_team_median'], df_top_bot_pro_forecasts['resolution'])\n", "pro_confidence = calculate_confidence(df_top_bot_pro_forecasts['pro_median'], df_top_bot_pro_forecasts['resolution'])\n", "\n", @@ -12176,88 +14356,6 @@ "print(f\"Pro team is {interpret_confidence(pro_confidence)}\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "N26JZjCV9_jc", - "outputId": "eacb7626-54d0-47c7-8f21-48e95e709564" - }, - "outputs": [], - "source": [ - "# Call the function with your DataFrame and column names\n", - "create_discrimination_histogram(df_top_bot_pro_forecasts,\n", - " 'bot_team_median',\n", - " 'pro_median',\n", - " 'resolution')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4dkNBotk_4e3", - "outputId": "d393a72e-997a-4025-ca7b-6f5328436286" - }, - "outputs": [], - "source": [ - "# Calculate average forecasts for resolved 1 and 0 for bots\n", - "bot_avg_1 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 1]['bot_team_median'].mean()\n", - "bot_avg_0 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 0]['bot_team_median'].mean()\n", - "\n", - "# Calculate average forecasts for resolved 1 and 0 for pros\n", - "pro_avg_1 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 1]['pro_median'].mean()\n", - "pro_avg_0 = df_top_bot_pro_forecasts[df_top_bot_pro_forecasts['resolution'] == 0]['pro_median'].mean()\n", - "\n", - "# Calculate the differences\n", - "bot_difference = bot_avg_1 - 
bot_avg_0\n", - "pro_difference = pro_avg_1 - pro_avg_0\n", - "\n", - "print(f\"Bot average forecast difference (1 - 0): {bot_difference:.4f}\")\n", - "print(f\"Pro average forecast difference (1 - 0): {pro_difference:.4f}\")\n", - "\n", - "# Calculate the difference between pro and bot differences\n", - "pro_bot_difference = pro_difference - bot_difference\n", - "print(f\"Difference between pro and bot differences: {pro_bot_difference:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGnXswWOx_yw", - "outputId": "35a0e2a8-5831-43cf-a006-f8e0262666ec" - }, - "outputs": [], - "source": [ - "# Calculate weighted number of 1 resolutions\n", - "weighted_ones = np.sum(\n", - " df_top_bot_pro_forecasts['resolution'] *\n", - " df_top_bot_pro_forecasts['question_weight']\n", - ")\n", - "\n", - "# Calculate weighted number of 0 resolutions\n", - "weighted_zeros = np.sum(\n", - " (1 - df_top_bot_pro_forecasts['resolution']) *\n", - " df_top_bot_pro_forecasts['question_weight']\n", - ")\n", - "\n", - "print(f\"Weighted number of 1 resolutions: {weighted_ones}\")\n", - "print(f\"Weighted number of 0 resolutions: {weighted_zeros}\")\n", - "\n", - "print(f\"Average 1 resolutions: {weighted_ones / (weighted_zeros + weighted_ones)}\")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -12276,8 +14374,8 @@ "cp = pd.read_csv('https://data.heroku.com/dataclips/xwbtczmsuszvlbrhdifhsilplfxf.csv')\n", "cp.rename(columns={'post_id': 'cp_post_id', 'question_id': 'cp_question_id'}, inplace=True)\n", "\n", - "bot_cp_id = pd.read_csv('bot_to_main_feed_ids.csv')\n", - " \n", + "bot_cp_id = pd.read_csv('misc_data/bot_to_main_feed_ids.csv')\n", + "\n", "# Merge these on cp_question_id\n", "df_bot_cp = pd.merge(bot_cp_id, cp, on='cp_post_id', how='right') # ahh?\n", "\n", @@ -12400,10 +14498,10 @@ "for bot_question_id in groups_exploded['bot_question_id'].unique():\n", " # Get all rows 
for this bot_question_id\n", " question_group = groups_exploded[groups_exploded['bot_question_id'] == bot_question_id]\n", - " \n", + "\n", " # Get the question title\n", " question_title = question_group['question_title'].iloc[0]\n", - " \n", + "\n", " # Function to check if option matches question title\n", " def option_matches(row):\n", " option = row['options']\n", @@ -12415,16 +14513,16 @@ " or_format = f\"{start} or {end}\"\n", " return or_format in question_title\n", " return False\n", - " \n", + "\n", " # Find rows where the question title contains the option (with format handling)\n", " matching_rows = question_group[question_group.apply(option_matches, axis=1)]\n", - " \n", + "\n", " filtered_rows = []\n", "\n", " # If we found a matching row, add the first one to our filtered rows, EXCEPT... Biden\n", " if not matching_rows.empty and 'Biden' not in question_title:\n", " filtered_rows.append(matching_rows.iloc[0])\n", - " \n", + "\n", " # If Biden in question_title, we mustn't just take the first row - we must sum the rows that meet the threshold\n", " if 'Biden' in question_title:\n", " # Get first row for each unique option to avoid duplicates\n", @@ -12433,7 +14531,7 @@ " # Drop option='1' - we don't ask about 1 or more\n", " first_rows = first_rows[first_rows['options'] != '1']\n", " biden_interp = first_rows.copy()\n", - " \n", + "\n", " # Now for each row in biden_interp\n", " for idx, row in biden_interp.iterrows():\n", " threshold = int(row['threshold'])\n", @@ -12444,10 +14542,10 @@ " forecast_value = first_rows[first_rows['options'].isin(['3', '4 or more'])]['forecast_values'].sum()\n", " elif threshold == 4:\n", " forecast_value = first_rows[first_rows['options'] == '4 or more']['forecast_values'].sum()\n", - " \n", + "\n", " # Update this row's forecast value\n", " biden_interp.at[idx, 'forecast_value'] = forecast_value\n", - " \n", + "\n", " filtered_rows.append(biden_interp.iloc[0])\n", "\n", "# Combine all filtered rows into a DataFrame\n", 
@@ -12502,7 +14600,7 @@ "thresholds = {\n", " 29163: ('less', 2.0), # COVID hospitalizations\n", " 29349: ('greater', 100), # Brasilia rain\n", - " 29350: ('greater', 150), # Brasilia rain \n", + " 29350: ('greater', 150), # Brasilia rain\n", " 29351: ('greater', 200), # Brasilia rain\n", " 29353: ('greater', 20), # Arms sales\n", " 29354: ('greater', 25), # Arms sales\n", @@ -12591,7 +14689,7 @@ "# 29567: China youth unemployment > 17.0 and less than 18.0\n", "row = numerics[numerics['bot_question_id'] == 29567].iloc[0]\n", "numerics.loc[numerics['bot_question_id'] == row['bot_question_id'], 'forecast_values'] = cdf_between(row, row['cdf'], 17.0, 18.0)\n", - " \n", + "\n", "# 29568: China youth unemployment > 18.0 and less than 19.0\n", "row = numerics[numerics['bot_question_id'] == 29568].iloc[0]\n", "numerics.loc[numerics['bot_question_id'] == row['bot_question_id'], 'forecast_values'] = cdf_between(row, row['cdf'], 18.0, 19.0)\n", @@ -12701,7 +14799,7 @@ "if True:\n", " # Filter rows where the months do not match\n", " df_bot_cp_exploded = df_bot_cp_exploded[\n", - " (df_bot_cp_exploded['bot_version_month'] == df_bot_cp_exploded['cp_version_month']) | \n", + " (df_bot_cp_exploded['bot_version_month'] == df_bot_cp_exploded['cp_version_month']) |\n", " (df_bot_cp_exploded['bot_version_month'].isnull())\n", "]\n", "\n", @@ -13010,9 +15108,9 @@ "outputs": [], "source": [ "# Write both leaderboards to csv\n", - "weighted_leaderboard.to_csv('weighted_baseline_bot_cp.csv', index=False)\n", + "weighted_leaderboard.to_csv('notebook_outputs/weighted_baseline_bot_cp.csv', index=False)\n", "\n", - "df_W_leaderboard.to_csv('weighted_t_test_h2h_bot_vs_cp.csv', index=True)" + "df_W_leaderboard.to_csv('notebook_outputs/weighted_t_test_h2h_bot_vs_cp.csv', index=True)" ] }, { @@ -13035,9 +15133,9 @@ "\n", "# Recommend paying attention to the bot team h2h scores vs CP graph (further down) rather than pgodzinai (he was selected as the bot \"team\" vs the PROS)\n", "\n", - 
"df_top_bot_pro_cp_forecasts['head_to_head_bot_vs_cp'] = df_top_bot_pro_cp_forecasts.apply(calculate_head_to_head, args=('bot_team_median', 'forecast_values'), axis=1)\n", - "df_top_bot_pro_cp_forecasts['head_to_head_cp_vs_pro'] = df_top_bot_pro_cp_forecasts.apply(calculate_head_to_head, args=('forecast_values', 'pro_median'), axis=1)\n", - "df_top_bot_pro_cp_forecasts['head_to_head_bot_vs_pro'] = df_top_bot_pro_cp_forecasts.apply(calculate_head_to_head, args=('bot_team_median', 'pro_median'), axis=1)\n", + "df_top_bot_pro_cp_forecasts['head_to_head_bot_vs_cp'] = df_top_bot_pro_cp_forecasts.apply(calculate_weighted_h2h_score_between_two_forecast_columns, args=('bot_team_median', 'forecast_values'), axis=1)\n", + "df_top_bot_pro_cp_forecasts['head_to_head_cp_vs_pro'] = df_top_bot_pro_cp_forecasts.apply(calculate_weighted_h2h_score_between_two_forecast_columns, args=('forecast_values', 'pro_median'), axis=1)\n", + "df_top_bot_pro_cp_forecasts['head_to_head_bot_vs_pro'] = df_top_bot_pro_cp_forecasts.apply(calculate_weighted_h2h_score_between_two_forecast_columns, args=('bot_team_median', 'pro_median'), axis=1)\n", "\n", "plot_head_to_head_distribution(df_top_bot_pro_cp_forecasts, 'head_to_head_bot_vs_cp', ('pgodzinai', 'CP'))\n", "plot_head_to_head_distribution(df_top_bot_pro_cp_forecasts, 'head_to_head_cp_vs_pro', ('CP', 'Pro median'))\n", @@ -13189,7 +15287,7 @@ "df_top_bot_cp_forecasts = df_top_bot_cp_forecasts.dropna(subset=['forecast_values'])\n", "\n", "# Add the head_to_head column\n", - "df_top_bot_cp_forecasts['head_to_head'] = df_top_bot_cp_forecasts.apply(calculate_head_to_head, args=('bot_team_median', 'forecast_values'), axis=1)\n", + "df_top_bot_cp_forecasts['head_to_head'] = df_top_bot_cp_forecasts.apply(calculate_weighted_h2h_score_between_two_forecast_columns, args=('bot_team_median', 'forecast_values'), axis=1)\n", "\n", "display_head_and_tail(df_top_bot_cp_forecasts)" ] @@ -13206,7 +15304,7 @@ "df_top_bot_pro_cp_forecasts = 
df_top_bot_pro_cp_forecasts.rename(columns={'forecast_values': 'community_prediction'})\n", "\n", "# Write df_top_bot_pro_cp_forecasts to csv, but only the columns bot question id, cp post id, cp question id, title, resolution, cp_reveal_time, forecast_values, bot_team_median, pro_median\n", - "df_top_bot_pro_cp_forecasts[['bot_question_id', 'cp_post_id', 'cp_question_id', 'title', 'resolution', 'cp_reveal_time', 'community_prediction', 'bot_team_median', 'pgodzinai', 'pro_median']].to_csv('df_top_bot_pro_cp_forecasts.csv', index=False)" + "df_top_bot_pro_cp_forecasts[['bot_question_id', 'cp_post_id', 'cp_question_id', 'title', 'resolution', 'cp_reveal_time', 'community_prediction', 'bot_team_median', 'pgodzinai', 'pro_median']].to_csv('notebook_outputs/df_top_bot_pro_cp_forecasts.csv', index=False)" ] }, { @@ -13248,7 +15346,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.12.10" } }, "nbformat": 4, diff --git a/df_top_bot_pro_cp_forecasts.csv b/archived/df_top_bot_pro_cp_forecasts.csv similarity index 100% rename from df_top_bot_pro_cp_forecasts.csv rename to archived/df_top_bot_pro_cp_forecasts.csv diff --git a/pgodzinai_comments.csv b/archived/pgodzinai_comments.csv similarity index 100% rename from pgodzinai_comments.csv rename to archived/pgodzinai_comments.csv diff --git a/pgodzinai_comments.ipynb b/archived/pgodzinai_comments.ipynb similarity index 100% rename from pgodzinai_comments.ipynb rename to archived/pgodzinai_comments.ipynb diff --git a/scores/Q4 2024 FAB - questions list - FINAL BOT.csv b/archived/scores/Q4 2024 FAB - questions list - FINAL BOT.csv similarity index 100% rename from scores/Q4 2024 FAB - questions list - FINAL BOT.csv rename to archived/scores/Q4 2024 FAB - questions list - FINAL BOT.csv diff --git a/scores/bots_score_data_q3.csv b/archived/scores/bots_score_data_q3.csv similarity index 100% rename from scores/bots_score_data_q3.csv rename to 
archived/scores/bots_score_data_q3.csv diff --git a/scores/bots_score_data_q4.csv b/archived/scores/bots_score_data_q4.csv similarity index 100% rename from scores/bots_score_data_q4.csv rename to archived/scores/bots_score_data_q4.csv diff --git a/scores/luke_baseline_cp_scores.csv b/archived/scores/luke_baseline_cp_scores.csv similarity index 100% rename from scores/luke_baseline_cp_scores.csv rename to archived/scores/luke_baseline_cp_scores.csv diff --git a/scores/pros_score_data_q3.csv b/archived/scores/pros_score_data_q3.csv similarity index 100% rename from scores/pros_score_data_q3.csv rename to archived/scores/pros_score_data_q3.csv diff --git a/scores/pros_score_data_q4.csv b/archived/scores/pros_score_data_q4.csv similarity index 100% rename from scores/pros_score_data_q4.csv rename to archived/scores/pros_score_data_q4.csv diff --git a/weighted_baseline_bot_cp.csv b/archived/weighted_baseline_bot_cp.csv similarity index 100% rename from weighted_baseline_bot_cp.csv rename to archived/weighted_baseline_bot_cp.csv diff --git a/weighted_t_test_h2h_bot_vs_cp.csv b/archived/weighted_t_test_h2h_bot_vs_cp.csv similarity index 100% rename from weighted_t_test_h2h_bot_vs_cp.csv rename to archived/weighted_t_test_h2h_bot_vs_cp.csv diff --git a/bootstrapped_h2h_bot_vs_pros.csv b/bootstrapped_h2h_bot_vs_pros.csv deleted file mode 100644 index c536929..0000000 --- a/bootstrapped_h2h_bot_vs_pros.csv +++ /dev/null @@ -1,47 +0,0 @@ -,2.5% CI,10% CI,Median,90% CI,97.5% CI -Grizeu_Bot,-9.7,-5.4,4.4,15.9,22.2 -RPM_bot,-0.1,0.3,1.4,2.8,3.7 -X_bot,-0.4,-0.3,0.2,0.7,1.2 -andrewsiah,0.0,0.0,0.0,0.0,0.0 -cobyj-bot,0.0,0.0,0.0,0.0,0.0 -acm_bot,-16.3,-11.3,-0.2,14.8,22.5 -jonahsingerbot,-1.4,-1.1,-0.6,-0.3,-0.1 -bean_bot,-1.6,-1.3,-0.7,-0.3,-0.1 -CumulativeBot,-2.9,-2.3,-1.0,0.2,1.0 -swingswish,-2.4,-1.9,-1.1,-0.5,-0.3 -jkraybill_bot,-8.5,-6.2,-1.1,4.6,7.5 -KevinTestBot,-5.8,-3.9,-1.4,0.4,1.1 -SynapseSeer,-6.3,-4.6,-1.5,1.9,3.9 -pianobot,-8.0,-5.9,-2.6,-0.2,0.1 
-twsummerbot,-13.4,-10.3,-2.9,4.6,9.2 -CatrachoCaster,-8.6,-6.8,-3.4,-0.3,1.1 -annabot,-8.4,-6.5,-3.4,-0.6,0.9 -cookics_bot_TEST,-12.1,-9.7,-4.2,0.1,2.1 -GreeneiBot2,-17.4,-13.2,-4.9,3.6,7.4 -krm-bot,-10.6,-8.6,-5.3,-2.6,-1.6 -4Shadower,-12.8,-9.8,-5.3,-1.8,-1.1 -metac-o1,-22.7,-18.5,-6.7,8.5,16.1 -MWG,-18.3,-14.9,-8.3,-2.2,1.3 -ajf-bot,-22.3,-17.2,-8.8,-1.4,2.5 -bot_median,-22.7,-18.3,-9.0,2.1,8.9 -Bot_Pepa,-20.9,-16.3,-9.0,-1.2,2.7 -manticAI,-22.1,-17.7,-9.5,-0.7,4.9 -ProfessorSP,-20.7,-16.8,-10.1,-4.7,-2.4 -wunderplumb,-22.4,-19.1,-12.0,-5.8,-3.3 -metac-perplexity,-29.1,-24.0,-12.0,0.8,8.0 -laylaps,-21.0,-17.8,-12.8,-8.1,-5.8 -NextWorldLab,-28.4,-24.0,-13.6,-2.8,4.0 -pgodzinai,-31.7,-25.6,-14.0,-4.1,1.9 -metac-Gemini-Exp-1206,-28.1,-23.3,-14.0,-2.7,3.2 -metac-deepseek-r1,-30.7,-25.2,-14.6,-4.9,0.5 -minefrac1,-29.8,-24.8,-14.9,-3.1,4.1 -metac-Llama-3.1,-32.9,-26.8,-15.1,-3.3,3.2 -metac-claude-3-5-sonnet-latest,-32.6,-26.6,-15.9,-3.5,3.2 -metac-claude-3-5-sonnet-20240620,-35.3,-29.9,-18.2,-4.3,2.8 -metac-o1-preview,-38.9,-32.4,-19.3,-6.9,0.3 -mmBot,-36.2,-30.9,-21.1,-11.7,-7.1 -VeritasAI,-33.5,-28.9,-21.3,-14.4,-11.1 -metac-grok-2-1212,-41.8,-35.2,-23.4,-10.4,-3.8 -metac-exa,-40.4,-34.4,-23.4,-13.8,-7.9 -metac-gpt-4o,-41.7,-34.7,-23.8,-11.3,-5.3 -InstitutPelFutur,-43.6,-37.9,-26.5,-14.9,-6.6 diff --git a/functions.py b/functions.py index 29a05b2..08b3fd0 100644 --- a/functions.py +++ b/functions.py @@ -1,25 +1,31 @@ -import pandas as pd -import numpy as np +import ast +import math +import random +import re +from datetime import datetime + import matplotlib.pyplot as plt -from scipy.stats import norm +import numpy as np +import pandas as pd from scipy import stats from scipy.optimize import minimize_scalar -from scipy.stats import binom -import re -from datetime import datetime -import random -import math -import ast +from scipy.stats import binom, norm + +from refactored_notebook.scoring import ( + calculate_baseline_score, + calculate_peer_score, + 
nominal_location_to_cdf_location, +) + def extract_forecast(df): # Extract the forecast from whichever column it's in - df['forecast'] = df['probability_yes'].combine_first( - df['probability_yes_per_category'].combine_first( - df['continuous_cdf'] - ) + df["forecast"] = df["probability_yes"].combine_first( + df["probability_yes_per_category"].combine_first(df["continuous_cdf"]) ) return df + def process_forecasts(df): """ Process a dataframe of forecasts by: @@ -27,39 +33,40 @@ def process_forecasts(df): 2. Sorting by created_at to get chronological order 3. Taking the last forecast for each (forecaster, question_id) pair 4. Dropping unused columns - + Parameters: ----------- df : pandas DataFrame DataFrame containing forecast data - + Returns: -------- pandas DataFrame Processed DataFrame with last forecasts """ # Extract the forecast value - df['forecast'] = df['probability_yes'].combine_first( - df['probability_yes_per_category'].combine_first( - df['continuous_cdf'] - ) + df["forecast"] = df["probability_yes"].combine_first( + df["probability_yes_per_category"].combine_first(df["continuous_cdf"]) ) - + # Sort by created_at to ensure chronological order - df = df.sort_values(by='created_at') - + df = df.sort_values(by="created_at") + # Take the last forecast for each (forecaster, question_id) pair - df = df.groupby(['question_id', 'forecaster']).last().reset_index() - + df = df.groupby(["question_id", "forecaster"]).last().reset_index() + # Drop the original forecast columns as they're now redundant - df = df.drop(['probability_yes', 'probability_yes_per_category', 'continuous_cdf'], axis=1) - + df = df.drop( + ["probability_yes", "probability_yes_per_category", "continuous_cdf"], axis=1 + ) + return df + def add_is_median(df): """ Marks exactly one row per question_id as the median. - Guarantees one median per question by taking the forecaster with + Guarantees one median per question by taking the forecaster with the actual median value for that question. 
Args: @@ -69,22 +76,23 @@ def add_is_median(df): pandas.DataFrame: DataFrame with an additional 'is_median' column. """ # Initialize median column - df['is_median'] = False - + df["is_median"] = False + # For each question_id - for qid in df['question_id'].unique(): + for qid in df["question_id"].unique(): # Get just the rows for this question - question_mask = df['question_id'] == qid + question_mask = df["question_id"] == qid question_df = df[question_mask] - + # Get the median value index (middle position after sorting) - median_idx = question_df['forecast'].sort_values().index[len(question_df)//2] - + median_idx = question_df["forecast"].sort_values().index[len(question_df) // 2] + # Mark that row - df.loc[median_idx, 'is_median'] = True - + df.loc[median_idx, "is_median"] = True + return df + def add_median_rows(df, prefix): """ For each row where is_median=True, creates a duplicate row with forecaster='median'. @@ -97,16 +105,21 @@ def add_median_rows(df, prefix): pandas.DataFrame: Original DataFrame plus duplicate rows for medians. """ # Get the median rows - median_rows = df[df['is_median']].copy() - + median_rows = df[df["is_median"]].copy() + # Change forecaster to 'median' - median_rows['forecaster'] = f'{prefix}_median' - + median_rows["forecaster"] = f"{prefix}_median" + # Combine original and new median rows - whole = pd.concat([df, median_rows], ignore_index=True).sort_values('question_id').drop_duplicates(['question_id', 'forecaster']) + whole = ( + pd.concat([df, median_rows], ignore_index=True) + .sort_values("question_id") + .drop_duplicates(["question_id", "forecaster"]) + ) return whole + def calculate_weighted_stats(df): """ Calculates weighted statistics (mean, sum, standard error, confidence intervals) for each forecaster. 
@@ -120,12 +133,12 @@ def calculate_weighted_stats(df): results = [] # For each forecaster - for forecaster in df['forecaster'].unique(): - forecaster_data = df[df['forecaster'] == forecaster] + for forecaster in df["forecaster"].unique(): + forecaster_data = df[df["forecaster"] == forecaster] # Get scores and weights - scores = forecaster_data['score'] - weights = forecaster_data['question_weight'] + scores = forecaster_data["score"] + weights = forecaster_data["question_weight"] # Calculate weighted mean weighted_mean = np.average(scores, weights=weights) @@ -133,26 +146,28 @@ def calculate_weighted_stats(df): # Calculate weighted standard error # Using weighted variance formula - weighted_var = np.average((scores - weighted_mean)**2, weights=weights) + weighted_var = np.average((scores - weighted_mean) ** 2, weights=weights) n = len(scores) weighted_se = np.sqrt(weighted_var / n) # Calculate t-statistic for 95% confidence interval - t_value = stats.t.ppf(0.975, n-1) + t_value = stats.t.ppf(0.975, n - 1) ci_lower = weighted_mean - (t_value * weighted_se) - results.append({ - 'forecaster': forecaster, - 'weighted_mean': weighted_mean, - 'weighted_sum': weighted_sum, - 'n_questions': n, - 'ci_lower': ci_lower, - 'weighted_se': weighted_se - }) + results.append( + { + "forecaster": forecaster, + "weighted_mean": weighted_mean, + "weighted_sum": weighted_sum, + "n_questions": n, + "ci_lower": ci_lower, + "weighted_se": weighted_se, + } + ) # Convert to dataframe and sort by lower bound results_df = pd.DataFrame(results) - return results_df.sort_values('weighted_sum', ascending=False) + return results_df.sort_values("weighted_sum", ascending=False) def make_wide(df_bot_peer, df_pro_bot_resolved_questions): @@ -166,40 +181,46 @@ def make_wide(df_bot_peer, df_pro_bot_resolved_questions): Returns: pandas.DataFrame: Wide-format DataFrame with question weights merged. 
""" - df_pivoted = df_bot_peer.pivot(index='bot_question_id', columns='forecaster', values='score') + df_pivoted = df_bot_peer.pivot( + index="bot_question_id", columns="forecaster", values="score" + ) df_pivoted = df_pivoted.reset_index() df_pivoted = df_pivoted.reindex(sorted(df_pivoted.columns), axis=1) # Step 4: Move 'question_id' to be the first column cols = df_pivoted.columns.tolist() - cols = ['bot_question_id'] + [col for col in cols if col != 'bot_question_id'] + cols = ["bot_question_id"] + [col for col in cols if col != "bot_question_id"] df_pivoted = df_pivoted[cols] all_columns = df_pivoted.columns.tolist() - ## Remove 'question_id' and 'bot_median' from the list if they exist - all_columns = [col for col in all_columns if col not in ['bot_question_id']] - new_column_order = ['bot_question_id'] + all_columns + # Remove 'question_id' and 'bot_median' from the list if they exist + all_columns = [col for col in all_columns if col not in ["bot_question_id"]] + new_column_order = ["bot_question_id"] + all_columns df_pivoted = df_pivoted[new_column_order] df_bot_peer_wide = df_pivoted - df_bot_peer_wide['bot_question_id'] = pd.to_numeric(df_bot_peer_wide['bot_question_id'], errors='coerce') + df_bot_peer_wide["bot_question_id"] = pd.to_numeric( + df_bot_peer_wide["bot_question_id"], errors="coerce" + ) # Join with df_pro_bot_resolved_questions to get question weights df_bot_peer_wide = pd.merge( df_bot_peer_wide, - df_pro_bot_resolved_questions[['bot_question_id', 'question_weight']], - on='bot_question_id', - how='left' + df_pro_bot_resolved_questions[["bot_question_id", "question_weight"]], + on="bot_question_id", + how="left", ) return df_bot_peer_wide + """ Options from https://stats.stackexchange.com/questions/47325/bias-correction-in-weighted-variance I didn't think (B) beared trying, but could be wrong. 
- MGH -It makes very little difference here but (C) does seem to be the correct formula - corrects for +It makes very little difference here but (C) does seem to be the correct formula - corrects for the bias in the sample variance. """ + def calc_weighted_std_dev(df3, bot, weighted_score, weighted_count, weight_col): """ Calculates the weighted standard deviation using Molly's method - (A) from stack exchange post. @@ -215,8 +236,12 @@ def calc_weighted_std_dev(df3, bot, weighted_score, weighted_count, weight_col): float: Weighted standard deviation. """ weighted_average = weighted_score / weighted_count - return np.sqrt(((df3[bot] - weighted_average) ** 2 * df3[weight_col]).sum() / (weighted_count - 1)) - + return np.sqrt( + ((df3[bot] - weighted_average) ** 2 * df3[weight_col]).sum() + / (weighted_count - 1) + ) + + def calc_weighted_std_dev2(df3, bot, weighted_score, weighted_count, weight_col): """ Calculates the weighted standard deviation using Claude (via Nikos) method - (C) from stack exchange post. @@ -233,11 +258,15 @@ def calc_weighted_std_dev2(df3, bot, weighted_score, weighted_count, weight_col) """ weighted_average = weighted_score / weighted_count return np.sqrt( - (df3[weight_col] * (df3[bot] - weighted_average) ** 2).sum() / - (df3[weight_col].sum() * (1 - (df3[weight_col] ** 2).sum() / (df3[weight_col].sum() ** 2))) + (df3[weight_col] * (df3[bot] - weighted_average) ** 2).sum() + / ( + df3[weight_col].sum() + * (1 - (df3[weight_col] ** 2).sum() / (df3[weight_col].sum() ** 2)) + ) ) -def weighted_bootstrap_analysis(df_bot_peer_wide, bots, NUM, ITER): + +def weighted_bootstrap_analysis(df_bot_peer_wide: pd.DataFrame, bots: list[str], NUM: int, ITER: int): """ Performs weighted bootstrap analysis to calculate confidence intervals and medians. @@ -250,10 +279,11 @@ def weighted_bootstrap_analysis(df_bot_peer_wide, bots, NUM, ITER): Returns: pandas.DataFrame: DataFrame with confidence intervals and medians for each bot. 
""" + # Function to perform a single bootstrap iteration - def single_bootstrap(df): + def single_bootstrap(df: pd.DataFrame): # Weighted sampling of questions - sampled_df = df.sample(n=NUM, weights='question_weight', replace=True) + sampled_df = df.sample(n=NUM, weights="question_weight", replace=True) # Calculate total weighted score for each bot return sampled_df[bots].sum() @@ -271,32 +301,35 @@ def single_bootstrap(df): median = results_df.median() # Create output DataFrame - output_df = pd.DataFrame({ - '2.5% CI': ci_low, - '10% CI': ci_10, - 'Median': median, - '90% CI': ci_90, - '97.5% CI': ci_high - }) + output_df = pd.DataFrame( + { + "2.5% CI": ci_low, + "10% CI": ci_10, + "Median": median, + "90% CI": ci_90, + "97.5% CI": ci_high, + } + ) # Sort by median descending - output_df = output_df.sort_values('Median', ascending=False) + output_df = output_df.sort_values("Median", ascending=False) return output_df + def get_median_forecast_multiple_choice(row, forecasts): """ Given a row (with 'options' and 'resolution') and a list of forecasts (each a list of floats), returns the median probability assigned to the resolution option. 
""" - options = row['options'] - resolution = row['resolution'] + options = row["options"] + resolution = row["resolution"] try: resolution_idx = options.index(resolution) - #print(f"Resolution '{resolution}' found at index {resolution_idx} in options {options}") + # print(f"Resolution '{resolution}' found at index {resolution_idx} in options {options}") except ValueError: - #print(f"Resolution '{resolution}' not found in options {options} — returning np.nan") + # print(f"Resolution '{resolution}' not found in options {options} — returning np.nan") return np.nan # Resolution not found in options probs = [] @@ -309,26 +342,32 @@ def get_median_forecast_multiple_choice(row, forecasts): continue if not probs: - #print(f"NO PROBS collected for multiple-choice question {row.get('bot_question_id')} — returning np.nan") + # print(f"NO PROBS collected for multiple-choice question {row.get('bot_question_id')} — returning np.nan") return np.nan - return np.nanmedian(probs) + median_forecast = [] # NOTE: This forecast will not add to 1, but we only need the median for the resolution + for i, _ in enumerate(options): + if i == resolution_idx: + median_forecast.append(np.nanmedian(probs)) + else: + median_forecast.append(0.0001) # this is filler @Check: This won't screw anything up right? Perviously we were just returning the probability of resolution + + return median_forecast + def get_median_forecast(row, bots): """ - @BEN: Check - Calculates the median forecast for a given set of bots, handling different question types properly. - + Args: df (pandas.DataFrame): DataFrame with bot forecast columns and question metadata. bots (list): List of bot column names. - + Returns: pandas.Series: Median forecast for each row. 
""" - q_type = row['type'] - + q_type = row["type"] + forecasts = [] for bot in bots: f_raw = row.get(bot) @@ -341,49 +380,49 @@ def get_median_forecast(row, bots): continue else: forecasts.append(f_raw) # Already parsed float or list - + if not forecasts: return np.nan - if q_type == 'numeric': - forecasts = [f for f in forecasts if isinstance(f, list)] + if q_type == "numeric": + numeric_forecasts: list[list[float]] = [f for f in forecasts if isinstance(f, list)] - if not forecasts: + if not numeric_forecasts: return np.nan - cdfs_array = np.array(forecasts, dtype=float) - mean_cdf = np.mean(cdfs_array, axis=0) + cdfs_array = np.array(numeric_forecasts, dtype=float) + median_cdf = np.median(cdfs_array, axis=0) - return mean_cdf + return median_cdf - elif q_type == 'binary': - probs = [] + elif q_type == "binary": + probs: list[float] = [] for f in forecasts: try: val = float(f) probs.append(val) - except (ValueError, TypeError): - print(f' Invalid forecast: {f} — error {e}') + except (ValueError, TypeError) as e: + print(f" Invalid forecast: {f} — error {e}") continue if not probs: - print(f" >>> NO PROBS collected for binary question {row.get('bot_question_id')} — returning np.nan") + print( + f" >>> NO PROBS collected for binary question {row.get('bot_question_id')} — returning np.nan" + ) return np.nan print(f" >>> Collected {len(probs)} forecasts: {probs}") return np.nanmedian(probs) - elif q_type == 'multiple_choice': + elif q_type == "multiple_choice": return get_median_forecast_multiple_choice(row, forecasts) else: raise ValueError(f"Unknown question type: {q_type}") -def calculate_weighted_scores(df_bot_team_forecasts, teams): +def calculate_weighted_scores(df_bot_team_forecasts: pd.DataFrame, teams: list[str]) -> pd.Series: """ - @BEN: check - Calculates weighted scores for each team based on their forecasts and question weights. 
Args: @@ -396,89 +435,58 @@ def calculate_weighted_scores(df_bot_team_forecasts, teams): team_scores = {team: 0.0 for team in teams} for _, row in df_bot_team_forecasts.iterrows(): - q_type = row['type'] - resolution = row['resolution'] - options = row.get('options') - range_min = row.get('range_min') - range_max = row.get('range_max') - question_weight = row['question_weight'] - for team in teams: - forecast = row[team] - - if forecast is None or (isinstance(forecast, float) and np.isnan(forecast)): + # @Check: that the row conversion is corret + cleaned_row = _prepare_new_row_for_scoring(row, [team]) + if _is_unscorable(cleaned_row, [team]): continue - baseline_score = None + forecast = cleaned_row[team] + resolution = cleaned_row["resolution"] + options = cleaned_row["options"] + range_min = cleaned_row["range_min"] + range_max = cleaned_row["range_max"] + question_weight = cleaned_row["question_weight"] + open_upper_bound = cleaned_row["open_upper_bound"] + open_lower_bound = cleaned_row["open_lower_bound"] + question_type = cleaned_row["type"] try: - if q_type == 'binary': - forecast_val = float(forecast) - baseline_prob = 0.5 - if resolution == 'yes': - p_team = forecast_val - elif resolution == 'no': - p_team = 1 - forecast_val - else: - continue # Skip if invalid resolution - - elif q_type == 'multiple_choice': - pmf = [float(p) for p in forecast] - options = [str(opt) for opt in options] - resolution_idx = options.index(str(resolution)) - p_team = pmf[resolution_idx] - baseline_prob = 1 / len(pmf) - - elif q_type == 'numeric': - cdf = [float(p) for p in forecast] - pmf = [cdf[0]] + [cdf[i] - cdf[i-1] for i in range(1, len(cdf))] - pmf.append(1 - cdf[-1]) - - resolution = float(resolution) - if range_min is None or range_max is None: - continue - bin_edges = np.linspace(range_min, range_max, 200) - resolution_idx = np.searchsorted(bin_edges, resolution, side='right') - - if resolution_idx >= len(pmf): - continue # Skip if out of bounds - - p_team = 
pmf[resolution_idx] - baseline_prob = 1 / len(pmf) # bins = 201 because of extra appended bin - - else: - continue # Unknown question type - - if p_team <= 0 or baseline_prob <= 0: - continue # Avoid log(0) issues - - baseline_score = np.log2(p_team / baseline_prob) - - if q_type == 'numeric': - baseline_score /= 2 # Numeric scores are halved - - weighted_score = baseline_score * question_weight + weighted_score = calculate_baseline_score( + forecast=forecast, + resolution=resolution, + q_type=question_type, + options=options, + range_min=range_min, + range_max=range_max, + question_weight=question_weight, + open_upper_bound=open_upper_bound, + open_lower_bound=open_lower_bound, + ) team_scores[team] += weighted_score - except (ValueError, TypeError, IndexError): + except (ValueError, TypeError, IndexError) as e: + print(f" >>> Error calculating baseline score for question {row.get('bot_question_id')} — skipping: {e}") + # @Check: Does skipping introduce any problems? continue # Be robust to bad/missing data return pd.Series(team_scores) -def calculate_t_test(df_input, bot_list, weight_col='question_weight'): + +def calculate_t_test(df_input, bot_list, weight_col="question_weight"): """ Calculates weighted statistics, including t-test and p-values, for multiple bots. Args: - df_input (pandas.DataFrame): + df_input (pandas.DataFrame): DataFrame with peer scores, such as `df_bot_vs_pro_peer`, comparing each bot to the pro median. - bot_list (list): + bot_list (list): List of column names corresponding to bot scores. - weight_col (str, optional): + weight_col (str, optional): Name of the column containing weights. Defaults to 'question_weight'. Returns: - pandas.DataFrame: + pandas.DataFrame: Leaderboard DataFrame with calculated statistics for each bot, including: - W_score: Weighted score. - W_count: Weighted count. 
@@ -494,35 +502,39 @@ def calculate_t_test(df_input, bot_list, weight_col='question_weight'): """ # Initialize results dataframe df_W_leaderboard = pd.DataFrame(index=bot_list) - + for bot in bot_list: # Create working copy with just needed columns df3 = df_input[[bot, weight_col]].copy() df3 = df3.dropna() df3 = df3.reset_index(drop=True) - + # Calculate weighted statistics weighted_score = (df3[bot] * df3[weight_col]).sum() weighted_count = df3[weight_col].sum() - + if weighted_count > 2: # Only calculate if we have enough data weighted_average = weighted_score / weighted_count - weighted_std_dev = calc_weighted_std_dev2(df3, bot, weighted_score, weighted_count, weight_col) + weighted_std_dev = calc_weighted_std_dev2( + df3, bot, weighted_score, weighted_count, weight_col + ) std_error = weighted_std_dev / np.sqrt(weighted_count) t_statistic = (weighted_average - 0) / std_error - + # Get t-critical value and confidence bounds effective_n = (df3[weight_col].sum() ** 2) / (df3[weight_col] ** 2).sum() t_crit = stats.t.ppf(0.975, df=effective_n - 1) # 95% confidence level upper_bound = weighted_average + t_crit * std_error lower_bound = weighted_average - t_crit * std_error - + # Calculate CDF and p-value - cdf = stats.t.cdf(t_statistic, df=weighted_count-1) + cdf = stats.t.cdf(t_statistic, df=weighted_count - 1) p_value = 2 * min(cdf, 1 - cdf) # Two-tailed p-value - + else: # Not enough data - weighted_average = weighted_score / weighted_count if weighted_count > 0 else np.nan + weighted_average = ( + weighted_score / weighted_count if weighted_count > 0 else np.nan + ) weighted_std_dev = np.nan std_error = np.nan t_statistic = np.nan @@ -531,145 +543,50 @@ def calculate_t_test(df_input, bot_list, weight_col='question_weight'): lower_bound = np.nan cdf = np.nan p_value = np.nan - + # Store results - df_W_leaderboard.loc[bot, 'W_score'] = weighted_score - df_W_leaderboard.loc[bot, 'W_count'] = weighted_count - df_W_leaderboard.loc[bot, 'W_ave'] = weighted_average - 
df_W_leaderboard.loc[bot, 'W_stdev'] = weighted_std_dev - df_W_leaderboard.loc[bot, 'std_err'] = std_error - df_W_leaderboard.loc[bot, 't_stat'] = t_statistic - df_W_leaderboard.loc[bot, 't_crit'] = t_crit - df_W_leaderboard.loc[bot, 'upper_bound'] = upper_bound - df_W_leaderboard.loc[bot, 'lower_bound'] = lower_bound - df_W_leaderboard.loc[bot, 'cdf'] = cdf - df_W_leaderboard.loc[bot, 'p_value'] = p_value - + df_W_leaderboard.loc[bot, "W_score"] = weighted_score + df_W_leaderboard.loc[bot, "W_count"] = weighted_count + df_W_leaderboard.loc[bot, "W_ave"] = weighted_average + df_W_leaderboard.loc[bot, "W_stdev"] = weighted_std_dev + df_W_leaderboard.loc[bot, "std_err"] = std_error + df_W_leaderboard.loc[bot, "t_stat"] = t_statistic + df_W_leaderboard.loc[bot, "t_crit"] = t_crit + df_W_leaderboard.loc[bot, "upper_bound"] = upper_bound + df_W_leaderboard.loc[bot, "lower_bound"] = lower_bound + df_W_leaderboard.loc[bot, "cdf"] = cdf + df_W_leaderboard.loc[bot, "p_value"] = p_value + # Format and round the results - df_W_leaderboard['W_score'] = df_W_leaderboard['W_score'].round(1) + df_W_leaderboard["W_score"] = df_W_leaderboard["W_score"].round(1) # Store numerical p-values temporarily for sorting - df_W_leaderboard['_p_value_sort'] = df_W_leaderboard['p_value'] - + df_W_leaderboard["_p_value_sort"] = df_W_leaderboard["p_value"] + # Format p-values as percentages - df_W_leaderboard['p_value'] = df_W_leaderboard['p_value'].apply( + df_W_leaderboard["p_value"] = df_W_leaderboard["p_value"].apply( lambda x: f"{x:.6f}" if pd.notnull(x) else "NA" ) - + # Round other columns - df_W_leaderboard[['W_ave', 'W_count', 'lower_bound', 'upper_bound']] = \ - df_W_leaderboard[['W_ave', 'W_count', 'lower_bound', 'upper_bound']].round(1) - + df_W_leaderboard[["W_ave", "W_count", "lower_bound", "upper_bound"]] = ( + df_W_leaderboard[["W_ave", "W_count", "lower_bound", "upper_bound"]].round(1) + ) + # Sort by the numerical p-values df_W_leaderboard = df_W_leaderboard.sort_values( - 
by='W_score', - ascending=False, - na_position='last' + by="W_score", ascending=False, na_position="last" ) - - # Drop the temporary sorting column - df_W_leaderboard = df_W_leaderboard.drop('_p_value_sort', axis=1) - - return df_W_leaderboard - -def calculate_head_to_head(row, a, b): - """ - @BEN: Check... - Calculates the head-to-head score for two forecasters. - Positive if 'a' did better than 'b', negative if 'b' did better than 'a'. - - Args: - row (pandas.Series): Row containing 'resolution', 'type', and forecast columns. - a (str): Column name for first forecaster. - b (str): Column name for second forecaster. - - Returns: - float: Head-to-head score. - """ - q_type = row['type'] - resolution = row['resolution'] - options = row['options'] - range_min = row.get('range_min') - range_max = row.get('range_max') - - forecast_a = row[a] - forecast_b = row[b] - - if q_type == 'binary': - if (resolution == 'yes') or (resolution == 1): - return 100 * np.log(forecast_a / forecast_b) - elif (resolution == 'no') or (resolution == 0): - return 100 * np.log((1 - forecast_a) / (1 - forecast_b)) - else: - return np.nan - - elif q_type == 'multiple_choice': - # Parse forecast_a if it's a string - if isinstance(forecast_a, str): - forecast_a = ast.literal_eval(forecast_a) - options = ast.literal_eval(row['options']) if isinstance(row['options'], str) else row['options'] - resolution_idx = options.index(str(row['resolution'])) - forecast_a = forecast_a[resolution_idx] - - # Parse forecast_b if it's a string - if isinstance(forecast_b, str): - forecast_b = ast.literal_eval(forecast_b) - options = ast.literal_eval(row['options']) if isinstance(row['options'], str) else row['options'] - resolution_idx = options.index(str(row['resolution'])) - forecast_b = forecast_b[resolution_idx] - - # Now both are floats with the prob assigned to the correct bin - return 100 * np.log(forecast_a / forecast_b) - - elif q_type == 'numeric': - # Ensure both forecasts are Python lists - if 
isinstance(forecast_a, str): - forecast_a = ast.literal_eval(forecast_a) - elif isinstance(forecast_a, np.ndarray): - forecast_a = forecast_a.tolist() - - if isinstance(forecast_b, str): - forecast_b = ast.literal_eval(forecast_b) - elif isinstance(forecast_b, np.ndarray): - forecast_b = forecast_b.tolist() - - if not forecast_a or not forecast_b: - return np.nan - - cdf_a = forecast_a - cdf_b = forecast_b - - pmf_a = [cdf_a[0]] + [cdf_a[i] - cdf_a[i-1] for i in range(1, len(cdf_a))] - pmf_a.append(1 - cdf_a[-1]) - - pmf_b = [cdf_b[0]] + [cdf_b[i] - cdf_b[i-1] for i in range(1, len(cdf_b))] - pmf_b.append(1 - cdf_b[-1]) - - bin_edges = np.linspace(range_min, range_max, 200) - - if resolution == "below_lower_bound": - resolution_idx = 0 - elif resolution == "above_upper_bound": - resolution_idx = len(pmf_a) - 1 # i.e., 200 - else: - try: - resolution_val = float(resolution) - resolution_idx = np.searchsorted(bin_edges, resolution_val, side='right') - except ValueError: - print(f"Bad resolution value: {resolution}") - return np.nan - - p_a = pmf_a[resolution_idx] - p_b = pmf_b[resolution_idx] + # Drop the temporary sorting column + df_W_leaderboard = df_W_leaderboard.drop("_p_value_sort", axis=1) - if p_a <= 0 or p_b <= 0: - print(f"Invalid PMF values: p_a={p_a}, p_b={p_b}") - return np.nan + return df_W_leaderboard - return 100 * np.log(p_a / p_b) -def plot_head_to_head_distribution(df_forecasts, col='head_to_head', vs=('Bot Team', 'Pros')): +def plot_head_to_head_distribution( + df_forecasts, col="head_to_head", vs=("Bot Team", "Pros") +): """ Plots the distribution of head-to-head scores and fits a Gaussian curve. 
@@ -690,23 +607,23 @@ def plot_head_to_head_distribution(df_forecasts, col='head_to_head', vs=('Bot Te # Create the histogram plt.figure(figsize=(10, 6)) - n, bins, patches = plt.hist(data, bins=30, density=True, alpha=0.7, color='skyblue') + n, bins, patches = plt.hist(data, bins=30, density=True, alpha=0.7, color="skyblue") # Generate points for the fitted Gaussian curve x = np.linspace(min(data), max(data), 100) y = norm.pdf(x, mean, std) # Plot the fitted Gaussian curve - plt.plot(x, y, 'r-', linewidth=2, label='Fitted Gaussian') + plt.plot(x, y, "r-", linewidth=2, label="Fitted Gaussian") # Customize the plot - plt.title(f'{vs[0]} Head-to-Head Scores vs {vs[1]}') - plt.xlabel('Head-to-Head Score') - plt.ylabel('Density') + plt.title(f"{vs[0]} Head-to-Head Scores vs {vs[1]}") + plt.xlabel("Head-to-Head Score") + plt.ylabel("Density") plt.legend() # Add text annotation for the mean - #plt.text(0.95, 0.95, f'Mean: {mean:.2f}', transform=plt.gca().transAxes, verticalalignment='top', horizontalalignment='right') + # plt.text(0.95, 0.95, f'Mean: {mean:.2f}', transform=plt.gca().transAxes, verticalalignment='top', horizontalalignment='right') # Display the plot plt.show() @@ -714,7 +631,85 @@ def plot_head_to_head_distribution(df_forecasts, col='head_to_head', vs=('Bot Te # Print the average print(f"The average of 'head_to_head' is: {mean:.2f}") -def calculate_calibration_curve(forecasts, resolutions, weights): + +def plot_calibration_curve(df: pd.DataFrame, column_name: str, label: str, color: str): + """ + Plots a calibration curve with confidence intervals. + + Args: + df (pandas.DataFrame): DataFrame with forecast and resolution data. + column_name (str): Column name for forecast probabilities. + label (str): Label for the plot. + color (str): Color for the plot. 
+ + Returns: + None + """ + _assert_calibration_dataframe_matches_assumptions(df) + # Filter to binary questions in case the DataFrame has other types (0 or 1 INT or 'yes'/'no' STR) + df = df[df["resolution"].isin(["yes", "no", 1.0, 0.0])] + + # If any of df[column_name] are None, drop those rows + df = df[df[column_name].notnull()] + + y_true = df["resolution"] + y_pred = df[column_name] + weights = [1.0 for _ in y_true] + calibration_curve = _calculate_calibration_curve(y_pred, y_true, weights)[ + "calibration_curve" + ] + + prob_true = [item["average_resolution"] for item in calibration_curve] + bin_center = [ + (item["bin_lower"] + item["bin_upper"]) / 2 for item in calibration_curve + ] + ci_lower = [item["lower_confidence_interval"] for item in calibration_curve] + ci_upper = [item["upper_confidence_interval"] for item in calibration_curve] + + plt.plot(bin_center, prob_true, marker="o", linewidth=2, label=label, color=color) + plt.fill_between(bin_center, ci_lower, ci_upper, alpha=0.2, color=color) + for x, y in zip(bin_center, prob_true): + if x is None or y is None: + continue + plt.annotate( + f"({x:.2f}, {y:.2f})", + (x, y), + textcoords="offset points", + xytext=(0, 10), + ha="center", + color=color, + fontsize=8, + ) + +def _assert_calibration_dataframe_matches_assumptions(df: pd.DataFrame): + # 1. Only binary questions + assert (df['type'] == 'binary').all(), "DataFrame contains non-binary questions." + + # 2. Only valid resolutions (0, 1, 'yes', 'no') + valid_resolutions = {0, 1} + assert set(df['resolution'].unique()).issubset(valid_resolutions), ( + f"DataFrame contains invalid resolutions: {set(df['resolution'].unique()) - valid_resolutions}" + ) + + # 3. Each question_id appears only once (if grouped by question) + if 'question_id' in df.columns: + assert df['question_id'].is_unique, "Each question_id should appear only once." + + # 4. 
No missing values in key columns + for col in ['resolution', 'type']: + assert df[col].notnull().all(), f"Missing values found in column: {col}" + + # 5. Probabilities are between 0 and 1 for forecast columns + prob_cols = [col for col in df.columns if 'prob' in col or 'median' in col or 'forecast' in col] + for col in prob_cols: + if df[col].dtype.kind in {'f', 'i'}: + assert ((df[col] >= 0) & (df[col] <= 1)).all(), f"Column {col} contains values outside [0, 1]" + + # 6. DataFrame is not empty + assert not df.empty, "DataFrame is empty after filtering." + + +def _calculate_calibration_curve(forecasts: list[float], resolutions: list[int], weights: list[float]) -> dict: """ Calculates a calibration curve for forecasts. @@ -771,43 +766,6 @@ def calculate_calibration_curve(forecasts, resolutions, weights): "calibration_curve": calibration_curve, } -def plot_calibration_curve(df, column_name, label, color): - """ - Plots a calibration curve with confidence intervals. - - Args: - df (pandas.DataFrame): DataFrame with forecast and resolution data. - column_name (str): Column name for forecast probabilities. - label (str): Label for the plot. - color (str): Color for the plot. 
- - Returns: - None - """ - # Filter to binary questions in case the DataFrame has other types (0 or 1 INT or 'yes'/'no' STR) - df = df[df['resolution'].isin(['yes', 'no', 1, 0])] - - y_true = df['resolution'] - y_pred = df[column_name] - weights = [1.0 for _ in y_true] - calibration_curve = calculate_calibration_curve(y_pred, y_true, weights)['calibration_curve'] - prob_true = [item['average_resolution'] for item in calibration_curve] - bin_center = [(item['bin_lower'] + item['bin_upper']) / 2 for item in calibration_curve] - ci_lower = [item['lower_confidence_interval'] for item in calibration_curve] - ci_upper = [item['upper_confidence_interval'] for item in calibration_curve] - - plt.plot(bin_center, prob_true, marker='o', linewidth=2, label=label, color=color) - plt.fill_between(bin_center, ci_lower, ci_upper, alpha=0.2, color=color) - for x, y in zip(bin_center, prob_true): - if x is None or y is None: - continue - plt.annotate(f'({x:.2f}, {y:.2f})', - (x, y), - textcoords="offset points", - xytext=(0,10), - ha='center', - color=color, - fontsize=8) def calculate_confidence(predictions, outcomes): """ @@ -824,9 +782,11 @@ def calculate_confidence(predictions, outcomes): bins = pd.cut(predictions, bins=10) # Calculate mean prediction and actual outcome for each bin - grouped = pd.DataFrame({'prediction': predictions, 'outcome': outcomes}).groupby(bins) - mean_prediction = grouped['prediction'].mean() - mean_outcome = grouped['outcome'].mean() + grouped = pd.DataFrame({"prediction": predictions, "outcome": outcomes}).groupby( + bins + ) + mean_prediction = grouped["prediction"].mean() + mean_outcome = grouped["outcome"].mean() # Calculate the difference between mean prediction and mean outcome confidence_diff = mean_prediction - mean_outcome @@ -834,6 +794,7 @@ def calculate_confidence(predictions, outcomes): # Return the average difference (excluding NaN values) return np.nanmean(confidence_diff) + def interpret_confidence(score): """ Interprets the confidence 
score. @@ -851,6 +812,7 @@ def interpret_confidence(score): else: return "Perfectly calibrated" + def create_discrimination_histogram(df, bot_col, pro_col, resolution_col): """ Creates histograms to compare discrimination between bot and pro teams. @@ -866,40 +828,47 @@ def create_discrimination_histogram(df, bot_col, pro_col, resolution_col): """ # Create figure and axes fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12)) - + # Define bin edges bins = np.linspace(0, 1, 6) - + # Top plot: Questions that resolved 1 - ax1.hist([df[df[resolution_col] == 1][bot_col], - df[df[resolution_col] == 1][pro_col]], - bins=bins, label=['Bot Team', 'Pro Team'], alpha=0.7) - ax1.set_title('Questions that Resolved \'Yes\'') - ax1.set_xlabel('Assigned Probability') - ax1.set_ylabel('Frequency') + ax1.hist( + [df[df[resolution_col] == 1][bot_col], df[df[resolution_col] == 1][pro_col]], + bins=bins, + label=["Bot Team", "Pro Team"], + alpha=0.7, + ) + ax1.set_title("Questions that Resolved 'Yes'") + ax1.set_xlabel("Assigned Probability") + ax1.set_ylabel("Frequency") ax1.legend() # Set integer y-ticks for top plot ymax1 = int(np.ceil(ax1.get_ylim()[1])) ax1.set_yticks(range(0, ymax1 + 1, 2)) - + # Bottom plot: Questions that resolved 0 - ax2.hist([df[df[resolution_col] == 0][bot_col], - df[df[resolution_col] == 0][pro_col]], - bins=bins, label=['Bot Team', 'Pro Team'], alpha=0.7) - ax2.set_title('Questions that Resolved \'No\'') - ax2.set_xlabel('Assigned Probability') - ax2.set_ylabel('Frequency') + ax2.hist( + [df[df[resolution_col] == 0][bot_col], df[df[resolution_col] == 0][pro_col]], + bins=bins, + label=["Bot Team", "Pro Team"], + alpha=0.7, + ) + ax2.set_title("Questions that Resolved 'No'") + ax2.set_xlabel("Assigned Probability") + ax2.set_ylabel("Frequency") ax2.legend() # Set integer y-ticks for bottom plot ymax2 = int(np.ceil(ax2.get_ylim()[1])) ax2.set_yticks(range(0, ymax2 + 1, 10)) - + # Adjust layout and display plt.tight_layout() plt.show() + def 
get_weighted_score(df_forecasts): """ Calculates the weighted total score for forecasts. @@ -911,13 +880,15 @@ def get_weighted_score(df_forecasts): float: Weighted total score. """ # Calculate the weighted score for each row - df_forecasts['weighted_score'] = df_forecasts['head_to_head'] * df_forecasts['question_weight'] + df_forecasts["weighted_score"] = ( + df_forecasts["head_to_head"] * df_forecasts["question_weight"] + ) # Calculate the total weighted score - total_weighted_score = df_forecasts['weighted_score'].sum() + total_weighted_score = df_forecasts["weighted_score"].sum() # Calculate the sum of weights - total_weight = df_forecasts['question_weight'].sum() + total_weight = df_forecasts["question_weight"].sum() # Calculate the weighted total score weighted_total_score = total_weighted_score / total_weight @@ -926,8 +897,10 @@ def get_weighted_score(df_forecasts): return weighted_total_score + # ====== CODE FROM LUKE, REFACTORED BY CHATGPT ======= + def string_location_to_scaled_location(string_location, question_row): """ Converts a string location to a scaled location based on question type. @@ -962,6 +935,7 @@ def string_location_to_scaled_location(string_location, question_row): # question.type == "numeric" return float(string_location) + def scaled_location_to_unscaled_location(scaled_location, question_row): """ Converts a scaled location to an unscaled location based on question type. 
@@ -985,13 +959,17 @@ def scaled_location_to_unscaled_location(scaled_location, question_row): if zero_point is not None: deriv_ratio = (range_max - zero_point) / max((range_min - zero_point), 1e-7) return ( - np.log((scaled_location - range_min) * (deriv_ratio - 1) + (range_max - range_min)) + np.log( + (scaled_location - range_min) * (deriv_ratio - 1) + + (range_max - range_min) + ) - np.log(range_max - range_min) ) / np.log(deriv_ratio) return (scaled_location - range_min) / (range_max - range_min) -def nominal_location_to_cdf_location(nominal_location, question_data): + +def nominal_location_to_cdf_location_via_question_dict(nominal_location, question_data): """ Takes a location in nominal format (e.g. 123, "123", or datetime in iso format) and scales it to metaculus's "internal representation" range [0, 1] incorporating question scaling @@ -1003,28 +981,16 @@ def nominal_location_to_cdf_location(nominal_location, question_data): Returns: float: CDF location. """ - if question_data["type"] == "date": - scaled_location = datetime.fromisoformat(nominal_location).timestamp() - else: - scaled_location = float(nominal_location) + # Unscale the value to put it into the range [0,1] range_min = question_data["range_min"] range_max = question_data["range_max"] zero_point = question_data["zero_point"] - if ~np.isnan(zero_point) and (zero_point is not None): - # logarithmically scaled question - deriv_ratio = (range_max - zero_point) / (range_min - zero_point) - unscaled_location = ( - np.log( - (scaled_location - range_min) * (deriv_ratio - 1) - + (range_max - range_min) - ) - - np.log(range_max - range_min) - ) / np.log(deriv_ratio) - else: - # linearly scaled question - unscaled_location = (scaled_location - range_min) / (range_max - range_min) - return unscaled_location + + return nominal_location_to_cdf_location( + nominal_location, range_min, range_max, zero_point + ) + def get_cdf_at(cdf, unscaled_location): """ @@ -1045,7 +1011,8 @@ def get_cdf_at(cdf, 
unscaled_location): if index_scaled_location.is_integer(): return cdf[int(index_scaled_location)] # linear interpolation step - left_index = int(index_scaled_location) # This is the floor, which is what we want + # This is the floor, which is what we want + left_index = int(index_scaled_location) right_index = left_index + 1 left_value = cdf[left_index] right_value = cdf[right_index] @@ -1053,8 +1020,10 @@ def get_cdf_at(cdf, unscaled_location): index_scaled_location - left_index ) + # ======== END OF LUKE'S CODE ========== + def cdf_between(row, cdf, lower_bound, upper_bound): """ Calculates the probability between two bounds using the CDF. @@ -1068,9 +1037,14 @@ def cdf_between(row, cdf, lower_bound, upper_bound): Returns: float: Probability between the bounds. """ - a = get_cdf_at(cdf, nominal_location_to_cdf_location(lower_bound, row)) - b = get_cdf_at(cdf, nominal_location_to_cdf_location(upper_bound, row)) - return (b - a) + a = get_cdf_at( + cdf, nominal_location_to_cdf_location_via_question_dict(lower_bound, row) + ) + b = get_cdf_at( + cdf, nominal_location_to_cdf_location_via_question_dict(upper_bound, row) + ) + return b - a + def extract_year(title): """ @@ -1082,9 +1056,10 @@ def extract_year(title): Returns: int or None: Extracted year or None if not found. """ - match = re.search(r'\b(19|20)\d{2}\b', title) + match = re.search(r"\b(19|20)\d{2}\b", title) return int(match.group(0)) if match else None + def extract_month(title): """ Extracts the month from a title string. @@ -1095,9 +1070,13 @@ def extract_month(title): Returns: str or None: Extracted month or None if not found. 
""" - match = re.search(r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\b', title) + match = re.search( + r"\b(January|February|March|April|May|June|July|August|September|October|November|December)\b", + title, + ) return match.group(0) if match else None + def compute_cp_baseline_score(value): """ Gracefully computes the cp_baseline_score. @@ -1118,6 +1097,7 @@ def compute_cp_baseline_score(value): # Handle any unexpected errors return np.nan + def process_forecast_values(df): """ Adds a 'bucket_forecast_value' column to the DataFrame (for interpreting CP distribution as a @@ -1130,61 +1110,224 @@ def process_forecast_values(df): Returns: pandas.DataFrame: Updated DataFrame with 'bucket_forecast_value' column added. """ + def compute_bucket_forecast_value(row): # Handle binary_version_tuple gracefully - if pd.isna(row['binary_version_tuple']) or not isinstance(row['binary_version_tuple'], (list, tuple)): + if pd.isna(row["binary_version_tuple"]) or not isinstance( + row["binary_version_tuple"], (list, tuple) + ): return None - + # Extract the first and second elements of the tuple - comparison_type = row['binary_version_tuple'][0] - string_location = row['binary_version_tuple'][1] - + comparison_type = row["binary_version_tuple"][0] + string_location = row["binary_version_tuple"][1] + # Skip if comparison_type is 'complicated' - if comparison_type == 'complicated': + if comparison_type == "complicated": return None - + # Compute forecast_value using the extracted string_location - forecast_value = get_cdf_at(row['cdf'], nominal_location_to_cdf_location(string_location, row)) - + forecast_value = get_cdf_at( + row["cdf"], + nominal_location_to_cdf_location_via_question_dict(string_location, row), + ) + # Apply logic based on comparison_type - if comparison_type == 'less': + if comparison_type == "less": return forecast_value - elif comparison_type == 'greater': + elif comparison_type == "greater": return 1 - 
forecast_value - + return None # Apply the function to each row and overwrite forecast_value (currently contains cdf, which we no longer need) - df['forecast_values'] = df.apply(compute_bucket_forecast_value, axis=1) + df["forecast_values"] = df.apply(compute_bucket_forecast_value, axis=1) return df -def parse_options_array(options_str): + +def parse_options_array(options_str: str) -> list[str]: """ Parse options string that looks like an array into an actual array. - + Args: options_str: String representation of options array (e.g. '["0","1","2-3","4-6",">6"]') - + Returns: List of option strings """ if not isinstance(options_str, str): return options_str # Already parsed or None - + + if options_str == "[]": + return [] # This can happen for numeric/binary questions with no options + + options = [] try: # First try using eval (safer than literal_eval for this specific case) - options_array = eval(options_str) - return options_array + options = eval(options_str) except: # If that fails, try custom parsing # Strip brackets and split by comma - cleaned = options_str.strip('[]') + cleaned = options_str.strip("[]") # Split by comma, but respect quotes - import re + # Match items in quotes with commas inside parts = re.findall(r'"([^"]*)"', cleaned) if parts: - return parts - - # Simple fallback: just split by comma and strip quotes - return [p.strip().strip('"\'') for p in cleaned.split(',')] + options = parts + else: + # Simple fallback: just split by comma and strip quotes + options = [p for p in cleaned.split(",")] + stripped_options = [p.strip("\"' ") for p in options] + if len(stripped_options) == 0: + raise ValueError(f"No options found in {options_str}") + return stripped_options + + +def calculate_weighted_h2h_score_between_two_forecast_columns( + row: pd.Series, col_a: str, col_b: str +) -> float: + """ + Calculates the head-to-head score for two forecasters. + Positive if 'a' did better than 'b', negative if 'b' did better than 'a'. 
+ + Args: + row (pandas.Series): Row containing 'resolution', 'type', and forecast columns. + col_a (str): Column name for first forecaster. + col_b (str): Column name for second forecaster. + + Returns: + float: Head-to-head score. + """ + # @Check: that the row conversion is correct + + cleaned_row = _prepare_new_row_for_scoring(row, [col_a, col_b]) + if _is_unscorable(cleaned_row, [col_a, col_b]): + return np.nan + + question_type = cleaned_row["type"] + forecast_a = cleaned_row[col_a] + forecast_b = cleaned_row[col_b] + resolution = cleaned_row["resolution"] + options = cleaned_row["options"] + range_min = cleaned_row["range_min"] + range_max = cleaned_row["range_max"] + question_weight = cleaned_row["question_weight"] + + score = calculate_peer_score( + q_type=question_type, + forecast=forecast_a, + forecast_for_other_users=[forecast_b], + resolution=resolution, + options=options, + range_min=range_min, + range_max=range_max, + question_weight=question_weight, + ) + return score + + +def _is_unscorable(row: pd.Series, forecast_columns_to_check_null: list[str]): + is_unscorable = False + for col in forecast_columns_to_check_null: + forecast = row[col] + if forecast is None: + is_unscorable = True + elif isinstance(forecast, float) and math.isnan(forecast): + is_unscorable = True + resolution = row["resolution"] + if resolution == "annulled" or resolution == "ambiguous": + is_unscorable = True + return is_unscorable + + +def _prepare_new_row_for_scoring( + original_row: pd.Series, forecast_columns: list[str] +) -> pd.Series: + new_row = original_row.copy() + question_type = original_row["type"] + + options = ( + original_row["options_parsed"] + if "options_parsed" in new_row + else new_row["options"] + ) + if isinstance(options, str): + options = options.strip("[]").split(",") + new_row["options"] = options + + resolution = original_row["resolution"] + question_type = original_row["type"] + if question_type == "binary": + if resolution == "yes": + resolution = True + 
elif resolution == "no": + resolution = False + + elif question_type == "multiple_choice": + resolution = resolution + elif question_type == "numeric": + if resolution == "above_upper_bound" or resolution == "below_lower_bound": + resolution = resolution + elif not isinstance(resolution, float): + resolution = float(resolution) + else: + raise ValueError(f"Unknown resolution type: {resolution}") + else: + raise ValueError(f"Unknown question type: {question_type}") + new_row["resolution"] = resolution + + range_min = original_row.get("range_min") + if range_min: + range_min = float(range_min) + new_row["range_min"] = range_min + + range_max = original_row.get("range_max") + if range_max: + range_max = float(range_max) + new_row["range_max"] = range_max + + question_weight = original_row["question_weight"] + if question_weight: + question_weight = float(question_weight) + new_row["question_weight"] = question_weight + + for col in forecast_columns: + forecast = original_row[col] + if isinstance(forecast, float) and math.isnan(forecast): + forecast = forecast + elif question_type == "binary": + if isinstance(forecast, str): + forecast = [float(forecast)] + forecast = [forecast] + elif isinstance(forecast, str): + forecast = [float(x) for x in forecast.strip("[]").split(",")] + + new_row[col] = forecast + + return new_row + + +def calculate_all_peer_scores(df, all_bots, pro_col="pro_median"): + """Calculate peer scores for all bots""" + # Create a new DataFrame to store peer scores + df_peer = df.copy() + + # Calculate peer score for each bot + for bot in all_bots: + df_peer[bot] = df.apply( + lambda row: calculate_weighted_h2h_score_between_two_forecast_columns( + row, bot, pro_col + ), + axis=1, + ) + + # Calculate peer score for bot_team_median + df_peer["bot_team_median"] = df.apply( + lambda row: calculate_weighted_h2h_score_between_two_forecast_columns( + row, "bot_median", pro_col + ), + axis=1, + ) + + return df_peer diff --git a/bot_to_main_feed_ids.csv 
b/misc_data/bot_to_main_feed_ids.csv similarity index 100% rename from bot_to_main_feed_ids.csv rename to misc_data/bot_to_main_feed_ids.csv diff --git a/notebook_outputs/bootstrapped_h2h_bot_vs_pros.csv b/notebook_outputs/bootstrapped_h2h_bot_vs_pros.csv new file mode 100644 index 0000000..6b92b92 --- /dev/null +++ b/notebook_outputs/bootstrapped_h2h_bot_vs_pros.csv @@ -0,0 +1,47 @@ +,2.5% CI,10% CI,Median,90% CI,97.5% CI +cobyj-bot,0.0,0.0,0.0,0.0,0.0 +andrewsiah,0.0,0.0,0.0,0.0,0.0 +X_bot,-0.0,-0.0,-0.0,0.0,0.0 +jonahsingerbot,-0.0,-0.0,-0.0,-0.0,-0.0 +bean_bot,-0.0,-0.0,-0.0,-0.0,-0.0 +RPM_bot,-0.1,-0.0,-0.0,0.0,0.0 +CumulativeBot,-0.0,-0.0,-0.0,-0.0,0.0 +swingswish,-0.0,-0.0,-0.0,-0.0,-0.0 +KevinTestBot,-0.1,-0.0,-0.0,0.0,0.0 +SynapseSeer,-0.1,-0.0,-0.0,0.0,0.0 +Grizeu_Bot,-0.2,-0.1,-0.0,0.1,0.2 +pianobot,-0.1,-0.1,-0.0,-0.0,0.0 +CatrachoCaster,-0.1,-0.1,-0.0,-0.0,0.0 +krm-bot,-0.1,-0.1,-0.1,-0.0,-0.0 +4Shadower,-0.1,-0.1,-0.1,-0.0,-0.0 +annabot,-0.1,-0.1,-0.1,-0.0,-0.0 +cookics_bot_TEST,-0.2,-0.1,-0.1,-0.0,0.0 +jkraybill_bot,-0.2,-0.1,-0.1,-0.0,-0.0 +twsummerbot,-0.2,-0.2,-0.1,-0.0,0.0 +MWG,-0.2,-0.2,-0.1,-0.0,-0.0 +ProfessorSP,-0.2,-0.2,-0.1,-0.0,-0.0 +ajf-bot,-0.2,-0.2,-0.1,-0.0,0.0 +acm_bot,-0.3,-0.2,-0.1,0.0,0.1 +GreeneiBot2,-0.3,-0.2,-0.1,-0.0,0.0 +metac-deepseek-r1+asknews,-0.2,-0.2,-0.1,-0.1,-0.0 +metac-Gemini-Exp-1206,-0.3,-0.2,-0.1,-0.0,0.1 +metac-o1,-0.3,-0.2,-0.1,0.0,0.1 +Bot_Pepa,-0.2,-0.2,-0.1,-0.1,-0.0 +laylaps,-0.2,-0.2,-0.1,-0.1,-0.0 +wunderplumb,-0.3,-0.2,-0.1,-0.1,-0.0 +bot_median,-0.3,-0.2,-0.1,-0.0,0.0 +metac-perplexity,-0.4,-0.3,-0.1,-0.0,0.1 +manticAI,-0.3,-0.2,-0.2,-0.1,-0.0 +NextWorldLab,-0.3,-0.3,-0.2,-0.1,0.0 +minefrac1,-0.3,-0.3,-0.2,-0.1,-0.1 +metac-claude-3-5-sonnet-latest,-0.4,-0.3,-0.2,-0.1,-0.1 +mmBot,-0.4,-0.3,-0.2,-0.1,-0.1 +metac-claude-3-5-sonnet-20240620,-0.4,-0.4,-0.2,-0.1,-0.0 +pgodzinai,-0.4,-0.4,-0.2,-0.1,-0.1 +metac-grok-2-1212,-0.4,-0.4,-0.2,-0.1,-0.1 +VeritasAI,-0.4,-0.3,-0.2,-0.2,-0.1 
+metac-o1-preview,-0.4,-0.4,-0.3,-0.1,-0.1 +metac-gpt-4o,-0.4,-0.4,-0.3,-0.1,-0.1 +metac-exa,-0.4,-0.4,-0.3,-0.2,-0.1 +InstitutPelFutur,-0.5,-0.4,-0.3,-0.2,-0.1 +metac-Llama-3.1,-0.5,-0.4,-0.3,-0.2,-0.1 diff --git a/weighted_bot_ONLY_peer_leaderboard_t_test.csv b/notebook_outputs/weighted_bot_ONLY_peer_leaderboard_t_test.csv similarity index 97% rename from weighted_bot_ONLY_peer_leaderboard_t_test.csv rename to notebook_outputs/weighted_bot_ONLY_peer_leaderboard_t_test.csv index 76b7626..029f529 100644 --- a/weighted_bot_ONLY_peer_leaderboard_t_test.csv +++ b/notebook_outputs/weighted_bot_ONLY_peer_leaderboard_t_test.csv @@ -15,7 +15,7 @@ metac-perplexity,734.7,264.3,2.8,62.518732274252,3.8454321257670965,0.7228462253 metac-exa,470.9,275.2,1.7,63.38280444669259,3.8205989842983494,0.4478599398298826,1.9681111912388756,9.2,-5.8,0.6726960546336258,0.654608 MWG,307.0,84.8,3.6,36.6252501807067,3.976544679654517,0.9101477753110279,1.987508353566517,11.5,-4.3,0.8173229386375491,0.365354 jkraybill_bot,219.6,162.4,1.4,71.12529221576798,5.5817601187391634,0.24232123347298368,1.9740758524924067,12.4,-9.7,0.5955805198867354,0.808839 -metac-deepseek-r1,172.5,225.8,0.8,38.0431452483966,2.5318249833740962,0.3017230896257882,1.9700645882216863,5.8,-4.2,0.6184289375422699,0.763142 +metac-deepseek-r1+asknews,172.5,225.8,0.8,38.0431452483966,2.5318249833740962,0.3017230896257882,1.9700645882216863,5.8,-4.2,0.6184289375422699,0.763142 pianobot,101.0,14.8,6.8,41.27615494222523,10.711147680523258,0.6349321054235654,2.1450947126002333,29.8,-16.2,0.7320891967624292,0.535822 metac-grok-2-1212,40.0,281.2,0.1,49.508070078167286,2.952248394236147,0.04816426739476925,1.967947383995502,6.0,-5.7,0.5191901814794234,0.961620 andrewsiah,2.6,25.1,0.1,35.80509173037023,7.1467391327710805,0.014679458541325375,2.0603406998894913,14.8,-14.6,0.5057956215530941,0.988409 diff --git a/weighted_bot_peer_leaderboard_t_test.csv b/notebook_outputs/weighted_bot_peer_leaderboard_t_test.csv similarity index 97% 
rename from weighted_bot_peer_leaderboard_t_test.csv rename to notebook_outputs/weighted_bot_peer_leaderboard_t_test.csv index 3a4a494..b32fa6b 100644 --- a/weighted_bot_peer_leaderboard_t_test.csv +++ b/notebook_outputs/weighted_bot_peer_leaderboard_t_test.csv @@ -11,7 +11,7 @@ Rank,Bot,W_score,W_count,W_ave,W_stdev,std_err,t_stat,t_crit,upper_bound,lower_b 10,metac-claude-3-5-sonnet-latest,951.3,370.3,2.6,38.26306555715613,1.988342419831904,1.2919544880180496,1.966062599368744,6.5,-1.3,0.9014096170572055,0.197181 11,GreeneiBot2,1494.7,264.1,5.7,59.728354485253575,3.675051787269948,1.539810539883174,1.9685962808273842,12.9,-1.6,0.9375959149496895,0.124808 12,metac-perplexity,1558.4,354.4,4.4,59.58837847152926,3.1652094732771676,1.389181319604283,1.9663705248092669,10.6,-1.8,0.9171738658225362,0.165652 -13,metac-deepseek-r1,516.8,277.9,1.9,37.353209862667065,2.2407803261049724,0.8299752665727909,1.968164543586558,6.3,-2.6,0.7963661024103902,0.407268 +13,metac-deepseek-r1+asknews,516.8,277.9,1.9,37.353209862667065,2.2407803261049724,0.8299752665727909,1.968164543586558,6.3,-2.6,0.7963661024103902,0.407268 14,pgodzinai,1106.7,325.4,3.4,66.68615909814488,3.6966946914459644,0.9199538936245306,1.966948755554642,10.7,-3.9,0.8208598109837832,0.358280 15,metac-exa,599.9,365.3,1.6,63.45938884307718,3.3201611290993176,0.4946106204656042,1.9661417524889626,8.2,-4.9,0.6894134359021193,0.621173 16,MWG,253.8,113.4,2.2,40.6740836146038,3.819036516963852,0.5859361127584735,1.980468444487731,9.8,-5.3,0.7204535666937473,0.559093 diff --git a/notebook_outputs/weighted_t_test_h2h_bot_vs_pros.csv b/notebook_outputs/weighted_t_test_h2h_bot_vs_pros.csv new file mode 100644 index 0000000..8eb9a70 --- /dev/null +++ b/notebook_outputs/weighted_t_test_h2h_bot_vs_pros.csv @@ -0,0 +1,47 @@ +,W_score,W_count,W_ave,W_stdev,std_err,t_stat,t_crit,upper_bound,lower_bound,cdf,p_value +cobyj-bot,0.0,0.0,,,,,,,,,NA +andrewsiah,0.0,0.0,,,,,,,,,NA 
+bean_bot,-0.6,4.7,-0.1,0.0698490092484186,0.03221894544078219,-4.26510566168152,2.7848427377534137,-0.0,-0.2,0.007674496502235436,0.015349 +jonahsingerbot,-0.6,4.7,-0.1,0.0502720475429557,0.023188766374944235,-5.273629910349656,2.7848427377534137,-0.1,-0.2,0.003838655509487954,0.007677 +X_bot,-0.7,7.0,-0.1,0.35406799582281046,0.13382512345060182,-0.7471946105725911,2.4469118511449692,0.2,-0.4,0.24159443667404312,0.483189 +CumulativeBot,-1.1,10.2,-0.1,0.25779754004448213,0.08052242326875068,-1.3151322887765264,2.2318482470257073,0.1,-0.3,0.1100659836303239,0.220132 +swingswish,-1.2,7.7,-0.2,0.14027522342155058,0.05055168154738577,-3.0749473143902657,2.367122926859399,-0.0,-0.3,0.009476427450502594,0.018953 +RPM_bot,-1.3,7.0,-0.2,0.803162845690475,0.3035670217119917,-0.6018020851526737,2.4469118511449692,0.6,-0.9,0.2846659989090443,0.569332 +SynapseSeer,-1.3,26.2,-0.1,0.45255474982575933,0.08849837184875071,-0.568910320013585,2.0530763092739437,0.1,-0.2,0.2872314409451841,0.574463 +KevinTestBot,-1.5,8.4,-0.2,0.5894659867910315,0.20338508794412294,-0.8971155260320279,2.3114957148363993,0.3,-0.7,0.19895153497848572,0.397903 +Grizeu_Bot,-1.7,51.4,-0.0,1.1733916577534336,0.16374678141052051,-0.20661633211162028,2.0064473532408944,0.3,-0.4,0.4185713925307672,0.837143 +pianobot,-2.7,4.7,-0.6,0.9162042335005162,0.42261349916620494,-1.3843270734534352,2.798986372998989,0.6,-1.8,0.12194093069402845,0.243882 +CatrachoCaster,-3.2,19.7,-0.2,0.5209013833112408,0.11736062067861285,-1.3655317032240997,2.0887774106971415,0.1,-0.4,0.0941440217425653,0.188288 +krm-bot,-5.1,9.5,-0.5,0.5115460847961517,0.1659674656990186,-3.2298461551560385,2.2647088573190035,-0.2,-0.9,0.005563489501517069,0.011127 +annabot,-6.2,29.3,-0.2,0.5208688899467946,0.0962264820812545,-2.2117952878836604,2.0441825433909937,-0.0,-0.4,0.017610432479673904,0.035221 +4Shadower,-6.2,14.0,-0.4,0.7673219105043008,0.20507540674799357,-2.1431944516704484,2.1472386339670253,0.0,-0.9,0.025796646516944247,0.051593 
+cookics_bot_TEST,-6.7,27.4,-0.2,0.7480496337801963,0.14290753666776426,-1.7220041694550487,2.0495406495390753,0.0,-0.5,0.048383645251144566,0.096767 +jkraybill_bot,-7.5,44.0,-0.2,0.5128530627973333,0.07727161640565941,-2.197133074819885,2.0146422768105463,-0.0,-0.3,0.01672059935283912,0.033441 +twsummerbot,-8.9,58.4,-0.2,0.6597096411583532,0.08632695203642188,-1.758390985166895,2.0008548266793613,0.0,-0.3,0.042005771996978254,0.084012 +MWG,-9.6,28.6,-0.3,0.7111599387639217,0.13297936883238545,-2.5353840992759586,2.0465614134207835,-0.1,-0.6,0.008595358294567833,0.017191 +ProfessorSP,-10.0,18.6,-0.5,0.9362765859321275,0.2170939350431325,-2.484479782313461,2.0952434689972526,-0.1,-1.0,0.011644425230897355,0.023289 +acm_bot,-10.5,80.2,-0.1,0.9142649133881292,0.10205858264251064,-1.2877165899437122,1.9893443508950648,0.1,-0.3,0.10079615172895406,0.201592 +metac-o1,-10.8,91.1,-0.1,0.8668236222209089,0.09081791967404183,-1.3030182446846603,1.9858289388460384,0.1,-0.3,0.09794439270715757,0.195889 +ajf-bot,-10.9,34.2,-0.3,1.0855889019420977,0.1854962383013122,-1.722394508253831,2.0307781947345034,0.1,-0.7,0.04714462059329925,0.094289 +metac-deepseek-r1+asknews,-11.2,52.1,-0.2,0.6342566612198152,0.08787112272667183,-2.4450432699738145,2.0053789762011176,-0.0,-0.4,0.008984924011519364,0.017970 +GreeneiBot2,-11.4,58.4,-0.2,0.8462281442135139,0.1107814473823621,-1.7668111287097124,2.000831925930035,0.0,-0.4,0.041290471840402215,0.082581 +Bot_Pepa,-11.5,44.0,-0.3,0.7375369985271071,0.1111247649069599,-2.3431659801868907,2.0146422768105463,-0.0,-0.5,0.011904916896884948,0.023810 +metac-Gemini-Exp-1206,-11.5,76.5,-0.2,0.8952097471246512,0.10235147002510721,-1.4718494129042066,1.9908217254774627,0.1,-0.4,0.07260889665750306,0.145218 +laylaps,-12.9,64.1,-0.2,0.6619045107450789,0.08267350038122044,-2.44046054763956,1.9969065741038698,-0.0,-0.4,0.008744061158659102,0.017488 
+bot_median,-13.3,92.1,-0.1,0.7572006546947513,0.07890075621895877,-1.8300583290868744,1.9855502432148115,0.0,-0.3,0.03525575647024838,0.070512 +wunderplumb,-13.6,25.6,-0.5,0.9000512561955677,0.17806222265862548,-2.9840941451614404,2.05660303322038,-0.2,-0.9,0.0031741533534496535,0.006348 +metac-perplexity,-14.4,89.1,-0.2,1.1026009344968866,0.11680986021222348,-1.3849519746718768,1.9864049297707018,0.1,-0.4,0.08478215225308733,0.169564 +manticAI,-14.6,69.4,-0.2,0.6709463826178552,0.08051034556472575,-2.613354492497458,1.9939680506212867,-0.0,-0.4,0.005507180276996954,0.011014 +NextWorldLab,-16.9,80.2,-0.2,0.9069642286328539,0.10124361366849416,-2.078393214767385,1.9893443508950648,-0.0,-0.4,0.020454686442219806,0.040909 +minefrac1,-18.8,51.1,-0.4,0.8747517828376596,0.12236983831928097,-3.0135811013395264,2.0065449272360034,-0.1,-0.6,0.0020214088297449183,0.004043 +metac-claude-3-5-sonnet-latest,-21.6,91.1,-0.2,0.7840729022099676,0.08214804952944678,-2.8855809804350296,1.9858289388460384,-0.1,-0.4,0.002444218354964672,0.004888 +mmBot,-21.9,92.1,-0.2,0.7250100357901175,0.0755464746834313,-3.1501040673463705,1.9855502432148115,-0.1,-0.4,0.0011040926153361213,0.002208 +metac-claude-3-5-sonnet-20240620,-22.1,90.5,-0.2,0.9921895725908227,0.10429665234389453,-2.3447130845077018,1.9860719790130024,-0.0,-0.5,0.010626881125878994,0.021254 +metac-grok-2-1212,-23.2,91.1,-0.3,0.9691804386011083,0.10154193882835436,-2.504438328301395,1.9858289388460384,-0.1,-0.5,0.007031732032192213,0.014063 +pgodzinai,-23.2,76.4,-0.3,1.00292283111273,0.11474158338495037,-2.6493172344887146,1.9908489732268309,-0.1,-0.5,0.004910376705596484,0.009821 +VeritasAI,-24.3,77.1,-0.3,0.6607028010672139,0.0752452273943661,-4.185910498866988,1.9904817922115374,-0.2,-0.5,3.7752868903447694e-05,0.000076 +metac-o1-preview,-24.4,91.1,-0.3,0.8524321835897993,0.08931011522099137,-2.9993955258512948,1.9858289388460384,-0.1,-0.4,0.0017486358986007922,0.003497 
+metac-gpt-4o,-25.1,91.1,-0.3,0.8735971368751565,0.09152758712427154,-3.0097067040559993,1.9858289388460384,-0.1,-0.5,0.0016956535070904697,0.003391 +metac-exa,-26.1,89.1,-0.3,0.7919348200357222,0.08389780266944466,-3.4956946250034493,1.9864049297707018,-0.1,-0.5,0.0003713213076391189,0.000743 +InstitutPelFutur,-26.9,90.1,-0.3,0.9737673821897402,0.10258711760941522,-2.90852403334722,1.9861137662360124,-0.1,-0.5,0.0022918503861915234,0.004584 +metac-Llama-3.1,-28.0,89.1,-0.3,0.9072003561919431,0.09610906673103263,-3.2702003829748127,1.9864049297707018,-0.1,-0.5,0.0007672454772695423,0.001534 diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..531adbb --- /dev/null +++ b/pytest.ini @@ -0,0 +1,13 @@ +[pytest] +python_files = test_*.py +pythonpath="./" +# log_level=DEBUG +# log_cli=true +# asyncio_mode = auto +# addopts=-nauto --durations=10 + +# log_file = logs/latest-pytest-outputs.log +# log_cli_format = %(threadName)s - %(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s +# log_cli_date_format = %Y-%m-%d %H:%M:%S +# log_file_format = %(threadName)s - %(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s +# log_file_date_format = %Y-%m-%d %H:%M:%S diff --git a/refactored_notebook/data_models.py b/refactored_notebook/data_models.py new file mode 100644 index 0000000..1aaa3f4 --- /dev/null +++ b/refactored_notebook/data_models.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel + +ResolutionType = bool | str | float | None # binary, MC, numeric, or 'annulled/ambiguous' +ForecastType = list[float] | None # binary: [p_yes, p_no], multiple choice: [p_a, p_b, p_c], numeric: [p_0, p_1, p_2, ...] 
+ +class Forecast(BaseModel): + question: Question + user: User + prediction: ForecastType + predcition_for_correct_answer: float + prediction_time: datetime + comment: str | None = None + + def get_spot_baseline_score(self, resolution: ResolutionType) -> Score: + raise NotImplementedError("Not implemented") + + def get_spot_peer_score(self, resolution: ResolutionType, other_users_forecasts: list[Forecast]) -> Score: + # assert only one forecast per user + # assert that forecasts are in time range of question + raise NotImplementedError("Not implemented") + +class Score(BaseModel): + score: float + type: Literal["spot_peer", "spot_baseline"] + forecast: Forecast + users_used_in_scoring: list[User] | None # Empty if baseline + +class Question(BaseModel): + question_text: str + resolution: ResolutionType + weight: float + spot_scoring_time: datetime + +class User(BaseModel): + name: str + type: Literal["pro", "bot", "cp"] + is_aggregate: bool + aggregated_users: list[User] + + @property + def is_metac_bot(self) -> bool: + return "metac-" in self.name + diff --git a/refactored_notebook/pseudocode_for_main.py b/refactored_notebook/pseudocode_for_main.py new file mode 100644 index 0000000..6660cc4 --- /dev/null +++ b/refactored_notebook/pseudocode_for_main.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +from typing import Literal, Callable +from datetime import datetime +from pydantic import BaseModel +from refactored_notebook.data_models import User, Forecast, Question, Score +from refactored_notebook.simulated_tournament import SimulatedTournament + +# TODO: Since I'm already creating spot score calculations, +# I might as well just input forecasts rather than scores into the tournament +# Though I will also need to check for spot scoring timing/ +# I should check that the scoring matches the original scoring though +# TODO: Rather than the separate tournament creation for pros and bots, create a +# "Create tournament from tournament" function that takes 
in a tournament and +# a function that returns users. The function uses the tournament to make the new users +# a new tournament with full scores is created. + + +def set_up_data(path_to_data: str) -> dict[str, SimulatedTournament]: + + def load_initial_tournament(path_to_data: str) -> dict[str, SimulatedTournament]: + # Load the data + # Match questions between the tournaments + # Raise errors (or require manual matching) if there are differences in the questions + bot_tournament = None + pro_tournament = None + return { + "bot_tournament": bot_tournament, + "pro_tournament": pro_tournament, + } + + def caculate_spot_peer_score_for_user(all_forecasts_for_question: list[Forecast], user: User) -> Score: + # Assert forecasts are all for the same question + # Assert that there is only one forecast per user + # Filter for last forecast of each user that is before the spot scoring time (possibly do in previous step) + # Calculate the score for the user (weighted by question weight) + raise NotImplementedError("Not implemented") + + def caculate_spot_baseline_score(forecasts_for_user: list[Forecast]) -> Score: + # Find last forecast for user that is before the spot scoring time + # Calculate the score for the user (weighted by question weight) + raise NotImplementedError("Not implemented") + + def caculate_all_scores_for_forecasts(forecasts: list[Forecast]) -> list[Score]: + # Find questions + # For each question + # For each user + # Calculate spot peer score + # Calculate spot baseline score + raise NotImplementedError("Not implemented") + + def get_bot_team_user_with_size(original_tournament: SimulatedTournament, team_size: int) -> tuple[User, list[Forecast]]: + # Create a new user for the team + # Create forecasts for the team + # Calculate the scores for the user + raise NotImplementedError("Not implemented") + + def get_all_bot_teams_as_users(original_tournament: SimulatedTournament) -> list[tuple[User, list[Forecast]]]: + users_and_forecasts = [] + for team_size 
in range(1, len(original_tournament.users)): + users_and_forecasts.extend(get_bot_team_user_with_size(original_tournament, team_size)) + return users_and_forecasts + + def get_best_bot_team_user(bot_tournament: SimulatedTournament) -> list[tuple[User, list[Forecast]]]: + # Simulate bot team tournament + # Grab the user and forecasts for the best bot team + raise NotImplementedError("Not implemented") + + def get_pro_median_user(pro_tournament: SimulatedTournament) -> list[tuple[User, list[Forecast]]]: + # Create new user + # Create forecasts for the median + raise NotImplementedError("Not implemented") + + def get_pro_median_and_bot_median_users(bot_tournament: SimulatedTournament, pro_tournament: SimulatedTournament) -> list[tuple[User, list[Forecast]]]: + # Get the pro median user + # Get the bot median user + # Return the two users and their forecasts + raise NotImplementedError("Not implemented") + + def create_tournament( + original_tournament: SimulatedTournament, + new_users: list[tuple[User, list[Forecast]]], + remove_all_old_users: bool = False + ) -> SimulatedTournament: + # TODO: Also add parameter for filtering questions (or choosing new ones like only binaries) + # assert that the forecasts given each have a corresponding question and vice versa for each user + # Create scores for the new users and recalculate for old users + # Make a new tournament with all the new scores + raise NotImplementedError("Not implemented") + + original_tournament = load_initial_tournament(path_to_data) + original_bot_tournament = original_tournament["bot_tournament"] + original_pro_tournament = original_tournament["pro_tournament"] + bot_team_only_tournament = create_tournament( + original_bot_tournament, + get_all_bot_teams_as_users(original_bot_tournament), + remove_all_old_users=True + ) + pro_v_bot_head_to_head_tournament = create_tournament( + original_bot_tournament, + get_pro_median_and_bot_median_users(original_bot_tournament, original_pro_tournament), + 
remove_all_old_users=True + ) + + return { + "original_bot_tournament": original_bot_tournament, + "original_pro_tournament": original_pro_tournament, + "bot_team_only_tournament": bot_team_only_tournament, + "pro_v_bot_head_to_head_tournament": pro_v_bot_head_to_head_tournament, + } + + +def display_everything(score_sets: dict[str, SimulatedTournament]) -> None: + + forecasts_to_display = score_sets["original_bot_tournament"].forecasts + + def display_calibration_curve(forecasts: list[Forecast]) -> None: + # Each user has its own line and a 90% confidence interval + raise NotImplementedError("Not implemented") + + def display_discrimination_curve(forecasts: list[Forecast]) -> None: + # Each user has its own bar + raise NotImplementedError("Not implemented") + + def display_spot_peer_score_table(tournament: SimulatedTournament, users_to_display: list[User] | None = None) -> None: + # Filter for peer scores + # make sure all scores are peer scores + # make sure that all scores use the same users for calculation + + # Add these stats as a property of the simulated tournament scores + # Calculate average spot peer score + # Calculate sum of spot peer scores + # Find confidence interval w/ t test + # find confidence interval with bootstrapping + # Weighted question count (sum of weights) + # Show in table with a row for each user + # Filter by users_to_display if provided + raise NotImplementedError("Not implemented") + + def display_best_and_worse_scoring_questions(tournament: SimulatedTournament) -> None: + # Assert there are only 2 users + # Find the score differences between each question + # Show the top 5 and bottom 5 questions, forecasts for those questions, the resolution, and the score difference + raise NotImplementedError("Not implemented") + + def display_general_tournament_stats(bot_tournament: SimulatedTournament, pro_tournament: SimulatedTournament) -> None: + # Display num pro questions + # Display num bot questions + # Display num pro users + # Display 
num bot users + raise NotImplementedError("Not implemented") + + + metac_bots = [user for user in score_sets["original_bot_tournament"].users if user.is_metac_bot] + + display_calibration_curve(forecasts_to_display) + display_discrimination_curve(forecasts_to_display) + display_spot_peer_score_table(score_sets["original_bot_tournament"]) + display_spot_peer_score_table(score_sets["original_bot_tournament"], users_to_display=metac_bots) + display_spot_peer_score_table(score_sets["bot_team_only_tournament"]) + display_spot_peer_score_table(score_sets["pro_v_bot_head_to_head_tournament"]) + display_best_and_worse_scoring_questions(score_sets["pro_v_bot_head_to_head_tournament"]) + display_general_tournament_stats(score_sets["original_bot_tournament"], score_sets["original_pro_tournament"]) \ No newline at end of file diff --git a/refactored_notebook/scoring.py b/refactored_notebook/scoring.py new file mode 100644 index 0000000..eec131c --- /dev/null +++ b/refactored_notebook/scoring.py @@ -0,0 +1,331 @@ +from enum import Enum +from typing import Literal + +import numpy as np +from scipy.stats.mstats import gmean + +from refactored_notebook.data_models import ForecastType, ResolutionType + + +class QuestionType(Enum): + BINARY = "binary" + MULTIPLE_CHOICE = "multiple_choice" + NUMERIC = "numeric" + +def calculate_peer_score( + forecast: ForecastType, + forecast_for_other_users: list[ForecastType], + resolution: ResolutionType, + options: list[str] | None = None, + range_min: float | None = None, + range_max: float | None = None, + question_weight: float = 1.0, + q_type: Literal["binary", "multiple_choice", "numeric"] | None = None, +) -> float: + question_type = _determine_question_type(q_type, resolution) + resolution = _normalize_resolution(question_type, resolution, range_min, range_max) + forecast_for_resolution = _determine_probability_for_resolution( + question_type, forecast, resolution, options, range_min, range_max + ) + other_user_forecasts = [ + 
_determine_probability_for_resolution( + question_type, forecast, resolution, options, range_min, range_max + ) + for forecast in forecast_for_other_users + ] + + geometric_mean = gmean(other_user_forecasts) + peer_score = np.log(forecast_for_resolution / geometric_mean) + if isinstance(resolution, float): # @Check: shouldn't other q types get a divsor? + peer_score /= 2 + return peer_score * question_weight + + +def calculate_baseline_score( + forecast: ForecastType, + resolution: ResolutionType, + options: list[str] | None = None, + range_min: float | None = None, + range_max: float | None = None, + question_weight: float = 1.0, + open_upper_bound: bool = False, + open_lower_bound: bool = False, + q_type: Literal["binary", "multiple_choice", "numeric"] | None = None, +) -> float: + """ + Question type can be infered from resolution type + Scoring math: https://www.metaculus.com/help/scores-faq/#What:~:text=given%20score%20type.-,What%20is%20the%20Baseline%20score%3F,-The%20Baseline%20score + """ + question_type = _determine_question_type(q_type, resolution) + resolution = _normalize_resolution(question_type, resolution, range_min, range_max) + prob_for_resolution = _determine_probability_for_resolution( + question_type, forecast, resolution, options, range_min, range_max + ) + baseline_prob = _determine_baseline( + question_type, resolution, options, range_min, range_max, open_upper_bound, open_lower_bound + ) + divisor = _determine_divisor_for_baseline_score(question_type, options) + if prob_for_resolution <= 0 or baseline_prob <= 0: + raise ValueError( + "Probability for resolution or baseline probability is less than or equal to 0 which could cause a log(0) issue" + ) + + baseline_score = np.log(prob_for_resolution / baseline_prob) / divisor * 100 + + weighted_score = baseline_score * question_weight + + return weighted_score + + +def _determine_baseline( + question_type: QuestionType, + resolution: ResolutionType, + options: list[str] | None = None, + 
range_min: float | None = None, + range_max: float | None = None, + open_upper_bound: bool | None = None, + open_lower_bound: bool | None = None, +) -> float: + resolution = _normalize_resolution(question_type, resolution, range_min, range_max) + if question_type == QuestionType.BINARY: + baseline_prob = 0.5 + elif question_type == QuestionType.MULTIPLE_CHOICE: + if options is None: + raise ValueError("Options are required for multiple choice questions") + baseline_prob = 1 / len(options) + elif question_type == QuestionType.NUMERIC: + if open_upper_bound is None or open_lower_bound is None: + raise ValueError("Open upper bound and lower bound are required for numeric questions") + if range_min is None or range_max is None: + raise ValueError("Range min and range max are required for numeric questions") + if not isinstance(resolution, float): + raise ValueError("Resolution must be a float for numeric questions") + + # @Check: Which version is correct? + # Version 1: + resolved_outside_bounds = False + assert range_min is not None and range_max is not None and resolution is not None, f"These need to be not None: Range min: {range_min}, range max: {range_max}, resolution: {resolution}" + if resolution > range_max or resolution < range_min: + resolved_outside_bounds = True + if resolved_outside_bounds: + baseline_prob = 0.05 + else: + open_bound_count = bool(open_upper_bound) + bool(open_lower_bound) + baseline_prob = (1 - 0.05 * open_bound_count) / 200 # PMF has 202 bins, 2 of which represent the bounds. 
So 200 is the internal bins + + # Version 2: + # open_bound_count = bool(open_upper_bound) + bool(open_lower_bound) + # if open_bound_count == 0: + # baseline_prob = 1 + # elif open_bound_count == 1: + # baseline_prob = 0.95 + # else: + # baseline_prob = 0.9 + + # Version 3: + # baseline_prob = ( + # 1 / 202 + # ) # len(pmf) # bins = 201 because of extra appended bin + else: + raise ValueError("Unknown question type") + assert ( + 0 <= baseline_prob <= 1 + ), f"Baseline probability is {baseline_prob} which is not between 0 and 1" + return baseline_prob + + +def _determine_probability_for_resolution( + q_type: QuestionType, + forecast: ForecastType, + resolution: ResolutionType, + options: list[str] | None = None, + range_min: float | None = None, + range_max: float | None = None, +) -> float: + """ + Returns a 0 to 1 probability for the resolution + Also returns the baseline probability used in baseline scoring + """ + resolution = _normalize_resolution(q_type, resolution, range_min, range_max) + + if forecast is None or resolution is None: + raise NotImplementedError( + "Havent decided how to handle null forecasts or anulled resolutions" + ) + + try: + if len(forecast) == 0: + raise ValueError("Forecast is empty") + except Exception as e: + raise ValueError(f"Error encountered for question of type {q_type} with resolution {resolution} and forecast {forecast}: {e}") + + if not q_type == QuestionType.NUMERIC and any(p <= 0 or p >= 1 for p in forecast): + raise ValueError("Forecast contains probabilities outside of 0 to 1 range") + + if q_type == QuestionType.BINARY: + assert isinstance(resolution, bool) + prob_for_resolution = _binary_resolution_prob(forecast, resolution) + elif q_type == QuestionType.MULTIPLE_CHOICE: + assert isinstance(resolution, str) + if options is None: + raise ValueError("Options are required for multiple choice questions") + prob_for_resolution = _multiple_choice_resolution_prob( + forecast, resolution, options + ) + elif q_type == 
QuestionType.NUMERIC: + if range_min is None or range_max is None: + raise ValueError( + "Range min and range max are required for numeric questions" + ) + prob_for_resolution = _numeric_resolution_prob( + forecast, resolution, range_min, range_max + ) + else: + raise ValueError("Unknown question type") + + assert ( + 0 <= prob_for_resolution <= 1 + ), f"Probability for resolution is {prob_for_resolution} which is not between 0 and 1" + return prob_for_resolution + + +def _binary_resolution_prob(forecast: list[float], resolution: bool) -> float: + if len(forecast) != 1 and len(forecast) != 2: + raise ValueError( + "Binary questions must have exactly one or two forecasts (for yes or 'yes and no')" + ) + + forecast_val = float(forecast[0]) + if resolution: + prob_for_resolution = forecast_val + else: + prob_for_resolution = 1 - forecast_val + return prob_for_resolution + + +def _multiple_choice_resolution_prob( + forecast: list[float], resolution: str, options: list[str] +) -> float: + if len(forecast) != len(options): + raise ValueError("Forecast and options have different lengths") + + pmf = [float(p) for p in forecast] + options = [str(opt) for opt in options] # @Check: TODO: For whatever reason, options had " and ' surrounding them, and were not parsed at this point. This is the easier way to handle it, but should be dealt with earlier in the pipeline. 
+ resolution_idx = options.index(str(resolution)) + prob_for_resolution = pmf[resolution_idx] + return prob_for_resolution + + +def _numeric_resolution_prob( + forecast: list[float], resolution: float | str, range_min: float, range_max: float +) -> float: + if len(forecast) != 201: + raise ValueError("CDF should have 201 bins") + + previous_prob = 0 + for current_prob in forecast: + if current_prob < previous_prob: + raise ValueError("CDF should be in increasing order") + previous_prob = current_prob + + cdf = [float(p) for p in forecast] + assert len(cdf) == 201, f"There should be 201 bins, but there are {len(cdf)}" + lower_bound_prob = cdf[0] + upper_bound_prob = 1 - cdf[-1] + pmf = ( + [lower_bound_prob] + + [cdf[i] - cdf[i - 1] for i in range(1, len(cdf))] + + [upper_bound_prob] + ) + assert len(pmf) == 202, f"There should be 202 bins, but there are {len(pmf)}" + + resolution = float(resolution) + # bin_edges = np.linspace(range_min, range_max, 200) + # resolution_bin_idx = np.searchsorted(bin_edges, resolution, side="right") + cdf_location = nominal_location_to_cdf_location(resolution, range_min, range_max) + resolution_bin_idx = min(int(cdf_location * (len(pmf) - 1)), len(pmf) - 1) + + if resolution_bin_idx >= len(pmf): + raise ValueError("Resolution is out of bounds") + + prob_for_resolution = pmf[resolution_bin_idx] + + return prob_for_resolution + + +def _determine_divisor_for_baseline_score( + question_type: QuestionType, options: list[str] | None = None +) -> float: + if question_type == QuestionType.BINARY: + return np.log(2) + elif question_type == QuestionType.MULTIPLE_CHOICE: + if options is None: + raise ValueError("Options are required for multiple choice questions") + return np.log(len(options)) + elif question_type == QuestionType.NUMERIC: + return 2 + else: + raise ValueError("Unknown question type") + +def nominal_location_to_cdf_location( + nominal_location: float, + range_min: float, + range_max: float, + zero_point: float | None = None, +) 
-> float: + """ + Takes a location in nominal format (e.g. 123, "123", or datetime in iso format) and scales it to + metaculus's "internal representation" range [0, 1] incorporating question scaling + 0.8 would incidate the nomial locatoin is at cdf index 201 * 0.8 + Values higher/lower than 0 and 1 are resolutions that are above/below the upper/lower bound + """ + assert isinstance(zero_point, float | None) + + # TODO: Make sure to use datetime.fromisoformat(nominal_location).timestamp() if you start using date questions + scaled_location = float(nominal_location) + + # Unscale the value to put it into the range [0,1] + if zero_point is not None: + # logarithmically scaled question + deriv_ratio = (range_max - zero_point) / (range_min - zero_point) + unscaled_location = ( + np.log( + (scaled_location - range_min) * (deriv_ratio - 1) + + (range_max - range_min) + ) + - np.log(range_max - range_min) + ) / np.log(deriv_ratio) + else: + # linearly scaled question + unscaled_location = (scaled_location - range_min) / (range_max - range_min) + return unscaled_location + +def _normalize_resolution(question_type: QuestionType, resolution: ResolutionType, range_min: float | None, range_max: float | None) -> ResolutionType: + if resolution == "annulled" or resolution == "ambiguous": + return None + + if question_type == QuestionType.NUMERIC: + if range_min is None or range_max is None: + raise ValueError("Range min and range max are required for numeric questions") + if resolution == "above_upper_bound": + return range_max + 0.1 + elif resolution == "below_lower_bound": + return range_min - 0.1 + else: + return resolution + else: + return resolution + + +def _determine_question_type(question_type: Literal["binary", "multiple_choice", "numeric"] | None, resolution: ResolutionType) -> QuestionType: + if question_type is None: + if isinstance(resolution, bool): + return QuestionType.BINARY + elif isinstance(resolution, float) or isinstance(resolution, int) or resolution == 
"above_upper_bound" or resolution == "below_lower_bound": + return QuestionType.NUMERIC + elif isinstance(resolution, str): + return QuestionType.MULTIPLE_CHOICE + else: + raise ValueError(f"Cannot infer question type from resolution. Please provide a question type. Resolution: {resolution}") + else: + return QuestionType(question_type) diff --git a/refactored_notebook/simulated_tournament.py b/refactored_notebook/simulated_tournament.py new file mode 100644 index 0000000..eddbfc5 --- /dev/null +++ b/refactored_notebook/simulated_tournament.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from pydantic import BaseModel +from refactored_notebook.data_models import User, Question, Forecast, Score + + +class SimulatedTournament(BaseModel): + forecasts: list[Forecast] + + @property + def users(self) -> set[User]: + users = set() + for forecast in self.forecasts: + users.add(forecast.user) + return users + + @property + def questions(self) -> set[Question]: + questions = set() + for forecast in self.forecasts: + questions.add(forecast.question) + return questions + + @property + def scores(self) -> list[Score]: + spot_peer_scores = [] + spot_baseline_scores = [] + for forecast in self.forecasts: + forecasts_from_other_users = [ + f + for f in self.forecasts + if f.question == forecast.question and f.user != forecast.user + ] + spot_peer_scores.append( + forecast.get_spot_peer_score( + forecast.question.resolution, forecasts_from_other_users + ) + ) + spot_baseline_scores.append( + forecast.get_spot_baseline_score(forecast.question.resolution) + ) + return spot_peer_scores + spot_baseline_scores + + def get_ranking_by_spot_peer_score_lower_t_bound( + self, confidence_level: float + ) -> list[tuple[User, float]]: + # Get all spot peer scores + # create a confidence interval for the spot peer score + # Sort by lower bound + raise NotImplementedError("Not implemented") + + def get_ranking_by_spot_peer_score_sum(self) -> list[tuple[User, float]]: + # Get all spot 
peer scores + # Sort by spot peer score + raise NotImplementedError("Not implemented") + + def get_ranking_by_spot_peer_score_bootstrap_lower_bound( + self, confidence_level: float + ) -> list[tuple[User, float]]: + # Get all spot peer scores + # bootstrap the spot peer scores + # create a confidence interval for the spot peer score + # Sort by lower bound + raise NotImplementedError("Not implemented") diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py new file mode 100644 index 0000000..76bbe91 --- /dev/null +++ b/tests/test_end_to_end.py @@ -0,0 +1,19 @@ +from refactored_notebook.data_models import User, Question, Forecast, Score + + +# Generate test csvs to input into the notebook, and assert the below tests pass + +# Things that could go wrong: +# - bad math in scoring +# - didn't load in data correctly +# - bad filtering/manipulation of scoring data (did we take out the right people) +# - make sure to determine the bot team only by the bot-only questions +# - make sure best bot team is decided by baseline score comparison to each other +# - make sure best bots for bot team are decided by lower bound of t test +# - make sure that worse bots come out on bottom +# - Confidence interval code is wrong +# - make sure that there are large intervals if only a few forecasts, and small intervals if many forecasts +# - make sure bootstrap and t tests indicate the same things generally +# ... continue through and consider other final outputs (e.g. 
calibration curve) + + diff --git a/test_functions.py b/tests/test_functions.py similarity index 100% rename from test_functions.py rename to tests/test_functions.py diff --git a/tests/test_scoring.py b/tests/test_scoring.py new file mode 100644 index 0000000..ca437bf --- /dev/null +++ b/tests/test_scoring.py @@ -0,0 +1,692 @@ +from dataclasses import dataclass + +import numpy as np +import pytest + +from refactored_notebook.data_models import ForecastType +from refactored_notebook.scoring import calculate_baseline_score, calculate_peer_score + +# TODO: +# For each of Multiple Choice, Binary, and Numeric questions +# - Test spot peer score +# - forecast this is further away than others gets worse scores (with 1-5 forecasts) +# - forecast this is closer to the resolution gets better scores (with 1-5 forecasts) +# - If everyone has the same forecast, the score is 0 +# - The sum (average?) of everyone's scores is 0 +# - The score for a weighted question is weighted by the question weight +# - Run a test of some forecasts from the site, and make sure the score generated matches the score the site gives + +################################### HELPER FUNCTIONS ################################### + + +def generate_uniform_cdf() -> list[float]: + num_points = 200 # cdf has 201 points, but first point is 0% if we assume closed bounds + return [0] + [(i + 1) / num_points for i in range(num_points)] + + +def generate_cdf_with_forecast_at_index(index: int, forecast: float) -> list[float]: + cdf = [] + for i in range(201): + if i < index: + cdf.append(0.0) + else: + cdf.append(forecast) + return cdf + + +@dataclass +class Percentile: + value: float + probability_below: float + + +def generate_cdf( + percentiles: list[Percentile], + lower_bound: float, + upper_bound: float, + open_lower_bound: bool, + open_upper_bound: bool, + zero_point: float | None = None, +) -> list[float]: + # Copied from another notebook -> definitely could be cleaned up + + percentile_values: dict[float, 
float] = { + percentile.probability_below * 100: percentile.value + for percentile in percentiles + } + + percentile_max = max(float(key) for key in percentile_values.keys()) + percentile_min = min(float(key) for key in percentile_values.keys()) + range_min = lower_bound + range_max = upper_bound + range_size = abs(range_max - range_min) + buffer = 1 if range_size > 100 else 0.01 * range_size + + # Adjust any values that are exactly at the bounds + for percentile, value in list(percentile_values.items()): + if not open_lower_bound and value <= range_min + buffer: + percentile_values[percentile] = range_min + buffer + if not open_upper_bound and value >= range_max - buffer: + percentile_values[percentile] = range_max - buffer + + # Set cdf values outside range + if open_upper_bound: + if range_max > percentile_values[percentile_max]: + percentile_values[int(100 - (0.5 * (100 - percentile_max)))] = range_max + else: + percentile_values[100] = range_max + + # Set cdf values outside range + if open_lower_bound: + if range_min < percentile_values[percentile_min]: + percentile_values[int(0.5 * percentile_min)] = range_min + else: + percentile_values[0] = range_min + + sorted_percentile_values = dict(sorted(percentile_values.items())) + + # Normalize percentile keys + normalized_percentile_values = {} + for key, value in sorted_percentile_values.items(): + percentile = float(key) / 100 + normalized_percentile_values[percentile] = value + + value_percentiles = { + value: key for key, value in normalized_percentile_values.items() + } + + # function for log scaled questions + def generate_cdf_locations( + range_min: float, range_max: float, zero_point: float | None + ) -> list[float]: + if zero_point is None: + scale = lambda x: range_min + (range_max - range_min) * x + else: + deriv_ratio = (range_max - zero_point) / (range_min - zero_point) + scale = lambda x: range_min + (range_max - range_min) * ( + deriv_ratio**x - 1 + ) / (deriv_ratio - 1) + return [scale(x) for x in 
np.linspace(0, 1, 201)] + + cdf_xaxis = generate_cdf_locations(range_min, range_max, zero_point) + + def linear_interpolation( + x_values: list[float], xy_pairs: dict[float, float] + ) -> list[float]: + # Sort the xy_pairs by x-values + sorted_pairs = sorted(xy_pairs.items()) + + # Extract sorted x and y values + known_x = [pair[0] for pair in sorted_pairs] + known_y = [pair[1] for pair in sorted_pairs] + + # Initialize the result list + y_values = [] + + for x in x_values: + # Check if x is exactly in the known x values + if x in known_x: + y_values.append(known_y[known_x.index(x)]) + else: + # Find the indices of the two nearest known x-values + i = 0 + while i < len(known_x) and known_x[i] < x: + i += 1 + # If x is outside the range of known x-values, use the nearest endpoint + if i == 0: + y_values.append(known_y[0]) + elif i == len(known_x): + y_values.append(known_y[-1]) + else: + # Perform linear interpolation + x0, x1 = known_x[i - 1], known_x[i] + y0, y1 = known_y[i - 1], known_y[i] + + # Linear interpolation formula + y = y0 + (x - x0) * (y1 - y0) / (x1 - x0) + y_values.append(y) + + return y_values + + continuous_cdf = linear_interpolation(cdf_xaxis, value_percentiles) + + percentiles = [ + Percentile(value=value, probability_below=percentile) + for value, percentile in zip(cdf_xaxis, continuous_cdf) + ] + assert len(percentiles) == 201 + + # Validate minimum spacing between consecutive values + # for i in range(len(percentiles) - 1): + # assert ( + # abs(percentiles[i + 1].probability_below - percentiles[i].probability_below) + # >= 5e-05 + # ), ( + # f"Percentiles at indices {i} and {i+1} are too close: " + # f"{percentiles[i].probability_below} and {percentiles[i+1].probability_below} " + # f"at values {percentiles[i].value} and {percentiles[i+1].value}. " + # "It is possible that your prediction is mostly or completely out of the upper/lower bound range " + # "Thus making this cdf mostly meaningless." 
+ # ) + + return [percentile.probability_below for percentile in percentiles] + + +################################### BASELINE SCORES ################################### + + +@pytest.mark.parametrize( + "forecast,resolution,options,range_min,range_max,question_weight,expected", + [ + # Binary: uniform forecast, should be 0 + ([0.5], True, None, None, None, 1.0, 0.0), + ([0.5], False, None, None, None, 1.0, 0.0), + ([0.5, 0.5], False, None, None, None, 1.0, 0.0), + # Multiple Choice: uniform forecast, should be 0 + ([1 / 3, 1 / 3, 1 / 3], "A", ["A", "B", "C"], None, None, 1.0, 0.0), + ([0.25, 0.25, 0.25, 0.25], "B", ["A", "B", "C", "D"], None, None, 1.0, 0.0), + # Numeric: uniform CDF, should be 0 + (generate_uniform_cdf(), 0.5, None, 0.0, 1.0, 1.0, 0.0), + ], +) +def test_baseline_score_is_0_with_uniform_prediction( + forecast: list[float], + resolution: bool | str | None, + options: list[str] | None, + range_min: float | None, + range_max: float | None, + question_weight: float, + expected: float, +): + score = calculate_baseline_score( + forecast, resolution, options, range_min, range_max, question_weight + ) + assert abs(score - expected) == pytest.approx(0) + + +@pytest.mark.parametrize( + "forecast,resolution,expected", + [ + ([0.001], True, -896.57), # Completely incorrect + ([0.999], True, 99.86), # Completely correct + ([0.001], False, 99.86), # Completely correct + ( + [0.4], + True, + -32.19, + ), # Examples found here: https://www.metaculus.com/help/scores-faq/#:~:text=details%20for%20nerds-,Do%20all%20my%20predictions%20on%20a%20question%20count%20toward%20my%20score%3F,-Yes.%20Metaculus%20uses + ([0.7], True, 48.542), + ([0.4, 0.6], True, -32.19), + ], +) +def test_binary_baseline_examples( + forecast: list[float], resolution: bool, expected: float +): + score = calculate_baseline_score( + forecast=forecast, + resolution=resolution, + ) + assert score == pytest.approx(expected, abs=1e-1) + + +def test_numeric_baseline_when_perfect_forecast(): + 
correct_index = 31 + length_of_cdf = 201 + index_to_answer_ratio = 3 + correct_answer = correct_index * index_to_answer_ratio + range_max = length_of_cdf * index_to_answer_ratio + forecast = generate_cdf_with_forecast_at_index(correct_index, 0.59) + # As of May 3, 2025, 0.59 is max difference between 2 points on a cdf + + score = calculate_baseline_score( + forecast=forecast, + resolution=float(correct_answer), + range_min=0, + range_max=range_max, + open_upper_bound=False, + open_lower_bound=False, + ) + assert score == pytest.approx(183) + + +def test_numeric_baseline_if_completly_incorrect_forecast(): + correct_index = 31 + length_of_cdf = 201 + index_to_answer_ratio = 3 + correct_answer = correct_index * index_to_answer_ratio + range_max = length_of_cdf * index_to_answer_ratio + forecast = generate_cdf_with_forecast_at_index(correct_index, 0.01/200) + + score = calculate_baseline_score( + forecast=forecast, + resolution=float(correct_answer), + range_min=0, + range_max=range_max, + ) + assert score == pytest.approx(-230.25, abs=1e-1) + + +@pytest.mark.parametrize( + "forecast_for_answer_a,num_total_forecasts,expected", + [ + (0.999, 8, 99.95), + (0.001, 8, -232.19), + ], +) +def test_multiple_choice_examples( + forecast_for_answer_a: float, num_total_forecasts: int, expected: float +): + num_other_forecasts = num_total_forecasts - 1 + other_forecasts = (1 - forecast_for_answer_a) / num_other_forecasts + score = calculate_baseline_score( + forecast=[forecast_for_answer_a] + [other_forecasts] * num_other_forecasts, + resolution="A", + options=["A"] + [f"B{i}" for i in range(num_other_forecasts)], + ) + assert score == pytest.approx(expected, abs=1e-2) + + +@pytest.mark.parametrize( + "forecast_closer,forecast_further,resolution,options,range_min,range_max", + [ + # Binary: closer to True + ([0.8], [0.2], True, None, None, None), + # Binary: closer to False + ([0.2], [0.8], False, None, None, None), + # Multiple Choice: closer to "A" + ([0.7, 0.2, 0.1], [0.1, 0.2, 
0.7], "A", ["A", "B", "C"], None, None), + # Numeric: CDF with more mass near 0.5 vs near 0.0 + ( + generate_cdf( + [ + Percentile(value=40, probability_below=0.1), + Percentile(value=60, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + generate_cdf( + [ + Percentile(value=30, probability_below=0.1), + Percentile(value=49, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + 50.0, + None, + -1, + 96, + ), + ], +) +def test_baseline_score_better_when_closer( + forecast_closer: list[float], + forecast_further: list[float], + resolution: bool | str | float | None, + options: list[str] | None, + range_min: float | None, + range_max: float | None, +): + score_closer = calculate_baseline_score( + forecast=forecast_closer, + resolution=resolution, + options=options, + range_min=range_min, + range_max=range_max, + ) + score_further = calculate_baseline_score( + forecast=forecast_further, + resolution=resolution, + options=options, + range_min=range_min, + range_max=range_max, + ) + assert score_closer > score_further + + +@pytest.mark.parametrize( + "forecast,resolution,options,range_min,range_max,question_weight", + [ + # Binary + ([0.8], True, None, None, None, 2.0), + # Multiple Choice + ([0.7, 0.2, 0.1], "A", ["A", "B", "C"], None, None, 0.5), + # Numeric + ( + generate_cdf( + [ + Percentile(value=0.1, probability_below=0.1), + Percentile(value=0.9, probability_below=0.9), + ], + lower_bound=0.0, + upper_bound=1.0, + open_lower_bound=False, + open_upper_bound=False, + ), + 0.5, + None, + 0.0, + 1.0, + 3.0, + ), + ], +) +def test_baseline_score_weighted( + forecast: list[float], + resolution: bool | str | None, + options: list[str] | None, + range_min: float | None, + range_max: float | None, + question_weight: float, +): + score_unweighted = calculate_baseline_score( + forecast, resolution, options, range_min, range_max, 1.0 
+ ) + score_weighted = calculate_baseline_score( + forecast, resolution, options, range_min, range_max, question_weight + ) + assert abs(score_weighted - score_unweighted * question_weight) < 1e-8 + + +################################### PEER SCORES ################################### + + +@pytest.mark.parametrize( + "forecasts,resolution,options,range_min,range_max", + [ + # Binary: forecast closer to resolution gets better score + ( + [[0.9], [0.7], [0.5], [0.3], [0.1]], + True, + None, + None, + None, + ), + # Multiple Choice: forecast closer to resolution gets better score + ( + [ + [0.9, 0.09, 0.01], + [0.7, 0.2, 0.1], + [0.5, 0.3, 0.2], + [0.3, 0.4, 0.3], + [0.1, 0.2, 0.7], + ], + "A", + ["A", "B", "C"], + None, + None, + ), + # Numeric: forecast CDFs with more mass near resolution get better score + ( + [ + generate_cdf( # Best CDF + [ + Percentile(value=40, probability_below=0.1), + Percentile(value=60, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + generate_cdf( + [ + Percentile(value=20, probability_below=0.1), + Percentile(value=50, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + generate_cdf( # worst CDF + [ + Percentile(value=10, probability_below=0.1), + Percentile(value=20, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + ], + 49, + None, + -1, + 96, # Not even range + ), + # Numeric: forecast CDFs with more mass near upper bound get better score + ( + [ + generate_cdf( # Best CDF + [ + Percentile(value=110, probability_below=0.1), + Percentile(value=130, probability_below=0.9), + ], + lower_bound=0, + upper_bound=100, + open_lower_bound=False, + open_upper_bound=True, + ), + generate_cdf( + [ + Percentile(value=90, probability_below=0.1), + Percentile(value=140, probability_below=0.9), + ], + lower_bound=0, + upper_bound=100, 
+ open_lower_bound=False, + open_upper_bound=True, + ), + generate_cdf( # worst CDF + [ + Percentile(value=30, probability_below=0.1), + Percentile(value=110, probability_below=0.9), + ], + lower_bound=0, + upper_bound=100, + open_lower_bound=False, + open_upper_bound=True, # No upper bound = no probability mass at upper bound + ), + ], + 120, + None, + 0, + 100, + ), + ], +) +def test_better_forecast_means_better_peer_score( + forecasts: list[list[float]], + resolution: bool | str | float, + options: list[str] | None, + range_min: float | None, + range_max: float | None, +): + scores = [ + calculate_peer_score( + forecast, + [f for i, f in enumerate(forecasts) if i != idx], + resolution, + options, + range_min, + range_max, + 1.0, + ) + for idx, forecast in enumerate(forecasts) + ] + assert scores[1] > 0, "The first score should be positive" + sorted_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True) + assert len(scores) == len(set(scores)), "Scores should all be different" + assert sorted_indices == list( + range(len(scores)) + ), "Scores should be ordered as expected (descending)" + + +@pytest.mark.parametrize( + "question_type,forecast,resolution,options,range_min,range_max", + [ + ("binary", [0.5], True, None, None, None), + ("mc", [0.25, 0.25, 0.25, 0.25], "A", ["A", "B", "C", "D"], None, None), + ("numeric", generate_cdf_with_forecast_at_index(100, 0.999), 100, None, 0, 100), + ("numeric", generate_uniform_cdf(), 50, None, 0, 100), + ], +) +def test_peer_score_zero_when_all_same( + question_type: str, + forecast: list[float], + resolution: bool | str | float, + options: list[str] | None, + range_min: float | None, + range_max: float | None, +): + forecasts = [forecast for _ in range(5)] + scores = [ + calculate_peer_score( + f, + [f2 for i2, f2 in enumerate(forecasts) if i2 != i], + resolution, + options, + range_min, + range_max, + 1.0, + ) + for i, f in enumerate(forecasts) + ] + for score in scores: + assert score == 
pytest.approx(0) + + +@pytest.mark.parametrize( + "forecasts,resolution,options,range_min,range_max", + [ + # Binary + ([[0.7], [0.3], [0.5]], True, None, None, None), + # Multiple Choice + ( + [[0.7, 0.2, 0.1], [0.1, 0.7, 0.2], [0.2, 0.1, 0.7]], + "A", + ["A", "B", "C"], + None, + None, + ), + # Numeric + ( + [ + generate_cdf( + [ + Percentile(value=30, probability_below=0.1), + Percentile(value=60, probability_below=0.9), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=True, + open_upper_bound=False, + ), + generate_cdf( + [ + Percentile(value=20, probability_below=0.4), + Percentile(value=80, probability_below=0.6), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=True, + open_upper_bound=True, + ), + generate_cdf( + [ + Percentile(value=10, probability_below=0.1), + Percentile(value=70, probability_below=0.3), + ], + lower_bound=-1, + upper_bound=96, + open_lower_bound=False, + open_upper_bound=False, + ), + ], + 50, + None, + -1, + 96, + ), + ], +) +def test_peer_score_average_zero( + forecasts: list[list[float]], + resolution: bool | str | float, + options: list[str] | None, + range_min: float | None, + range_max: float | None, +): + scores = [ + calculate_peer_score( + forecast, + [f for i, f in enumerate(forecasts) if i != idx], + resolution, + options, + range_min, + range_max, + ) + for idx, forecast in enumerate(forecasts) + ] + assert np.mean(scores) == pytest.approx(0) + + +@pytest.mark.parametrize( + "forecasts,resolution,options,range_min,range_max,weight", + [ + # Binary + ([[0.7], [0.3], [0.5]], True, None, None, None, 2.0), + # Multiple Choice + ( + [[0.7, 0.2, 0.1], [0.1, 0.7, 0.2], [0.2, 0.1, 0.7]], + "A", + ["A", "B", "C"], + None, + None, + 0.5, + ), + # Numeric + ( + [ + generate_uniform_cdf(), + generate_cdf_with_forecast_at_index(100, 0.999), + generate_cdf_with_forecast_at_index(101, 0.999), + ], + 50, + None, + 0, + 100, + 0.8, + ), + ], +) +def test_peer_score_weighted( + forecasts: list[ForecastType], + 
resolution: bool | str | float, + options: list[str] | None, + range_min: float | None, + range_max: float | None, + weight: float, +): + for idx, forecast in enumerate(forecasts): + other_forecasts = [f for i, f in enumerate(forecasts) if i != idx] + score_unweighted = calculate_peer_score( + forecast, other_forecasts, resolution, options, range_min, range_max, 1.0 + ) + score_weighted = calculate_peer_score( + forecast, other_forecasts, resolution, options, range_min, range_max, weight + ) + assert score_weighted == pytest.approx(score_unweighted * weight) + + +# TODO: Test the below for peer scores +# Best score for MC and binary is 996 +# Worst score for MC and binary is -996 +# Best score for numeric is 408 +# Worst score for numeric is -408 diff --git a/weighted_t_test_h2h_bot_vs_pros.csv b/weighted_t_test_h2h_bot_vs_pros.csv deleted file mode 100644 index 96cf6b7..0000000 --- a/weighted_t_test_h2h_bot_vs_pros.csv +++ /dev/null @@ -1,47 +0,0 @@ -,W_score,W_count,W_ave,W_stdev,std_err,t_stat,t_crit,upper_bound,lower_bound,cdf,p_value -Grizeu_Bot,487.9,40.0,12.2,123.49852344088487,19.53904680990783,0.6251000199360248,2.0203143354405637,51.7,-27.3,0.7322246430842996,0.535551 -acm_bot,149.7,63.8,2.3,123.1672185402655,15.413976167212882,0.1521157135047702,1.9970180928411654,33.1,-28.4,0.5602085330688682,0.879583 -RPM_bot,145.0,6.0,24.2,31.46890650801069,12.847127284662498,1.8809957274619813,2.570581835636314,57.2,-8.9,0.9406376166785096,0.118725 -X_bot,20.7,5.0,4.1,19.75623679424021,8.835257690300725,0.4688971268422159,2.7764451051977987,28.7,-20.4,0.668221204908144,0.663558 -cobyj-bot,0.0,0.0,,,,,,,,,NA -andrewsiah,0.0,0.0,,,,,,,,,NA -jonahsingerbot,-61.3,4.7,-13.0,5.485368611367634,2.5302118657643557,-5.15484234051559,2.7848427377534137,-6.0,-20.1,0.004141428880289339,0.008283 -bean_bot,-70.7,4.7,-15.1,8.81313702231215,4.065196971858859,-3.702222190036137,2.7848427377534137,-3.7,-26.4,0.01192534276282408,0.023851 
-jkraybill_bot,-76.1,38.2,-2.0,67.06547883632598,10.85804803442324,-0.18370601441935402,2.023360215298298,20.0,-24.0,0.4276215664726116,0.855243 -CumulativeBot,-97.0,10.2,-9.5,30.12105998155594,9.408238498783877,-1.0055347747612828,2.2318482470257073,11.5,-30.5,0.17010877366473343,0.340218 -swingswish,-109.0,6.7,-16.3,15.145530939114826,5.8512290764953425,-2.779700630431383,2.4503873959101115,-1.9,-30.6,0.016896405137265973,0.033793 -SynapseSeer,-128.5,27.1,-4.8,47.08104512679923,9.052373408885058,-0.5249586045828704,2.0495688922222266,13.8,-23.3,0.3020257536154594,0.604052 -KevinTestBot,-148.3,8.4,-17.7,59.36966948088596,20.484482089149132,-0.861937850691314,2.3114957148363993,29.7,-65.0,0.20788855644704712,0.415777 -twsummerbot,-237.2,47.0,-5.0,79.50268976923377,11.596659167249031,-0.4351341379419649,2.011215351349222,18.3,-28.4,0.3327499422743516,0.665500 -pianobot,-272.2,4.7,-57.9,92.18716506105443,42.522768374266384,-1.3617857782441627,2.798986372998989,61.1,-176.9,0.12513690451031248,0.250274 -annabot,-316.0,24.8,-12.7,43.737410179436026,8.78268331306498,-1.4506136216521068,2.061307003341828,5.4,-30.8,0.07997018027788368,0.159940 -CatrachoCaster,-331.3,19.7,-16.8,52.31505896858736,11.786737352016457,-1.4269796898114384,2.0887774106971415,7.8,-41.4,0.08503530101258772,0.170071 -cookics_bot_TEST,-413.3,24.6,-16.8,72.42669439141218,14.602630986445607,-1.1504360014417054,2.060844706052324,13.3,-46.9,0.13074420290720767,0.261488 -GreeneiBot2,-446.6,45.8,-9.8,88.55320725176313,13.092082882350407,-0.7457050808617829,2.0123403544597687,16.6,-36.1,0.22987241625188587,0.459745 -metac-o1,-500.3,74.7,-6.7,111.25524179571492,12.872419395150438,-0.5203385298152786,1.9915966480791545,18.9,-32.3,0.3021936468001055,0.604387 -krm-bot,-521.0,9.5,-54.8,50.627856321510166,16.42584560255888,-3.3389622067030595,2.2647088573190035,-17.6,-92.0,0.004699854903992789,0.009400 
-4Shadower,-527.8,12.2,-43.3,80.79118175671782,23.1304480505728,-1.870272754393436,2.181694676433973,7.2,-93.7,0.043896119135688104,0.087792 -MWG,-766.4,29.5,-26.0,87.753337992406,16.156699118332316,-1.6080774730154093,2.043526587895404,7.0,-59.0,0.059420840675107243,0.118842 -bot_median,-780.6,75.7,-10.3,85.11389082378146,9.782559637787905,-1.0541472762650386,1.991180868356605,9.2,-29.8,0.14760661430231808,0.295213 -Bot_Pepa,-814.9,37.2,-21.9,93.0672852336652,15.269247572172862,-1.4365511370924278,2.0250978379673494,9.0,-52.9,0.07972209366548037,0.159444 -ajf-bot,-843.1,31.4,-26.9,104.85473327098268,18.727045567955233,-1.4360202527786072,2.0376668291983946,11.3,-65.1,0.08061224440506941,0.161224 -manticAI,-861.5,55.0,-15.7,82.87386541760124,11.169633780368585,-1.4011467022381876,2.003063688519742,6.7,-38.0,0.0834429937716208,0.166886 -ProfessorSP,-997.2,16.8,-59.4,96.91948763187727,23.64593376252087,-2.510292938252793,2.1123711239055107,-9.4,-109.3,0.011672270373603825,0.023345 -metac-perplexity,-1072.9,72.7,-14.8,105.3156072760711,12.351665757565863,-1.1948077828717358,1.9924623002180712,9.9,-39.4,0.11804973996535996,0.236099 -wunderplumb,-1159.0,23.8,-48.8,90.740106090436,18.619476902939518,-2.620989857063412,2.065034175048189,-10.4,-87.3,0.007676506818434511,0.015353 -laylaps,-1214.5,52.2,-23.3,48.01992906842049,6.64639675338256,-3.5005872010263053,2.005358510673014,-9.9,-36.6,0.0004856418727962744,0.000971 -NextWorldLab,-1224.1,63.8,-19.2,98.66262212994546,12.347305753344907,-1.552698610221572,1.9970180928411654,5.5,-43.8,0.06275829680564975,0.125517 -metac-Gemini-Exp-1206,-1250.5,65.1,-19.2,94.99321076040114,11.773404699868328,-1.6315194435246863,1.9963767235603869,4.3,-42.7,0.053842330878096756,0.107685 -minefrac1,-1289.4,43.5,-29.6,123.19979122882201,18.679504139979862,-1.5868575895194426,2.0149178012042084,8.0,-67.3,0.05997902931188052,0.119958 
-pgodzinai,-1330.4,62.0,-21.5,98.40405336166643,12.497327274265158,-1.7169528181446574,1.998173547416901,3.5,-46.4,0.04553088385451872,0.091062 -metac-deepseek-r1,-1360.3,48.2,-28.2,108.35980238796017,15.607907596292135,-1.808247915950853,2.0091123850303423,3.1,-59.6,0.038470700886698884,0.076941 -metac-Llama-3.1,-1412.1,73.7,-19.2,97.48349885250519,11.355267367831132,-1.687375000139217,1.9920236390185833,3.5,-41.8,0.04790881765000651,0.095818 -metac-claude-3-5-sonnet-latest,-1463.9,74.7,-19.6,96.8559111558961,11.206392518452509,-1.7487367238291156,1.9915966480791545,2.7,-41.9,0.04225009834107552,0.084500 -metac-claude-3-5-sonnet-20240620,-1649.9,75.1,-22.0,105.32409379053074,12.153679026757276,-1.8076157533135497,1.9915359040496325,2.2,-46.2,0.03736236035591808,0.074725 -metac-o1-preview,-1830.6,74.7,-24.5,107.51540873641419,12.439714393299266,-1.9699554012840843,1.9915966480791545,0.3,-49.3,0.026300611526952466,0.052601 -mmBot,-2006.4,75.7,-26.5,78.53235084186326,9.026110757840675,-2.9364459612521934,1.991180868356605,-8.5,-44.5,0.0022054969593251583,0.004411 -VeritasAI,-2024.5,67.7,-29.9,63.28210251110541,7.691066484341371,-3.88818660370801,1.9948486063528272,-14.6,-45.2,0.00011762351540143696,0.000235 -metac-grok-2-1212,-2154.6,74.7,-28.8,106.09460633753015,12.275325155894583,-2.3496848937723014,1.9915966480791545,-4.4,-53.3,0.01073504583547352,0.021470 -metac-gpt-4o,-2196.6,74.7,-29.4,100.42168394988849,11.618958453605197,-2.53084357359069,1.9915966480791545,-6.3,-52.5,0.006756252860737068,0.013513 -metac-exa,-2249.1,72.7,-30.9,91.72328991140397,10.757526338903716,-2.875853188346894,1.9924623002180712,-9.5,-52.4,0.002651041040011998,0.005302 -InstitutPelFutur,-2477.3,72.8,-34.0,102.04145421493415,11.959442897860137,-2.8453905383922216,1.992460623985373,-10.2,-57.9,0.002888355174527779,0.005777