From 96547d97fb4039557abe07fd6ed460650ec11ce9 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sat, 15 Nov 2025 11:41:12 -0800 Subject: [PATCH 1/2] add options_history field to question model and migration add options_history to question serialization add options_history initialization to question creation add helper functions to question/utils.py and add 'automatic' to forecast.source selection fix build_question_forecasts import and remove options & options_history from admin panel edit tests for question creation, multiple_choice_rename_option, multiple_choice_delete_options, multiple_choice_add_options add options_history to openapi docs add csv reporting support for options_history rebase to None openapi.py spelling update logic to play well with back/forward filling 0s update csv_utils update csv_utils 2 minor logic fix fix add all_options_ever to serializer and api docs add current options to csv return add support for datetime isoformat instead of timestamps in options_history move mc operations to mc handlers file move tests to appropriate locations minor cleanup fix forecast creation in test --- .../test_services/test_multiple_choice_handlers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py index 56b4d697e3..e2d565071e 100644 --- a/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py +++ b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py @@ -7,7 +7,10 @@ multiple_choice_add_options, multiple_choice_delete_options, multiple_choice_rename_option, +<<<<<<< HEAD multiple_choice_reorder_options, +======= +>>>>>>> 668d4c125 (add options_history field to question model and migration) ) from tests.unit.utils import datetime_aware as dt from users.models import User From e3481d0978b5760e43ec1d6bfc5db8f3964784a8 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 30 Nov 2025 11:08:11 -0800 Subject: [PATCH 2/2] mc/3813/backend/forecasts fix file restructure fix datetime iso format in history conflicts other fixes mc/forecasts tweaks 2 mc forecasts tweaks 3 mc forecasts tweaks 4 mc forecasts tweaks 5 add support for None values in MC predictions fix tests and source logic --- questions/serializers/common.py | 69 +++++-- questions/services/forecasts.py | 64 ++++++- .../services/multiple_choice_handlers.py | 2 +- tests/unit/test_questions/conftest.py | 1 + .../test_multiple_choice_handlers.py | 68 ++++++- tests/unit/test_questions/test_views.py | 171 ++++++++++++++++-- 6 files changed, 334 insertions(+), 41 deletions(-) diff --git a/questions/serializers/common.py b/questions/serializers/common.py index a472b678c2..d8fb56420a 100644 --- a/questions/serializers/common.py +++ b/questions/serializers/common.py @@ -19,7 +19,7 @@ ) from questions.serializers.aggregate_forecasts import serialize_question_aggregations from questions.services.multiple_choice_handlers import get_all_options_from_history -from questions.types import QuestionMovement +from questions.types import OptionsHistoryType, QuestionMovement from users.models import User from utils.the_math.formulas import ( get_scaled_quartiles_from_cdf, @@ -400,7 +400,7 @@ class ForecastWriteSerializer(serializers.ModelSerializer): probability_yes = serializers.FloatField(allow_null=True, required=False) probability_yes_per_category = serializers.DictField( - child=serializers.FloatField(), allow_null=True, required=False + child=serializers.FloatField(allow_null=True), allow_null=True, required=False ) continuous_cdf = serializers.ListField( child=serializers.FloatField(), @@ -441,21 +441,47 @@ def binary_validation(self, probability_yes): ) return probability_yes - def multiple_choice_validation(self, probability_yes_per_category, options): + def multiple_choice_validation( + self, + probability_yes_per_category: dict[str, float | None], + current_options: list[str], + options_history: OptionsHistoryType | None, + ): if probability_yes_per_category is None: raise serializers.ValidationError( "probability_yes_per_category is required" ) if not isinstance(probability_yes_per_category, dict): raise serializers.ValidationError("Forecast must be a dictionary") - if set(probability_yes_per_category.keys()) != set(options): - raise serializers.ValidationError("Forecast must include all options") - values = [float(probability_yes_per_category[option]) for option in options] - if not all([0.001 <= v <= 0.999 for v in values]) or not np.isclose( - sum(values), 1 - ): + if not set(current_options).issubset(set(probability_yes_per_category.keys())): + raise serializers.ValidationError( + f"Forecast must reflect current options: {current_options}" + ) + all_options = get_all_options_from_history(options_history) + if not set(probability_yes_per_category.keys()).issubset(set(all_options)): + raise serializers.ValidationError( + "Forecast contains probabilities for unknown options" + ) + + values: list[float | None] = [] + for option in all_options: + value = probability_yes_per_category.get(option, None) + if option in current_options: + if (value is None) or (not (0.001 <= value <= 0.999)): + raise serializers.ValidationError( + "Probabilities for current options must be between 0.001 and 0.999" + ) + elif value is not None: + raise serializers.ValidationError( + f"Probability for inactivate option '{option}' must be null or absent" + ) + values.append(value) + if not np.isclose(sum(filter(None, values)), 1): raise serializers.ValidationError( - "All probabilities must be between 0.001 and 0.999 and sum to 1.0" + "Forecast values must sum to 1.0. " + f"Received {probability_yes_per_category} which is interpreted as " + f"values: {values} representing {all_options} " + f"with current options {current_options}" ) return values @@ -562,7 +588,7 @@ def validate(self, data): "provided for multiple choice questions" ) data["probability_yes_per_category"] = self.multiple_choice_validation( - probability_yes_per_category, question.options + probability_yes_per_category, question.options, question.options_history ) else: # Continuous question if probability_yes or probability_yes_per_category: @@ -631,6 +657,21 @@ def serialize_question( archived_scores = question.user_archived_scores user_forecasts = question.request_user_forecasts last_forecast = user_forecasts[-1] if user_forecasts else None + # if the user has a pre-registered forecast, + # replace the current forecast and anything after it + if question.type == Question.QuestionType.MULTIPLE_CHOICE: + # Right now, Multiple Choice is the only type that can have pre-registered + # forecasts + if last_forecast and last_forecast.start_time > timezone.now(): + user_forecasts = [ + f for f in user_forecasts if f.start_time < timezone.now() + ] + if user_forecasts: + last_forecast.start_time = user_forecasts[-1].start_time + user_forecasts[-1] = last_forecast + else: + last_forecast.start_time = timezone.now() + user_forecasts = [last_forecast] if ( last_forecast and last_forecast.end_time @@ -645,11 +686,7 @@ def serialize_question( many=True, ).data, "latest": ( - MyForecastSerializer( - user_forecasts[-1], - ).data - if user_forecasts - else None + MyForecastSerializer(last_forecast).data if last_forecast else None ), "score_data": dict(), } diff --git a/questions/services/forecasts.py b/questions/services/forecasts.py index 15aba16fa3..2616dc7f09 100644 --- a/questions/services/forecasts.py +++ b/questions/services/forecasts.py @@ -1,7 +1,7 @@ import logging from collections import defaultdict -from datetime import timedelta -from typing import cast, Iterable +from datetime import datetime, timedelta, timezone as dt_timezone +from typing import cast, Iterable, Literal import sentry_sdk from django.db import transaction @@ -13,6 +13,7 @@ from posts.models import PostUserSnapshot, PostSubscription from posts.services.subscriptions import create_subscription_cp_change from posts.tasks import run_on_post_forecast +from questions.services.multiple_choice_handlers import get_all_options_from_history from scoring.models import Score from users.models import User from utils.cache import cache_per_object @@ -34,21 +35,67 @@ def create_forecast( *, - question: Question = None, - user: User = None, - continuous_cdf: list[float] = None, - probability_yes: float = None, - probability_yes_per_category: list[float] = None, - distribution_input=None, + question: Question, + user: User, + continuous_cdf: list[float] | None = None, + probability_yes: float | None = None, + probability_yes_per_category: list[float | None] | None = None, + distribution_input: dict | None = None, + end_time: datetime | None = None, + source: Forecast.SourceChoices | Literal[""] | None = None, **kwargs, ): now = timezone.now() post = question.get_post() + source = source or "" + + # delete all future-dated predictions, as this one will override them + Forecast.objects.filter(question=question, author=user, start_time__gt=now).delete() + + # if the forecast to be created is for a multiple choice question during a grace + # period, we need to agument the forecast accordingly (possibly preregister) + if question.type == Question.QuestionType.MULTIPLE_CHOICE: + if not probability_yes_per_category: + raise ValueError("probability_yes_per_category required for MC questions") + options_history = question.options_history + if options_history and len(options_history) > 1: + period_end = datetime.fromisoformat(options_history[-1][0]).replace( + tzinfo=dt_timezone.utc + ) + if period_end > now: + all_options = get_all_options_from_history(question.options_history) + prior_options = options_history[-2][1] + if end_time is None or end_time > period_end: + # create a pre-registration for the given forecast + Forecast.objects.create( + question=question, + author=user, + start_time=period_end, + end_time=end_time, + probability_yes_per_category=probability_yes_per_category, + post=post, + source=Forecast.SourceChoices.AUTOMATIC, + **kwargs, + ) + end_time = period_end + + prior_pmf: list[float | None] = [None] * len(all_options) + for i, (option, value) in enumerate( + zip(all_options, probability_yes_per_category) + ): + if value is None: + continue + if option in prior_options: + prior_pmf[i] = (prior_pmf[i] or 0.0) + value + else: + prior_pmf[-1] = (prior_pmf[-1] or 0.0) + value + probability_yes_per_category = prior_pmf forecast = Forecast.objects.create( question=question, author=user, start_time=now, + end_time=end_time, continuous_cdf=continuous_cdf, probability_yes=probability_yes, probability_yes_per_category=probability_yes_per_category, @@ -56,6 +103,7 @@ def create_forecast( distribution_input if question.type in QUESTION_CONTINUOUS_TYPES else None ), post=post, + source=source, **kwargs, ) # tidy up all forecasts diff --git a/questions/services/multiple_choice_handlers.py b/questions/services/multiple_choice_handlers.py index 37c8f09330..0a0a38114e 100644 --- a/questions/services/multiple_choice_handlers.py +++ b/questions/services/multiple_choice_handlers.py @@ -256,7 +256,7 @@ def multiple_choice_add_options( for forecast in user_forecasts: pmf = forecast.probability_yes_per_category forecast.probability_yes_per_category = ( - pmf[:-1] + [0.0] * len(options_to_add) + [pmf[-1]] + pmf[:-1] + [None] * len(options_to_add) + [pmf[-1]] ) if forecast.start_time < grace_period_end and ( forecast.end_time is None or forecast.end_time > grace_period_end diff --git a/tests/unit/test_questions/conftest.py b/tests/unit/test_questions/conftest.py index 7f7ab29e4f..9c75696c2a 100644 --- a/tests/unit/test_questions/conftest.py +++ b/tests/unit/test_questions/conftest.py @@ -28,6 +28,7 @@ def question_multiple_choice(): return create_question( question_type=Question.QuestionType.MULTIPLE_CHOICE, options=["a", "b", "c", "d"], + options_history=[("0001-01-01T00:00:00", ["a", "b", "c", "d"])], ) diff --git a/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py index e2d565071e..2070530b87 100644 --- a/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py +++ b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py @@ -7,10 +7,7 @@ multiple_choice_add_options, multiple_choice_delete_options, multiple_choice_rename_option, -<<<<<<< HEAD multiple_choice_reorder_options, -======= ->>>>>>> 668d4c125 (add options_history field to question model and migration) ) from tests.unit.utils import datetime_aware as dt from users.models import User @@ -192,6 +189,41 @@ def test_multiple_choice_reorder_options( [], False, ), # initial forecast is invalid + ( + ["a", "b", "other"], + ["b"], + [ + Forecast( + start_time=dt(2023, 1, 1), + end_time=dt(2024, 1, 1), + probability_yes_per_category=[0.6, 0.15, 0.25], + ), + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ), + ], + [ + Forecast( + start_time=dt(2023, 1, 1), + end_time=dt(2024, 1, 1), + probability_yes_per_category=[0.6, 0.15, 0.25], + ), + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, 0.5], + ), + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, None, 0.8], + source=Forecast.SourceChoices.AUTOMATIC, + ), + ], + True, + ), # preserve previous forecasts ], ) def test_multiple_choice_delete_options( @@ -330,6 +362,36 @@ def test_multiple_choice_delete_options( ], True, ), # no effect + ( + ["a", "b", "other"], + ["c"], + dt(2025, 1, 1), + [ + Forecast( + start_time=dt(2023, 1, 1), + end_time=dt(2024, 1, 1), + probability_yes_per_category=[0.6, 0.15, 0.25], + ), + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ), + ], + [ + Forecast( + start_time=dt(2023, 1, 1), + end_time=dt(2024, 1, 1), + probability_yes_per_category=[0.6, 0.15, None, 0.25], + ), + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, None, 0.5], + ), + ], + True, + ), # edit all forecasts including old ], ) def test_multiple_choice_add_options( diff --git a/tests/unit/test_questions/test_views.py b/tests/unit/test_questions/test_views.py index 2f009b1452..3e75a4f275 100644 --- a/tests/unit/test_questions/test_views.py +++ b/tests/unit/test_questions/test_views.py @@ -10,11 +10,13 @@ from posts.models import Post from questions.models import Forecast, Question, UserForecastNotification +from questions.types import OptionsHistoryType from questions.tasks import check_and_schedule_forecast_widrawal_due_notifications from tests.unit.test_posts.conftest import * # noqa from tests.unit.test_posts.factories import factory_post from tests.unit.test_questions.conftest import * # noqa from tests.unit.test_questions.factories import create_question +from users.models import User class TestQuestionForecast: @@ -75,30 +77,173 @@ def test_forecast_binary_invalid(self, post_binary_public, user1_client, props): ) assert response.status_code == 400 + @freeze_time("2025-01-01") @pytest.mark.parametrize( - "props", + "options_history,forecast_props,expected", [ - {"probability_yes_per_category": {"a": 0.1, "b": 0.2, "c": 0.3, "d": 0.4}}, + ( + [("0001-01-01T00:00:00", ["a", "other"])], + { + "probability_yes_per_category": { + "a": 0.6, + "other": 0.4, + }, + "end_time": "2026-01-01", + }, + [ + Forecast( + probability_yes_per_category=[0.6, 0.4], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + ], + ), # simple path + ( + [("0001-01-01T00:00:00", ["a", "b", "other"])], + { + "probability_yes_per_category": { + "a": 0.6, + "b": 0.15, + "other": 0.25, + }, + "end_time": "2026-01-01", + }, + [ + Forecast( + probability_yes_per_category=[0.6, 0.15, 0.25], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + ], + ), # simple path 3 options + ( + [ + ("0001-01-01T00:00:00", ["a", "b", "other"]), + (datetime(2024, 1, 1).isoformat(), ["a", "other"]), + ], + { + "probability_yes_per_category": { + "a": 0.6, + "other": 0.4, + }, + "end_time": "2026-01-01", + }, + [ + Forecast( + probability_yes_per_category=[0.6, None, 0.4], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + ], + ), # option deletion + ( + [ + ("0001-01-01T00:00:00", ["a", "b", "other"]), + (datetime(2024, 1, 1).isoformat(), ["a", "b", "c", "other"]), + ], + { + "probability_yes_per_category": { + "a": 0.6, + "b": 0.15, + "c": 0.20, + "other": 0.05, + }, + "end_time": "2026-01-01", + }, + [ + Forecast( + probability_yes_per_category=[0.6, 0.15, 0.20, 0.05], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + ], + ), # option addition + ( + [ + ("0001-01-01T00:00:00", ["a", "b", "other"]), + (datetime(2026, 1, 1).isoformat(), ["a", "b", "c", "other"]), + ], + { + "probability_yes_per_category": { + "a": 0.6, + "b": 0.15, + "c": 0.20, + "other": 0.05, + }, + }, + [ + Forecast( + probability_yes_per_category=[0.6, 0.15, None, 0.25], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + Forecast( + probability_yes_per_category=[0.6, 0.15, 0.20, 0.05], + start_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + end_time=None, + source=Forecast.SourceChoices.AUTOMATIC, + ), + ], + ), # forecasting during a grace period + ( + [ + ("0001-01-01T00:00:00", ["a", "b", "other"]), + (datetime(2026, 1, 1).isoformat(), ["a", "b", "c", "other"]), + ], + { + "probability_yes_per_category": { + "a": 0.6, + "b": 0.15, + "c": 0.20, + "other": 0.05, + }, + "end_time": "2027-01-01", + }, + [ + Forecast( + probability_yes_per_category=[0.6, 0.15, None, 0.25], + start_time=datetime(2025, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + ), + Forecast( + probability_yes_per_category=[0.6, 0.15, 0.20, 0.05], + start_time=datetime(2026, 1, 1, tzinfo=dt_timezone.utc), + end_time=datetime(2027, 1, 1, tzinfo=dt_timezone.utc), + source=Forecast.SourceChoices.AUTOMATIC, + ), + ], + ), # forecasting during a grace period with end time ], ) def test_forecast_multiple_choice( - self, post_multiple_choice_public, user1, user1_client, props + self, + post_multiple_choice_public: Post, + user1: User, + user1_client, + options_history: OptionsHistoryType, + forecast_props: dict, + expected: list[Forecast], ): + question = post_multiple_choice_public.question + question.options_history = options_history + question.options = options_history[-1][1] + question.save() response = user1_client.post( self.url, - data=json.dumps( - [{"question": post_multiple_choice_public.question.id, **props}] - ), + data=json.dumps([{"question": question.id, **forecast_props}]), content_type="application/json", ) assert response.status_code == 201 - forecast = Forecast.objects.filter( - question=post_multiple_choice_public.question, author=user1 - ).first() - assert forecast - assert forecast.probability_yes_per_category == list( - props.get("probability_yes_per_category").values() - ) + forecasts = Forecast.objects.filter( + question=post_multiple_choice_public.question, + author=user1, + ).order_by("start_time") + assert len(forecasts) == len(expected) + for f, e in zip(forecasts, expected): + assert f.start_time == e.start_time + assert f.end_time == e.end_time + assert f.probability_yes_per_category == e.probability_yes_per_category + assert f.source == e.source @pytest.mark.parametrize( "props",