diff --git a/docs/openapi.yml b/docs/openapi.yml index a819902cb..98fe89a91 100644 --- a/docs/openapi.yml +++ b/docs/openapi.yml @@ -494,6 +494,40 @@ components: items: type: string description: "List of options for multiple_choice questions" + example: + - "Democratic" + - "Republican" + - "Libertarian" + - "Green" + - "Other" + all_options_ever: + type: array + items: + type: string + description: "List of all options ever for multiple_choice questions" + example: + - "Democratic" + - "Republican" + - "Libertarian" + - "Green" + - "Blue" + - "Other" + options_history: + type: array + description: "List of [iso format time, options] pairs for multiple_choice questions" + items: + type: array + items: + oneOf: + - type: string + description: "ISO 8601 timestamp when the options became active" + - type: array + items: + type: string + description: "Options list active from this timestamp onward" + example: + - ["0001-01-01T00:00:00", ["a", "b", "c", "other"]] + - ["2026-10-22T16:00:00", ["a", "b", "c", "d", "other"]] status: type: string enum: [ upcoming, open, closed, resolved ] @@ -1306,6 +1340,7 @@ paths: actual_close_time: "2020-11-01T00:00:00Z" type: "numeric" options: null + options_history: null status: "resolved" resolution: "77289125.94957079" resolution_criteria: "Resolution Criteria Copy" @@ -1479,6 +1514,7 @@ paths: actual_close_time: "2015-12-15T03:34:00Z" type: "binary" options: null + options_history: null status: "resolved" possibilities: type: "binary" @@ -1548,6 +1584,16 @@ paths: - "Libertarian" - "Green" - "Other" + all_options_ever: + - "Democratic" + - "Republican" + - "Libertarian" + - "Green" + - "Blue" + - "Other" + options_history: + - ["0001-01-01T00:00:00", ["Democratic", "Republican", "Libertarian", "Other"]] + - ["2026-10-22T16:00:00", ["Democratic", "Republican", "Libertarian", "Green", "Other"]] status: "open" possibilities: { } resolution: null diff --git a/misc/views.py b/misc/views.py index ce428de8c..74f6914df 100644 --- 
a/misc/views.py +++ b/misc/views.py @@ -113,7 +113,9 @@ def get_site_stats(request): now_year = datetime.now().year public_questions = Question.objects.filter_public() stats = { - "predictions": Forecast.objects.filter(question__in=public_questions).count(), + "predictions": Forecast.objects.filter(question__in=public_questions) + .exclude(source=Forecast.SourceChoices.AUTOMATIC) + .count(), "questions": public_questions.count(), "resolved_questions": public_questions.filter(actual_resolve_time__isnull=False) .exclude(resolution__in=UnsuccessfulResolutionType) diff --git a/posts/models.py b/posts/models.py index 635e4a93c..2a3f4d0e2 100644 --- a/posts/models.py +++ b/posts/models.py @@ -810,7 +810,11 @@ def update_forecasts_count(self): Update forecasts count cache """ - self.forecasts_count = self.forecasts.filter_within_question_period().count() + self.forecasts_count = ( + self.forecasts.filter_within_question_period() + .exclude(source=Forecast.SourceChoices.AUTOMATIC) + .count() + ) self.save(update_fields=["forecasts_count"]) def update_forecasters_count(self): diff --git a/questions/admin.py b/questions/admin.py index dbefab525..d12545aa6 100644 --- a/questions/admin.py +++ b/questions/admin.py @@ -32,7 +32,12 @@ class QuestionAdmin(CustomTranslationAdmin, DynamicArrayMixin): "curation_status", "post_link", ] - readonly_fields = ["post_link", "view_forecasts"] + readonly_fields = [ + "post_link", + "view_forecasts", + "options", + "options_history", + ] search_fields = [ "id", "title_original", diff --git a/questions/migrations/0013_forecast_source.py b/questions/migrations/0013_forecast_source.py index ccd11208e..4230d216b 100644 --- a/questions/migrations/0013_forecast_source.py +++ b/questions/migrations/0013_forecast_source.py @@ -15,7 +15,7 @@ class Migration(migrations.Migration): name="source", field=models.CharField( blank=True, - choices=[("api", "Api"), ("ui", "Ui")], + choices=[("api", "Api"), ("ui", "Ui"), ("automatic", "Automatic")], default="", 
max_length=30, null=True, diff --git a/questions/migrations/0033_question_options_history.py b/questions/migrations/0033_question_options_history.py new file mode 100644 index 000000000..7c4b69a97 --- /dev/null +++ b/questions/migrations/0033_question_options_history.py @@ -0,0 +1,50 @@ +# Generated by Django 5.1.13 on 2025-11-15 19:35 +from datetime import datetime + + +import questions.models +from django.db import migrations, models + + +def initialize_options_history(apps, schema_editor): + Question = apps.get_model("questions", "Question") + questions = Question.objects.filter(options__isnull=False) + for question in questions: + if question.options: + question.options_history = [(datetime.min.isoformat(), question.options)] + Question.objects.bulk_update(questions, ["options_history"]) + + +class Migration(migrations.Migration): + + dependencies = [ + ("questions", "0032_alter_aggregateforecast_forecast_values_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="forecast", + name="source", + field=models.CharField( + blank=True, + choices=[("api", "Api"), ("ui", "Ui"), ("automatic", "Automatic")], + db_index=True, + default="", + max_length=30, + null=True, + ), + ), + migrations.AddField( + model_name="question", + name="options_history", + field=models.JSONField( + blank=True, + help_text="For Multiple Choice only.\n
list of tuples: (isoformat_datetime, options_list). (json stores them as lists)\n
Records the history of options over time.\n
Initialized with (datetime.min.isoformat(), self.options) upon question creation.\n
Updated whenever options are changed.", + null=True, + validators=[questions.models.validate_options_history], + ), + ), + migrations.RunPython( + initialize_options_history, reverse_code=migrations.RunPython.noop + ), + ] diff --git a/questions/models.py b/questions/models.py index 3849b0f8e..845b88e07 100644 --- a/questions/models.py +++ b/questions/models.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from typing import TYPE_CHECKING +from django.core.exceptions import ValidationError from django.db import models from django.db.models import Count, QuerySet, Q, F, Exists, OuterRef from django.utils import timezone @@ -8,7 +9,7 @@ from sql_util.aggregates import SubqueryAggregate from questions.constants import QuestionStatus -from questions.types import AggregationMethod +from questions.types import AggregationMethod, OptionsHistoryType from scoring.constants import ScoreTypes from users.models import User from utils.models import TimeStampedModel, TranslatedModel @@ -20,6 +21,27 @@ DEFAULT_INBOUND_OUTCOME_COUNT = 200 +def validate_options_history(value): + # Expect: [ (isoformat datetime str, [str, ...]), ... 
] or equivalent + if not isinstance(value, list): + raise ValidationError("Must be a list.") + for i, item in enumerate(value): + if ( + not isinstance(item, (list, tuple)) + or len(item) != 2 + or not isinstance(item[0], str) + or not isinstance(item[1], list) + or not all(isinstance(s, str) for s in item[1]) + ): + raise ValidationError(f"Bad item at index {i}: {item!r}") + try: + datetime.fromisoformat(item[0]) + except ValueError: + raise ValidationError( + f"Bad datetime format at index {i}: {item[0]!r}, must be isoformat string" + ) + + class QuestionQuerySet(QuerySet): def annotate_forecasts_count(self): return self.annotate( @@ -197,8 +219,20 @@ class QuestionType(models.TextChoices): ) unit = models.CharField(max_length=25, blank=True) - # list of multiple choice option labels - options = ArrayField(models.CharField(max_length=200), blank=True, null=True) + # multiple choice fields + options: list[str] | None = ArrayField( + models.CharField(max_length=200), blank=True, null=True + ) + options_history: OptionsHistoryType | None = models.JSONField( + null=True, + blank=True, + validators=[validate_options_history], + help_text="""For Multiple Choice only. +
list of tuples: (isoformat_datetime, options_list). (json stores them as lists) +
Records the history of options over time. +
Initialized with (datetime.min.isoformat(), self.options) upon question creation. +
Updated whenever options are changed.""", + ) # Legacy field that will be removed possibilities = models.JSONField(null=True, blank=True) @@ -251,6 +285,9 @@ def save(self, **kwargs): self.zero_point = None if self.type != self.QuestionType.MULTIPLE_CHOICE: self.options = None + if self.type == self.QuestionType.MULTIPLE_CHOICE and not self.options_history: + # initialize options history on first save + self.options_history = [(datetime.min.isoformat(), self.options or [])] return super().save(**kwargs) @@ -545,8 +582,11 @@ class Forecast(models.Model): ) class SourceChoices(models.TextChoices): - API = "api" - UI = "ui" + API = "api" # made via the api + UI = "ui" # made using the ui + # an automatically assigned forecast + # usually this means a regular forecast was split + AUTOMATIC = "automatic" # logging the source of the forecast for data purposes source = models.CharField( @@ -555,6 +595,7 @@ class SourceChoices(models.TextChoices): null=True, choices=SourceChoices.choices, default="", + db_index=True, ) distribution_input = models.JSONField( diff --git a/questions/serializers/common.py b/questions/serializers/common.py index d514b5951..a472b678c 100644 --- a/questions/serializers/common.py +++ b/questions/serializers/common.py @@ -17,9 +17,8 @@ AggregateForecast, Forecast, ) -from questions.serializers.aggregate_forecasts import ( - serialize_question_aggregations, -) +from questions.serializers.aggregate_forecasts import serialize_question_aggregations +from questions.services.multiple_choice_handlers import get_all_options_from_history from questions.types import QuestionMovement from users.models import User from utils.the_math.formulas import ( @@ -40,6 +39,7 @@ class QuestionSerializer(serializers.ModelSerializer): actual_close_time = serializers.SerializerMethodField() resolution = serializers.SerializerMethodField() spot_scoring_time = serializers.SerializerMethodField() + all_options_ever = serializers.SerializerMethodField() class Meta: model = 
Question @@ -58,6 +58,8 @@ class Meta: "type", # Multiple-choice Questions only "options", + "all_options_ever", + "options_history", "group_variable", # Used for Group Of Questions to determine # whether question is eligible for forecasting @@ -122,6 +124,10 @@ def get_actual_close_time(self, question: Question): return min(question.scheduled_close_time, question.actual_resolve_time) return question.scheduled_close_time + def get_all_options_ever(self, question: Question): + if question.options_history: + return get_all_options_from_history(question.options_history) + def get_resolution(self, question: Question): resolution = question.resolution diff --git a/questions/services/multiple_choice_handlers.py b/questions/services/multiple_choice_handlers.py new file mode 100644 index 000000000..37c8f0933 --- /dev/null +++ b/questions/services/multiple_choice_handlers.py @@ -0,0 +1,275 @@ +from datetime import datetime, timezone as dt_timezone + +from django.db import transaction +from django.db.models import Q +from django.utils import timezone + +from questions.models import Question, Forecast +from questions.types import OptionsHistoryType + + +def get_all_options_from_history( + options_history: OptionsHistoryType | None, +) -> list[str]: + """Returns the list of all options ever available. The last value in the list + is always the "catch-all" option. 
+ + example: + options_history = [ + ("2020-01-01", ["a", "b", "other"]), + ("2020-01-02", ["a", "b", "c", "other"]), + ("2020-01-03", ["a", "c", "other"]), + ] + return ["a", "b", "c", "other"] + """ + if not options_history: + raise ValueError("Cannot make master list from empty history") + designated_other_label = options_history[0][1][-1] + all_labels: list[str] = [] + for _, options in options_history: + for label in options[:-1]: + if label not in all_labels: + all_labels.append(label) + return all_labels + [designated_other_label] + + +def multiple_choice_rename_option( + question: Question, + old_option: str, + new_option: str, +) -> Question: + """ + Modifies question in place and returns it. + Renames multiple choice option in question options and options history. + """ + if question.type != Question.QuestionType.MULTIPLE_CHOICE: + raise ValueError("Question must be multiple choice") + if not question.options or old_option not in question.options: + raise ValueError("Old option not found") + if new_option in question.options: + raise ValueError("New option already exists") + if not question.options_history: + raise ValueError("Options history is empty") + + question.options = [ + new_option if opt == old_option else opt for opt in question.options + ] + for i, (timestr, options) in enumerate(question.options_history): + question.options_history[i] = ( + timestr, + [new_option if opt == old_option else opt for opt in options], + ) + + return question + + +def multiple_choice_reorder_options( + question: Question, + new_options_order: list[str], +) -> Question: + """ + Modifies question in place and returns it. + Reorders multiple choice options in question options and options history. + Requires all options ever to be present in new_options_order. + + For now, only supports reordering if options have never changed. 
+ """ + current_options = question.options + all_options_ever = get_all_options_from_history(question.options_history) + if question.type != Question.QuestionType.MULTIPLE_CHOICE: + raise ValueError("Question must be multiple choice") + if not current_options: + raise ValueError("Question has no options") + if set(new_options_order) != set(all_options_ever): + raise ValueError("New order does not match existing options") + if not question.options_history: + raise ValueError("Options history is empty") + + if len(question.options_history) != 1: + # TODO: support reordering options with history changes + raise ValueError("Cannot reorder options that have changed") + + # update options history (it is only one entry long) + question.options_history[0] = (question.options_history[0][0], new_options_order) + question.options = new_options_order + question.save() + + # update user forecasts + # example forecast remap: all_options_ever = [a,b,c], new_options_order = [c,a,b] + # remap = [2,0,1] + # if a forecast is [0.2,0.3,0.5], then the new one is [0.5,0.2,0.3] + remap = [all_options_ever.index(option) for option in new_options_order] + for forecast in question.user_forecasts.all(): + forecast.probability_yes_per_category = [ + forecast.probability_yes_per_category[i] for i in remap + ] + forecast.save() + + # trigger recalculation of aggregates + from questions.services.forecasts import build_question_forecasts + + build_question_forecasts(question) + + return question + + +def multiple_choice_delete_options( + question: Question, + options_to_delete: list[str], + timestep: datetime | None = None, +) -> Question: + """ + Modifies question in place and returns it. + Deletes multiple choice options in question options. + Adds a new entry to options_history. + Slices all user forecasts at timestep. + Triggers recalculation of aggregates. 
+ """ + if not options_to_delete: + return question + timestep = timestep or timezone.now() + if question.type != Question.QuestionType.MULTIPLE_CHOICE: + raise ValueError("Question must be multiple choice") + if not question.options or not all( + [opt in question.options for opt in options_to_delete] + ): + raise ValueError("Option to delete not found") + if not question.options_history: + raise ValueError("Options history is empty") + + if ( + datetime.fromisoformat(question.options_history[-1][0]).replace( + tzinfo=dt_timezone.utc + ) + > timestep + ): + raise ValueError("timestep is before the last options history entry") + + # update question + new_options = [opt for opt in question.options if opt not in options_to_delete] + all_options = get_all_options_from_history(question.options_history) + + question.options = new_options + question.options_history.append((timestep.isoformat(), new_options)) + question.save() + + # update user forecasts + user_forecasts = question.user_forecasts.filter( + Q(end_time__isnull=True) | Q(end_time__gt=timestep), + start_time__lt=timestep, + ) + forecasts_to_create: list[Forecast] = [] + for forecast in user_forecasts: + # get new PMF + previous_pmf = forecast.probability_yes_per_category + if len(previous_pmf) != len(all_options): + raise ValueError( + f"Forecast {forecast.id} PMF length does not match " + f"all options {all_options}" + ) + new_pmf: list[float | None] = [None] * len(all_options) + for value, label in zip(previous_pmf, all_options): + if value is None: + continue + if label in new_options: + new_pmf[all_options.index(label)] = ( + new_pmf[all_options.index(label)] or 0.0 + ) + value + else: + new_pmf[-1] = ( + new_pmf[-1] or 0.0 + ) + value # add to catch-all last option + + # slice forecast + if forecast.start_time >= timestep: + # forecast is completely after timestep, just update PMF + forecast.probability_yes_per_category = new_pmf + continue + forecasts_to_create.append( + Forecast( + question=question, + 
author=forecast.author, + start_time=timestep, + end_time=forecast.end_time, + probability_yes_per_category=new_pmf, + post=forecast.post, + source=Forecast.SourceChoices.AUTOMATIC, # mark as automatic forecast + ) + ) + forecast.end_time = timestep + + with transaction.atomic(): + Forecast.objects.bulk_update( + user_forecasts, ["end_time", "probability_yes_per_category"] + ) + Forecast.objects.bulk_create(forecasts_to_create) + + # trigger recalculation of aggregates + from questions.services.forecasts import build_question_forecasts + + build_question_forecasts(question) + + return question + + +def multiple_choice_add_options( + question: Question, + options_to_add: list[str], + grace_period_end: datetime, + timestep: datetime | None = None, +) -> Question: + """ + Modifies question in place and returns it. + Adds multiple choice options in question options. + Adds a new entry to options_history. + Terminates all user forecasts at grace_period_end. + Triggers recalculation of aggregates. 
+ """ + if not options_to_add: + return question + timestep = timestep or timezone.now() + if question.type != Question.QuestionType.MULTIPLE_CHOICE: + raise ValueError("Question must be multiple choice") + if not question.options or any([opt in question.options for opt in options_to_add]): + raise ValueError("Option to add already found") + if not question.options_history: + raise ValueError("Options history is empty") + + if timestep > grace_period_end: + raise ValueError("grace_period_end must end after timestep") + if ( + datetime.fromisoformat(question.options_history[-1][0]).replace( + tzinfo=dt_timezone.utc + ) + > timestep + ): + raise ValueError("timestep is before the last options history entry") + + # update question + new_options = question.options[:-1] + options_to_add + question.options[-1:] + question.options = new_options + question.options_history.append((grace_period_end.isoformat(), new_options)) + question.save() + + # update user forecasts + user_forecasts = question.user_forecasts.all() + for forecast in user_forecasts: + pmf = forecast.probability_yes_per_category + forecast.probability_yes_per_category = ( + pmf[:-1] + [0.0] * len(options_to_add) + [pmf[-1]] + ) + if forecast.start_time < grace_period_end and ( + forecast.end_time is None or forecast.end_time > grace_period_end + ): + forecast.end_time = grace_period_end + with transaction.atomic(): + Forecast.objects.bulk_update( + user_forecasts, ["probability_yes_per_category", "end_time"] + ) + + # trigger recalculation of aggregates + from questions.services.forecasts import build_question_forecasts + + build_question_forecasts(question) + + return question diff --git a/questions/types.py b/questions/types.py index 9556806b4..f87735e52 100644 --- a/questions/types.py +++ b/questions/types.py @@ -3,6 +3,8 @@ from django.db import models from django.db.models import TextChoices +OptionsHistoryType = list[tuple[str, list[str]]] + class Direction(TextChoices): UNCHANGED = "unchanged" diff 
--git a/tests/unit/test_questions/test_models.py b/tests/unit/test_questions/test_models.py index ba405474a..74c5e49b3 100644 --- a/tests/unit/test_questions/test_models.py +++ b/tests/unit/test_questions/test_models.py @@ -43,3 +43,14 @@ def test_filter_within_question_period( Forecast.objects.filter(id=f1.id).filter_within_question_period().exists() == include ) + + +def test_initialize_multiple_choice_question(): + question = create_question( + question_type=Question.QuestionType.MULTIPLE_CHOICE, + options=["a", "b", "other"], + ) + question.save() + assert ( + question.options_history and question.options_history[0][1] == question.options + ) diff --git a/tests/unit/test_questions/test_services.py b/tests/unit/test_questions/test_services/test_lifecycle.py similarity index 100% rename from tests/unit/test_questions/test_services.py rename to tests/unit/test_questions/test_services/test_lifecycle.py diff --git a/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py new file mode 100644 index 000000000..56b4d697e --- /dev/null +++ b/tests/unit/test_questions/test_services/test_multiple_choice_handlers.py @@ -0,0 +1,378 @@ +from datetime import datetime + +import pytest # noqa + +from questions.models import Question, Forecast +from questions.services.multiple_choice_handlers import ( + multiple_choice_add_options, + multiple_choice_delete_options, + multiple_choice_rename_option, + multiple_choice_reorder_options, +) +from tests.unit.utils import datetime_aware as dt +from users.models import User + + +@pytest.mark.parametrize( + "old_option,new_option,expect_success", + [ + ("Option B", "Option D", True), + ("Option X", "Option Y", False), # old_option does not exist + ("Option A", "Option A", False), # new_option already exists + ], +) +def test_multiple_choice_rename_option( + question_multiple_choice, old_option, new_option, expect_success +): + question = 
question_multiple_choice + question.options = ["Option A", "Option B", "Option C"] + question.save() + + if not expect_success: + with pytest.raises(ValueError): + multiple_choice_rename_option(question, old_option, new_option) + return + updated_question = multiple_choice_rename_option(question, old_option, new_option) + + assert old_option not in updated_question.options + assert new_option in updated_question.options + assert len(updated_question.options) == 3 + + +@pytest.mark.parametrize( + "new_options_order,expect_success", + [ + (["Option A", "Option B", "Option C"], True), # no change + (["Option C", "Option B", "Option A"], True), # happy path + (["Option B", "Option A"], False), # different number of options + ( + ["Option A", "Option B", "Option C", "D"], + False, + ), # different number of options + (["Option D", "Option E", "Option F"], False), # different options + ], +) +def test_multiple_choice_reorder_options( + question_multiple_choice, user1, new_options_order, expect_success +): + question = question_multiple_choice + original_options = ["Option A", "Option B", "Option C"] + question.options = original_options + question.options_history = [(datetime.min.isoformat(), original_options)] + question.save() + Forecast.objects.create( + author=user1, + question=question, + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + + if not expect_success: + with pytest.raises(ValueError): + multiple_choice_reorder_options(question, new_options_order) + return + updated_question = multiple_choice_reorder_options(question, new_options_order) + + assert updated_question.options == new_options_order + forecast = updated_question.user_forecasts.first() + assert forecast is not None + assert forecast.probability_yes_per_category == [ + [0.2, 0.3, 0.5][original_options.index(opt)] for opt in new_options_order + ] + + +@pytest.mark.parametrize( + 
"initial_options,options_to_delete,forecasts,expected_forecasts,expect_success", + [ + (["a", "b", "other"], ["b"], [], [], True), # simplest path + (["a", "b", "other"], ["c"], [], [], False), # try to remove absent item + (["a", "b", "other"], ["a", "b"], [], [], True), # remove two items + ( + ["a", "b", "other"], + ["b"], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, 0.5], + ), + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, None, 0.8], + source=Forecast.SourceChoices.AUTOMATIC, + ), + ], + True, + ), # happy path + ( + ["a", "b", "c", "other"], + ["b", "c"], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.1, 0.4], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, 0.1, 0.4], + ), + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, None, None, 0.8], + source=Forecast.SourceChoices.AUTOMATIC, + ), + ], + True, + ), # happy path removing 2 + ( + ["a", "b", "other"], + ["b"], + [ + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.8], + ) + ], + [ + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.8], + ), + ], + True, + ), # forecast is at / after timestep + ( + ["a", "b", "other"], + [], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + True, + ), # no effect + ( + ["a", "b", "other"], + ["b"], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + 
probability_yes_per_category=[0.2, 0.8], + ) + ], + [], + False, + ), # initial forecast is invalid + ], +) +def test_multiple_choice_delete_options( + question_multiple_choice: Question, + user1: User, + initial_options: list[str], + options_to_delete: list[str], + forecasts: list[Forecast], + expected_forecasts: list[Forecast], + expect_success: bool, +): + question = question_multiple_choice + question.options = initial_options + question.options_history = [(datetime.min.isoformat(), initial_options)] + question.save() + + timestep = dt(2025, 1, 1) + for forecast in forecasts: + forecast.author = user1 + forecast.question = question + forecast.save() + + if not expect_success: + with pytest.raises(ValueError): + multiple_choice_delete_options( + question, options_to_delete, timestep=timestep + ) + return + + multiple_choice_delete_options(question, options_to_delete, timestep=timestep) + + question.refresh_from_db() + expected_options = [opt for opt in initial_options if opt not in options_to_delete] + assert question.options == expected_options + ts, options = question.options_history[-1] + assert ts == ( + timestep.isoformat() if options_to_delete else datetime.min.isoformat() + ) + assert options == expected_options + + forecasts = question.user_forecasts.order_by("start_time") + assert len(forecasts) == len(expected_forecasts) + for f, e in zip(forecasts, expected_forecasts): + assert f.start_time == e.start_time + assert f.end_time == e.end_time + assert f.probability_yes_per_category == e.probability_yes_per_category + assert f.source == e.source + + +@pytest.mark.parametrize( + "initial_options,options_to_add,grace_period_end,forecasts,expected_forecasts," + "expect_success", + [ + (["a", "b", "other"], ["c"], dt(2025, 1, 1), [], [], True), # simplest path + (["a", "b", "other"], ["b"], dt(2025, 1, 1), [], [], False), # copied add + (["a", "b", "other"], ["c", "d"], dt(2025, 1, 1), [], [], True), # double add + # grace period before last options history + 
(["a", "b", "other"], ["c"], dt(1900, 1, 1), [], [], False), + ( + ["a", "b", "other"], + ["c"], + dt(2025, 1, 1), + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, None, 0.5], + ) + ], + True, + ), # happy path + ( + ["a", "b", "other"], + ["c", "d"], + dt(2025, 1, 1), + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=dt(2025, 1, 1), + probability_yes_per_category=[0.2, 0.3, None, None, 0.5], + ) + ], + True, + ), # happy path adding two options + ( + ["a", "b", "other"], + ["c"], + dt(2025, 1, 1), + [ + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2025, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, None, 0.5], + ) + ], + True, + ), # forecast starts at /after grace_period_end + ( + ["a", "b", "other"], + [], + dt(2025, 1, 1), + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + [ + Forecast( + start_time=dt(2024, 1, 1), + end_time=None, + probability_yes_per_category=[0.2, 0.3, 0.5], + ) + ], + True, + ), # no effect + ], +) +def test_multiple_choice_add_options( + question_multiple_choice: Question, + user1: User, + initial_options: list[str], + options_to_add: list[str], + grace_period_end: datetime, + forecasts: list[Forecast], + expected_forecasts: list[Forecast], + expect_success: bool, +): + question = question_multiple_choice + question.options = initial_options + question.options_history = [(datetime.min.isoformat(), initial_options)] + question.save() + + for forecast in forecasts: + forecast.author = user1 + forecast.question = question + forecast.save() + + if not 
expect_success: + with pytest.raises(ValueError): + multiple_choice_add_options( + question, options_to_add, grace_period_end, timestep=dt(2024, 7, 1) + ) + return + + multiple_choice_add_options( + question, options_to_add, grace_period_end, timestep=dt(2024, 7, 1) + ) + + question.refresh_from_db() + expected_options = initial_options[:-1] + options_to_add + initial_options[-1:] + assert question.options == expected_options + ts, options = question.options_history[-1] + assert ts == ( + grace_period_end.isoformat() if options_to_add else datetime.min.isoformat() + ) + assert options == expected_options + + forecasts = question.user_forecasts.order_by("start_time") + assert len(forecasts) == len(expected_forecasts) + for f, e in zip(forecasts, expected_forecasts): + assert f.start_time == e.start_time + assert f.end_time == e.end_time + assert f.probability_yes_per_category == e.probability_yes_per_category + assert f.source == e.source diff --git a/users/views.py b/users/views.py index 993613cc9..15f843e16 100644 --- a/users/views.py +++ b/users/views.py @@ -326,7 +326,7 @@ def get_forecasting_stats_data( ) if user is not None: forecasts = forecasts.filter(author=user) - forecasts_count = forecasts.count() + forecasts_count = forecasts.exclude(source=Forecast.SourceChoices.AUTOMATIC).count() questions_predicted_count = forecasts.values("question").distinct().count() score_count = len(scores) diff --git a/utils/csv_utils.py b/utils/csv_utils.py index 447bbe1d6..d096d4ad5 100644 --- a/utils/csv_utils.py +++ b/utils/csv_utils.py @@ -16,6 +16,7 @@ Forecast, QUESTION_CONTINUOUS_TYPES, ) +from questions.services.multiple_choice_handlers import get_all_options_from_history from questions.types import AggregationMethod from scoring.models import Score, ArchivedScore from utils.the_math.aggregations import get_aggregation_history @@ -328,7 +329,9 @@ def generate_data( + "**`Default Project ID`** - the id of the default project for the Post.\n" + "**`Label`** - for a group 
question, this is the sub-question object.\n" + "**`Question Type`** - the type of the question. Binary, Multiple Choice, Numeric, Discrete, or Date.\n" - + "**`MC Options`** - the options for a multiple choice question, if applicable.\n" + + "**`MC Options (Current)`** - the current options for a multiple choice question, if applicable.\n" + + "**`MC Options (All)`** - the options for a multiple choice question across all time, if applicable.\n" + + "**`MC Options History`** - the history of options over time. Each entry is a isoformat time and a record of what the options were at that time.\n" + "**`Lower Bound`** - the lower bound of the forecasting range for a continuous question.\n" + "**`Open Lower Bound`** - whether the lower bound is open.\n" + "**`Upper Bound`** - the upper bound of the forecasting range for a continuous question.\n" @@ -357,7 +360,9 @@ def generate_data( "Default Project ID", "Label", "Question Type", - "MC Options", + "MC Options (Current)", + "MC Options (All)", + "MC Options History", "Lower Bound", "Open Lower Bound", "Upper Bound", @@ -406,7 +411,13 @@ def format_value(val): post.default_project_id, question.label, question.type, - question.options or None, + question.options, + ( + get_all_options_from_history(question.options_history) + if question.options_history + else None + ), + question.options_history or None, format_value(question.range_min), question.open_lower_bound, format_value(question.range_max), @@ -446,7 +457,7 @@ def format_value(val): + "**`End Time`** - the time when the forecast ends. If not populated, the forecast is still active. 
Note that this can be set in the future indicating an expiring forecast.\n" + "**`Forecaster Count`** - if this is an aggregate forecast, how many forecasts contribute to it.\n" + "**`Probability Yes`** - the probability of the binary question resolving to 'Yes'\n" - + "**`Probability Yes Per Category`** - a list of probabilities corresponding to each option for a multiple choice question. Cross-reference 'MC Options' in `question_data.csv`.\n" + + "**`Probability Yes Per Category`** - a list of probabilities corresponding to each option for a multiple choice question. Cross-reference 'MC Options (All)' in `question_data.csv`. Note that a Multiple Choice forecast will have None in places where the corresponding option wasn't available for forecast at the time.\n" + "**`Continuous CDF`** - the value of the CDF (cumulative distribution function) at each of the locations in the continuous range for a continuous question. Cross-reference 'Continuous Range' in `question_data.csv`.\n" + "**`Probability Below Lower Bound`** - the probability of the question resolving below the lower bound for a continuous question.\n" + "**`Probability Above Upper Bound`** - the probability of the question resolving above the upper bound for a continuous question.\n"