diff --git a/questions/models.py b/questions/models.py
index 845b88e07c..60edd13fda 100644
--- a/questions/models.py
+++ b/questions/models.py
@@ -637,14 +637,16 @@ def get_prediction_values(self) -> list[float | None]:
             return self.probability_yes_per_category
         return self.continuous_cdf
 
-    def get_pmf(self) -> list[float]:
+    def get_pmf(self, replace_none: bool = False) -> list[float | None]:
         """
-        gets the PMF for this forecast, replacing None values with 0.0
-        Not for serialization use (keep None values in that case)
+        gets the PMF for this forecast
+        replaces None values with 0.0 if replace_none is True
         """
         if self.probability_yes:
             return [1 - self.probability_yes, self.probability_yes]
         if self.probability_yes_per_category:
+            if not replace_none:
+                return self.probability_yes_per_category
             return [
                 v or 0.0 for v in self.probability_yes_per_category
             ]  # replace None with 0.0
@@ -719,18 +721,20 @@ def get_cdf(self) -> list[float | None] | None:
             return self.forecast_values
         return None
 
-    def get_pmf(self) -> list[float]:
+    def get_pmf(self, replace_none: bool = False) -> list[float | None]:
         """
-        gets the PMF for this forecast, replacing None values with 0.0
-        Not for serialization use (keep None values in that case)
+        gets the PMF for this forecast
+        replaces None values with 0.0 if replace_none is True
         """
         # grab annotation if it exists for efficiency
         question_type = getattr(self, "question_type", self.question.type)
-        forecast_values = [
-            v or 0.0 for v in self.forecast_values
-        ]  # replace None with 0.0
+        forecast_values = self.forecast_values
+        if question_type == Question.QuestionType.MULTIPLE_CHOICE:
+            if not replace_none:
+                return forecast_values
+            return [v or 0.0 for v in forecast_values]  # replace None with 0.0
         if question_type in QUESTION_CONTINUOUS_TYPES:
-            cdf: list[float] = forecast_values
+            cdf: list[float] = forecast_values  # type: ignore
             pmf = [cdf[0]]
             for i in range(1, len(cdf)):
                 pmf.append(cdf[i] - cdf[i - 1])
diff --git a/scoring/score_math.py b/scoring/score_math.py
index fada04f0d1..546b19d310 100644
--- a/scoring/score_math.py
+++ b/scoring/score_math.py
@@ -20,7 +20,7 @@
 
 @dataclass
 class AggregationEntry:
-    pmf: np.ndarray | list[float]
+    pmf: np.ndarray | list[float | None]
     num_forecasters: int
     timestamp: float
 
@@ -36,7 +36,7 @@ def get_geometric_means(
             timesteps.add(forecast.end_time.timestamp())
     for timestep in sorted(timesteps):
        prediction_values = [
-            f.get_pmf()
+            f.get_pmf(replace_none=True)
            for f in forecasts
            if f.start_time.timestamp() <= timestep
            and (f.end_time is None or f.end_time.timestamp() > timestep)
@@ -84,9 +84,12 @@ def evaluate_forecasts_baseline_accuracy(
         forecast_coverage = forecast_duration / total_duration
         pmf = forecast.get_pmf()
         if question_type in ["binary", "multiple_choice"]:
-            forecast_score = (
-                100 * np.log(pmf[resolution_bucket] * len(pmf)) / np.log(len(pmf))
-            )
+            # forecasts always have `None` assigned to MC options that aren't
+            # available at the time. Detecting these allows us to avoid trying to
+            # follow the question's options_history.
+            options_at_time = len([p for p in pmf if p is not None])
+            p = pmf[resolution_bucket] or pmf[-1]  # if None, read from Other
+            forecast_score = 100 * np.log(p * options_at_time) / np.log(options_at_time)
         else:
             if resolution_bucket in [0, len(pmf) - 1]:
                 baseline = 0.05
@@ -116,8 +119,13 @@
         if start <= spot_forecast_timestamp < end:
             pmf = forecast.get_pmf()
             if question_type in ["binary", "multiple_choice"]:
+                # forecasts always have `None` assigned to MC options that aren't
+                # available at the time. Detecting these allows us to avoid trying to
+                # follow the question's options_history.
+                options_at_time = len([p for p in pmf if p is not None])
+                p = pmf[resolution_bucket] or pmf[-1]  # if None, read from Other
                 forecast_score = (
-                    100 * np.log(pmf[resolution_bucket] * len(pmf)) / np.log(len(pmf))
+                    100 * np.log(p * options_at_time) / np.log(options_at_time)
                 )
             else:
                 if resolution_bucket in [0, len(pmf) - 1]:
@@ -159,17 +167,21 @@ def evaluate_forecasts_peer_accuracy(
             continue
 
         pmf = forecast.get_pmf()
+        p = pmf[resolution_bucket] or pmf[-1]  # if None, read from Other
         interval_scores: list[float | None] = []
         for gm in geometric_mean_forecasts:
             if forecast_start <= gm.timestamp < forecast_end:
-                score = (
+                gmp = (
+                    gm.pmf[resolution_bucket] or gm.pmf[-1]
+                )  # if None, read from Other
+                interval_score = (
                     100
                     * (gm.num_forecasters / (gm.num_forecasters - 1))
-                    * np.log(pmf[resolution_bucket] / gm.pmf[resolution_bucket])
+                    * np.log(p / gmp)
                 )
                 if question_type in QUESTION_CONTINUOUS_TYPES:
-                    score /= 2
-                interval_scores.append(score)
+                    interval_score /= 2
+                interval_scores.append(interval_score)
             else:
                 interval_scores.append(None)
 
@@ -218,10 +230,10 @@ def evaluate_forecasts_peer_spot_forecast(
         )
         if start <= spot_forecast_timestamp < end:
             pmf = forecast.get_pmf()
+            p = pmf[resolution_bucket] or pmf[-1]  # if None, read from Other
+            gmp = gm.pmf[resolution_bucket] or gm.pmf[-1]  # if None, read from Other
             forecast_score = (
-                100
-                * (gm.num_forecasters / (gm.num_forecasters - 1))
-                * np.log(pmf[resolution_bucket] / gm.pmf[resolution_bucket])
+                100 * (gm.num_forecasters / (gm.num_forecasters - 1)) * np.log(p / gmp)
             )
             if question_type in QUESTION_CONTINUOUS_TYPES:
                 forecast_score /= 2
@@ -260,11 +272,15 @@ def evaluate_forecasts_legacy_relative(
             continue
 
         pmf = forecast.get_pmf()
+        p = pmf[resolution_bucket] or pmf[-1]  # if None, read from Other
         interval_scores: list[float | None] = []
         for bf in baseline_forecasts:
             if forecast_start <= bf.timestamp < forecast_end:
-                score = np.log2(pmf[resolution_bucket] / bf.pmf[resolution_bucket])
-                interval_scores.append(score)
+                bfp = (
+                    bf.pmf[resolution_bucket] or bf.pmf[-1]
+                )  # if None, read from Other
+                interval_score = np.log2(p / bfp)
+                interval_scores.append(interval_score)
             else:
                 interval_scores.append(None)
 
@@ -316,7 +332,7 @@ def evaluate_question(
     if spot_forecast_time:
         spot_forecast_timestamp = min(spot_forecast_time.timestamp(), actual_close_time)
 
-    # We need all user forecasts to calculated GeoMean even
+    # We need all user forecasts to calculate GeoMean even
     # if we're only scoring some or none of the users
     user_forecasts = question.user_forecasts.all()
     if only_include_user_ids:
diff --git a/tests/unit/test_scoring/test_score_math.py b/tests/unit/test_scoring/test_score_math.py
index 23f5f78c71..652dcd9be3 100644
--- a/tests/unit/test_scoring/test_score_math.py
+++ b/tests/unit/test_scoring/test_score_math.py
@@ -47,7 +47,7 @@ def F(q=None, v=None, s=None, e=None):
     return forecast
 
-def A(p: list[float] | None = None, n: int = 0, t: int | None = None):
+def A(p: list[float | None] | None = None, n: int = 0, t: int | None = None):
     # Create an AggregationEntry object with basic values
     # p: pmf
     # n: number of forecasters
     # t: timestamp
@@ -75,6 +75,11 @@ class TestScoreMath:
             ([F()] * 100, [A(n=100)]),
             # maths
             ([F(v=0.7), F(v=0.8), F(v=0.9)], [A(p=[0.18171206, 0.79581144], n=3)]),
+            # multiple choice forecasts with `None` placeholders (aggregated as 0.0)
+            (
+                [F(q=QT.MULTIPLE_CHOICE, v=[0.6, 0.15, None, 0.25])] * 2,
+                [A(n=2, p=[0.6, 0.15, 0.0, 0.25])],
+            ),
             # start times
             ([F(), F(s=1)], [A(), A(t=1, n=2)]),
             ([F(), F(s=1), F(s=2)], [A(), A(t=1, n=2), A(t=2, n=3)]),
@@ -85,7 +90,7 @@ class TestScoreMath:
             # numeric
             (
                 [F(q=QT.NUMERIC), F(q=QT.NUMERIC)],
-                [A(p=[0] + [1 / 200] * 200 + [0], n=2)],
+                [A(p=[0.0] + [1 / 200] * 200 + [0.0], n=2)],
             ),
             (
                 [
@@ -103,7 +108,10 @@ def test_get_geometric_means(
         result = get_geometric_means(forecasts)
         assert len(result) == len(expected)
         for ra, ea in zip(result, expected):
-            assert all(round(r, 8) == round(e, 8) for r, e in zip(ra.pmf, ea.pmf))
+            assert all(
+                ((r == e) or (round(r, 8) == round(e, 8)))
+                for r, e in zip(ra.pmf, ea.pmf)
+            )
             assert ra.num_forecasters == ea.num_forecasters
             assert ra.timestamp == ea.timestamp
 
@@ -131,6 +139,37 @@ def test_get_geometric_means(
             ([F(v=0.9, s=5)], {}, [S(v=84.79969066 / 2, c=0.5)]),  # half coverage
             ([F(v=2 ** (-1 / 2))], {}, [S(v=50)]),
             ([F(v=2 ** (-3 / 2))], {}, [S(v=-50)]),
+            # multiple choice w/ placeholder at index 2
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 0, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=0.0)],
+            ),  # chosen to have a score of 0 for simplicity
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 2, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=50)],
+            ),  # same score as index == 3 since None should read from "Other"
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 3, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=50)],
+            ),  # chosen to have a score of 50 for simplicity
             # numeric
             (
                 [F(q=QT.NUMERIC)],
@@ -199,6 +238,37 @@ def test_evaluate_forecasts_baseline_accuracy(self, forecasts, args, expected):
             ([F(v=0.9, s=5)], {}, [S(v=84.79969066, c=1)]),
             ([F(v=2 ** (-1 / 2))], {}, [S(v=50)]),
             ([F(v=2 ** (-3 / 2))], {}, [S(v=-50)]),
+            # multiple choice w/ placeholder at index 2
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 0, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=0.0)],
+            ),  # chosen to have a score of 0 for simplicity
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 2, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=50)],
+            ),  # same score as index == 3 since None should read from "Other"
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 - 3 ** (-0.5) - 1 / 3, None, 3 ** (-0.5)],
+                    )
+                ],
+                {"resolution_bucket": 3, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=50)],
+            ),  # chosen to have a score of 50 for simplicity
             # numeric
             (
                 [F(q=QT.NUMERIC)],
@@ -319,6 +389,64 @@ def test_evaluate_forecasts_baseline_spot_forecast(self, forecasts, args, expect
                     S(v=100 * (0.5 * 0 + 0.5 * np.log(0.9 / gmean([0.1, 0.5]))), c=0.5),
                 ],
             ),
+            # multiple choice w/ placeholder at index 2
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 0, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=0), S(v=0)],
+            ),  # chosen to have a score of 0 for simplicity
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 2, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=25), S(v=-25)],
+            ),  # same score as index == 3 since None should read from "Other"
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 3, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=25), S(v=-25)],
+            ),  # chosen to have a score of 25 for simplicity
             # TODO: add tests with base forecasts different from forecasts
         ],
     )
@@ -403,6 +531,64 @@ def test_evaluate_forecasts_peer_accuracy(self, forecasts, args, expected):
                 {},
                 [S(v=100 * np.log(0.1 / 0.5)), S(v=100 * np.log(0.5 / 0.1)), S(c=0)],
             ),
+            # multiple choice w/ placeholder at index 2
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 0, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=0), S(v=0)],
+            ),  # chosen to have a score of 0 for simplicity
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 2, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=25), S(v=-25)],
+            ),  # same score as index == 3 since None should read from "Other"
+            (
+                [
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[
+                            1 / 3,
+                            1 - (np.e ** (0.25) / 3) - 1 / 3,
+                            None,
+                            np.e ** (0.25) / 3,
+                        ],
+                    ),
+                    F(
+                        q=QT.MULTIPLE_CHOICE,
+                        v=[1 / 3, 1 / 3, None, 1 / 3],
+                    ),
+                ],
+                {"resolution_bucket": 3, "question_type": QT.MULTIPLE_CHOICE},
+                [S(v=25), S(v=-25)],
+            ),  # chosen to have a score of 25 for simplicity
             # TODO: add tests with base forecasts different from forecasts
         ],
     )
diff --git a/tests/unit/test_utils/test_the_math/test_formulas.py b/tests/unit/test_utils/test_the_math/test_formulas.py
index 54f78dd357..30bb3d3e13 100644
--- a/tests/unit/test_utils/test_the_math/test_formulas.py
+++ b/tests/unit/test_utils/test_the_math/test_formulas.py
@@ -15,7 +15,12 @@ class TestFormulas:
     binary_details = {"type": Question.QuestionType.BINARY}
     multiple_choice_details = {
         "type": Question.QuestionType.MULTIPLE_CHOICE,
-        "options": ["A", "B", "C"],
+        "options": ["a", "c", "Other"],
+        "options_history": [
+            (0, ["a", "b", "Other"]),
+            (100, ["a", "Other"]),
+            (200, ["a", "c", "Other"]),
+        ],
     }
     numeric_details = {
         "type": Question.QuestionType.NUMERIC,
@@ -57,8 +62,10 @@ class TestFormulas:
             ("", binary_details, None),
             (None, binary_details, None),
             # Multiple choice questions
-            ("A", multiple_choice_details, 0),
-            ("C", multiple_choice_details, 2),
+            ("a", multiple_choice_details, 0),
+            ("b", multiple_choice_details, 1),
+            ("c", multiple_choice_details, 2),
+            ("Other", multiple_choice_details, 3),
             # Numeric questions
             ("below_lower_bound", numeric_details, 0),
             ("-2", numeric_details, 0),
diff --git a/utils/the_math/formulas.py b/utils/the_math/formulas.py
index 999444794c..d582039269 100644
--- a/utils/the_math/formulas.py
+++ b/utils/the_math/formulas.py
@@ -5,6 +5,7 @@
 from questions.constants import UnsuccessfulResolutionType
 from questions.models import Question
+from questions.services.multiple_choice_handlers import get_all_options_from_history
 from utils.typing import ForecastValues
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +34,8 @@ def string_location_to_scaled_location(
     if question.type == Question.QuestionType.BINARY:
         return 1.0 if string_location == "yes" else 0.0
     if question.type == Question.QuestionType.MULTIPLE_CHOICE:
-        return float(question.options.index(string_location))
+        list_of_all_options = get_all_options_from_history(question.options_history)
+        return float(list_of_all_options.index(string_location))
     # continuous
     if string_location == "below_lower_bound":
         return question.range_min - 1.0
diff --git a/utils/the_math/measures.py b/utils/the_math/measures.py
index 50fe1f21ac..7edce08712 100644
--- a/utils/the_math/measures.py
+++ b/utils/the_math/measures.py
@@ -56,7 +56,7 @@ def weighted_percentile_2d(
 
     # replace np.nan back to None
     weighted_percentiles = np.array(weighted_percentiles)
     weighted_percentiles = np.where(
-        weighted_percentiles == np.nan, None, weighted_percentiles
+        np.isnan(weighted_percentiles.astype(float)), None, weighted_percentiles
     )
     return weighted_percentiles.tolist()
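
For readers of this patch, a minimal standalone sketch of the multiple-choice baseline scoring behavior introduced in the score_math.py hunks above. `baseline_score_mc` is a hypothetical helper written purely for illustration (it does not exist in the codebase); per the patch's own comments, it assumes that options unavailable at forecast time are stored as `None` placeholders in the pmf and that the last bucket is "Other".

    import numpy as np

    def baseline_score_mc(pmf: list[float | None], resolution_bucket: int) -> float:
        # Count only the options that existed when the forecast was made;
        # unavailable options appear in the pmf as `None` placeholders.
        options_at_time = len([p for p in pmf if p is not None])
        # If the resolved bucket was unavailable back then, score the
        # forecast's "Other" bucket (the last entry) instead.
        p = pmf[resolution_bucket] or pmf[-1]
        return 100 * np.log(p * options_at_time) / np.log(options_at_time)

    # A uniform forecast over the 3 options available at the time scores 0 ...
    assert round(baseline_score_mc([1 / 3, 1 / 3, None, 1 / 3], 0), 8) == 0
    # ... and resolving to the `None` bucket scores the same as resolving to "Other".
    assert baseline_score_mc([0.25, 0.25, None, 0.5], 2) == baseline_score_mc(
        [0.25, 0.25, None, 0.5], 3
    )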