UCL · marghe-molaro · Apr 3, 2024 · Sep 17, 2024 · Sep 30, 2024 · Oct 2, 2024
diff --git a/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv b/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv
diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py
@@ -0,0 +1,82 @@
+import argparse
+from pathlib import Path
+
+import pandas as pd
+
+from tlo.analysis.utils import extract_individual_histories
+
+
+def print_filtered_df(df):
+    """
+    Prints rows of the DataFrame excluding event_name 'Initialise' and 'Birth'.
+    """
+    pd.set_option('display.max_colwidth', None)
+    filtered = df  # [~df['event_name'].isin(['StartOfSimulation', 'Birth'])]
+
+    dict_cols = ["Info"]
+    max_items = 2
+    # Step 2: Truncate dictionary columns for display
+    if dict_cols is not None:
+        for col in dict_cols:
+            def truncate_dict(d):
+                if isinstance(d, dict):
+                    items = list(d.items())[:max_items]  # keep only first `max_items`
+                    return dict(items)
+                return d
+            filtered[col] = filtered[col].apply(truncate_dict)
+    print(filtered)
+
+
+def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ):
+    """Extract event chains
+    """
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.max_colwidth', None)
+
+    individual_individual_histories = extract_individual_histories(results_folder)
+
+if __name__ == "__main__":
+    rfp = Path('resources')
+
+    parser = argparse.ArgumentParser(
+        description="Produce plots to show the impact each set of treatments",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--output-path",
+        help=(
+            "Directory to write outputs to. If not specified (set to None) outputs "
+            "will be written to value of --results-path argument."
+        ),
+        type=Path,
+        default=None,
+        required=False,
+    )
+    parser.add_argument(
+        "--resources-path",
+        help="Directory containing resource files",
+        type=Path,
+        default=Path('resources'),
+        required=False,
+    )
+    parser.add_argument(
+        "--results-path",
+        type=Path,
+        help=(
+            "Directory containing results from running "
+            "src/scripts/analysis_data_generation/scenario_track_individual_histories.py "
+        ),
+        default=None,
+        required=False
+    )
+    args = parser.parse_args()
+    assert args.results_path is not None
+    results_path = args.results_path
+
+    output_path = results_path if args.output_path is None else args.output_path
+
+    apply(
+        results_folder=results_path,
+        output_folder=output_path,
+        resourcefilepath=args.resources_path
+    )
diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py
@@ -0,0 +1,91 @@
+"""This scenario file runs the model to track individual histories.
+
+Run on the batch system using:
+```
+tlo batch-submit
+    src/scripts/track_individual_histories/scenario_track_individual_histories.py
+```
+
+or locally using:
+```
+    tlo scenario-run src/scripts/track_individual_histories/scenario_track_individual_histories.py
+```
+
+"""
+from pathlib import Path
+from typing import Dict
+
+import pandas as pd
+
+from tlo import Date, logging
+from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
+from tlo.methods import individual_history_tracker
+from tlo.methods.fullmodel import fullmodel
+from tlo.scenario import BaseScenario
+
+
+class TrackIndividualHistories(BaseScenario):
+    def __init__(self):
+        super().__init__()
+        self.seed = 42
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = self.start_date + pd.DateOffset(years=5)
+        self.pop_size = 100
+        self._scenarios = self._get_scenarios()
+        self.number_of_draws = len(self._scenarios)
+        self.runs_per_draw = 1
+        self.generate_event_chains = True
+
+    def log_configuration(self):
+        return {
+            'filename': 'track_individual_histories',
+            'directory': Path('./outputs'),  # <- (specified only for local running)
+            'custom_levels': {
+                '*': logging.WARNING,
+                'tlo.methods.demography': logging.INFO,
+                'tlo.methods.events': logging.INFO,
+                'tlo.methods.demography.detail': logging.WARNING,
+                'tlo.methods.healthburden': logging.INFO,
+                'tlo.methods.healthsystem.summary': logging.INFO,
+                'tlo.methods.individual_history': logging.INFO
+            }
+        }
+
+    def modules(self):
+        return (
+            fullmodel() + [individual_history_tracker.IndividualHistoryTracker()]
+        )
+
+    def draw_parameters(self, draw_number, rng):
+        if draw_number < self.number_of_draws:
+            return list(self._scenarios.values())[draw_number]
+        else:
+            return
+
+    def _get_scenarios(self) -> Dict[str, Dict]:
+
+        return {
+            "Baseline":
+                mix_scenarios(
+                    self._baseline(),
+                    {
+                    }
+                ),
+
+        }
+
+    def _baseline(self) -> Dict:
+        #Return the Dict with values for the parameter changes that define the baseline scenario.
+        return mix_scenarios(
+            get_parameters_for_status_quo(),
+            {
+                "HealthSystem": {
+                    "mode_appt_constraints": 1,                 # <-- Mode 1 prior to change to preserve calibration
+                }
+            },
+        )
+
+if __name__ == '__main__':  # executed as a script: run this scenario file through the tlo CLI
+    from tlo.cli import scenario_run
+
+    scenario_run([__file__])
diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py
@@ -365,6 +365,130 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series:
     _concat.columns.names = ['draw', 'run']  # name the levels of the columns multi-index
     return _concat
 
+def check_info_value_changes(df):
+
+    problems = {}  # store issues
+
+    # iterate group-by-group
+    for E, g in df.groupby("entity"):
+        prev_info = {}
+
+        for _, row in g.iterrows():
+            current_info = row["Info"]
+
+            for key, value in current_info.items():
+                if key in prev_info and key != 'footprint' and key != 'level':
+                    # compare with previous value
+                    if prev_info[key] == value and key not in problems.keys():
+                        problems[key] = value
+
+            # update latest value
+            prev_info = row["Info"]
+
+    return problems
+
+def remove_events_for_individual_after_death(df):
+    rows_to_drop = []
+
+    # Group by entity
+    for entity, g in df.groupby("entity"):
+        died = False
+
+        for idx, row in g.iterrows():
+            current_info = row["Info"]
+
+            if not died:
+                # Check if this row marks death
+                if isinstance(current_info, dict) and current_info.get("is_alive") is False:
+                    died = True
+            else:
+                # Already dead → mark this row for removal
+                rows_to_drop.append(idx)
+
+    # Drop all marked rows
+    return df.drop(index=rows_to_drop)
+
+def reconstruct_individual_histories(df):
+
+    # Collapse into 'entity', 'date', 'event_name', 'Info' format where 'Info' is dict listing attributes
+    # (e.g. {a1:v1, a2:v2, a3:v3, ...} )
+    df_collapsed = (
+            df.groupby(['entity', 'date', 'event_name'], sort=False)
+              .apply(lambda g: dict(zip(g['attribute'], g['value'])))
+              .reset_index(name='Info')
+        )
+
+    df_final = (
+        df_collapsed
+            .sort_values(by=['entity', 'date'])
+            .reset_index(drop=True)
+    )
+
+    df_final = remove_events_for_individual_after_death(df_final)
+
+    problems = check_info_value_changes(df_final)
+    if len(problems)>0:
+        print("Values didn't change but were still detected")
+        print(problems)
+
+
+
+    return df_final
+
+
+def extract_individual_histories(results_folder: Path,
+                        ) -> dict:
+    """Utility function to collect chains of events. Individuals across runs of the same draw
+    will be combined into unique df.
+    Returns dictionary where keys are draws, and each draw is associated with a dataframe of
+    format 'entity', 'date', 'event_name', 'Info' where 'Info' is a dictionary that combines
+    A&Vs for a particular individual + date + event name combination.
+    """
+    module = 'tlo.methods.individual_history'
+    key = 'individual_histories'
+
+    # get number of draws and numbers of runs
+    info = get_scenario_info(results_folder)
+
+    # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df.
+    res = dict()
+
+    for draw in range(info['number_of_draws']):
+
+        # All individuals in same draw will be combined across runs, so their ID will be offset.
+        dfs_from_runs = []
+        ID_offset = 0
+
+        for run in range(info['runs_per_draw']):
+
+            try:
+                df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key]
+                df_single_run= reconstruct_individual_histories(df)
+
+                # Offset person ID to account for the fact that we are collecting chains across runs
+                df_single_run['entity'] = df_single_run['entity'] + ID_offset
+
+                # Calculate ID offset for next run
+                ID_offset = (max(df_single_run['entity']) + 1)
+
+                # The E has now become an ID for the individual in the draw overall, so rename column as such
+                df_single_run = df_single_run.rename(columns={'entity': 'person_ID_in_draw'})
+
+                # Append these chains to list
+                dfs_from_runs.append(df_single_run)
+
+            except KeyError:
+                # Some logs could not be found - probably because this run failed.
+                # Simply to not append anything to the df collecting chains.
+                print("Run failed")
+
+        # Combine all dfs into a single DataFrame
+        res[draw] = pd.concat(dfs_from_runs, ignore_index=True)
+
+        res[0].to_csv('individual_histories.csv')
+
+    return res
+
 
 def compute_summary_statistics(
     results: pd.DataFrame,

diff --git a/src/tlo/events.py b/src/tlo/events.py
@@ -9,6 +9,8 @@
 if TYPE_CHECKING:
     from tlo import Simulation
 
+from tlo.notify import notifier
+
 
 class Priority(Enum):
     """Enumeration for the Priority, which is used in sorting the events in the simulation queue."""
@@ -22,7 +24,6 @@ def __lt__(self, other):
             return self.value < other.value
         return NotImplemented
 
-
 class Event:
     """Base event class, from which all others inherit.
 
@@ -63,9 +64,19 @@ def apply(self, target):
 
     def run(self):
         """Make the event happen."""
+
+        # Dispatch "event.pre-run" notification (target, module name, event class name) before applying the event
+        notifier.dispatch("event.pre-run", data={"target": self.target,
+                                                 "module" : self.module.name,
+                                                 "event_name": self.__class__.__name__})
+
         self.apply(self.target)
         self.post_apply_hook()
 
+        # Dispatch notification that event has just run (NOTE(review): no "module" key here, unlike pre-run - confirm)
+        notifier.dispatch("event.post-run", data={"target": self.target,
+                                                  "event_name": self.__class__.__name__})
+
 
 class RegularEvent(Event):
     """An event that automatically reschedules itself at a fixed frequency."""