From dbff470b51cde44beeefdae3575d52e0c19964bc Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:00:09 +0100 Subject: [PATCH 01/97] Investigate analysis of events at sim level --- src/tlo/simulation.py | 9 +++++++++ tests/test_rti.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 219b1b8a6f..a641909ed1 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -231,6 +231,15 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date break + + #if event.target != self.population: + # print("Event: ", event) + + if event.module == self.modules['RTI']: + print("RTI event ", event) + print(" target ", event.target) + if event.target != self.population: + self.population.props.at[event.tar] self.fire_single_event(event, date) # The simulation has ended. diff --git a/tests/test_rti.py b/tests/test_rti.py index 0e231fb4af..99243b988e 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,6 +25,17 @@ end_date = Date(2012, 1, 1) popsize = 1000 +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation with a functioning health system with full service availability and no set + constraints + """ + # create sim object + sim = create_basic_rti_sim(popsize, seed) + # run simulation + sim.simulate(end_date=end_date) + exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't @@ -65,6 +76,7 @@ def test_run(seed): check_dtypes(sim) + @pytest.mark.slow def test_all_injuries_run(seed): """ From 05098f78668a5317667d58cbda882a364a031277 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:26:39 +0200 Subject: [PATCH 02/97] Final data-printing set-up --- src/tlo/methods/demography.py | 7 ++- src/tlo/methods/healthsystem.py | 18 ++++++ src/tlo/methods/hiv.py | 67 ++++++++++++++++++---- src/tlo/methods/tb.py | 99 +++++++++++++++++++++++++-------- src/tlo/simulation.py | 82 ++++++++++++++++++++++++--- 5 files changed, 226 insertions(+), 47 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..6b2578fd44 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,9 +315,10 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + if sim.generate_data is False: + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 181c08f5aa..6e251e636c 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,8 +2033,26 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being 
defined." + go_ahead = False + if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): + go_ahead = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if go_ahead: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated appt_footprint if actual_appt_footprint is not None: diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..8e0d337fc1 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,11 +631,12 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + if self.sim.generate_data is False: + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -905,10 +906,16 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + if self.sim.generate_data: + print("Should be generating data") + sim.schedule_event( + HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) + ) + else: + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1662,6 +1669,37 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- +class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): + """ The HIV Polling Events for Data Generation + * Ensures that + """ + + def __init__(self, module): + super().__init__( + module, frequency=DateOffset(years=120) + ) # repeats every 12 months, but this can be changed + + def apply(self, population): + + df = population.props + + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & ~df.hv_inf + ].index + + n_susceptible = len(susc_idx) + print("Number of individuals susceptible", n_susceptible) + # Schedule the date of infection for each new infection: + for i in susc_idx: + date_of_infection = self.sim.date + pd.DateOffset( + # Ensure that individual will be infected 
before end of sim + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + self.sim.schedule_event( + HivInfectionEvent(self.module, i), date_of_infection + ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1683,6 +1721,7 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls + # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) + # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1861,11 +1901,12 @@ def vmmc_for_child(): priority=0, ) - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + if self.sim.generate_data is False: + # Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 @@ -1882,6 +1923,8 @@ def vmmc_for_child(): vmmc_for_child() + + # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..cd79ae22a5 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,28 +833,29 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + if self.sim.generate_data is False: + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -867,7 +868,11 @@ def 
initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + + if sim.generate_data is False: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + else: + sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) # 2) log at the end of the year # Optional: Schedule the scale-up of programs @@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- +class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): + """The Tb Regular Poll Event for Data Generation for assigning active infections + * selects everyone to develop an active infection and schedules onset of active tb + sometime during the simulation + """ + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=120)) + + def apply(self, population): + + df = population.props + now = self.sim.date + rng = self.module.rng + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & (df.tb_inf != "active") + ].index + + n_susceptible = len(susc_idx) + + middle_index = len(susc_idx) // 2 + + # Will equally split two strains among the population + list_ds = susc_idx[:middle_index] + list_mdr = susc_idx[middle_index:] + + # schedule onset of active tb. This will be equivalent to the "Onset", so it + # doesn't matter how long after we have decided which infection this is. + for person_id in list_ds: + date_progression = now + pd.DateOffset( + # At some point during their lifetime, this person will develop TB + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "ds" + + for person_id in list_mdr: + date_progression = now + pd.DateOffset( + days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "mdr" + class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1439,7 +1491,6 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() - class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 5b4e2fff4c..f0c8d6f09f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -7,7 +7,7 @@ from collections import OrderedDict from pathlib import Path from typing import Dict, Optional, Union - +import pandas as pd import numpy as np from tlo import Date, Population, logging @@ -63,9 +63,11 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_data = None self.end_date = None self.output_file = None self.population: Optional[Population] = None + self.event_chains: 
Optinoal[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -209,6 +211,8 @@ def make_initial_population(self, *, n): module.initialise_population(self.population) logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') @@ -221,7 +225,14 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it + self.generate_data = True # for now ensure we're always aiming to print data + + f = open('output.txt', mode='a') + #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + # Reorder columns to place the new columns at the front + pd.set_option('display.max_columns', None) + print(self.event_chains.columns) for module in self.modules.values(): module.initialise_simulation(self) @@ -250,17 +261,72 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date + self.event_chains.to_csv('output.csv', index=False) break - + #if event.target != self.population: # print("Event: ", event) - - if event.module == self.modules['RTI']: - print("RTI event ", event) - print(" target ", event.target) - if event.target != self.population: - self.population.props.at[event.tar] + go_ahead = False + df_before = [] + + # Only print events relevant to modules of interest + # Do not want to compare before/after in births because it may expand the pop dataframe + print_output = True + if print_output: + if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): + #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + go_ahead = True + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'Before' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_before = self.population.props.copy() + self.fire_single_event(event, date) + + if print_output: + if go_ahead == True: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + # if not df_before.columns.equals(df_after.columns): + # print("Number of columns in pop dataframe", len(self.population.props.columns)) + # print("Before", df_before.columns) + # print("After", df_after.columns#) + # exit(-1) + # if not df_before.index.equals(df_after.index): + # print("Number of indices in pop dataframe", len(self.population.props.index)) + # print("----> ", event) + # print("Before", df_before.index#) + # print("After", df_after.index) + # exit(-1) + + change = 
df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From 16c071c6220edcc20b539f346625f628e5e8c4c5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:37:38 +0200 Subject: [PATCH 03/97] Print event chains --- src/tlo/methods/demography.py | 2 +- src/tlo/methods/healthsystem.py | 8 ++-- src/tlo/methods/hiv.py | 6 +-- src/tlo/methods/tb.py | 4 +- src/tlo/simulation.py | 47 +++++++++--------- tests/test_data_generation.py | 85 +++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 35 deletions(-) create mode 100644 tests/test_data_generation.py diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 6b2578fd44..4f19af6d55 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,7 +315,7 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately self.other_death_poll = OtherDeathPoll(self) sim.schedule_event(self.other_death_poll, sim.date) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 6e251e636c..203ca10985 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,9 +2033,9 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." 
- go_ahead = False - if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): - go_ahead = True + print_chains = False + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event @@ -2046,7 +2046,7 @@ def run_individual_level_events_in_mode_0_or_1(self, # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) - if go_ahead: + if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8e0d337fc1..36b1a4bd6e 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_data: + if self.sim.generate_event_chains: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index cd79ae22a5..57ccd97368 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,7 +833,7 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,7 +869,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) else: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index f0c8d6f09f..d055d6e367 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -63,7 +63,9 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_data = None + self.generate_event_chains = None + self.generate_event_chains_modules_of_interest = [] + self.generate_event_chains_ignore_events = [] self.end_date = None self.output_file = None self.population: Optional[Population] = None @@ -216,7 +218,7 @@ def make_initial_population(self, *, n): end = 
time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') - def simulate(self, *, end_date): + def simulate(self, *, end_date, generate_event_chains = False): """Simulation until the given end date :param end_date: when to stop simulating. Only events strictly before this @@ -225,7 +227,11 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it - self.generate_data = True # for now ensure we're always aiming to print data + self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + if self.generate_event_chains: + # For now keep these fixed, eventually they will be input from user + self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] + self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] f = open('output.txt', mode='a') #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) @@ -264,17 +270,13 @@ def simulate(self, *, end_date): self.event_chains.to_csv('output.csv', index=False) break - #if event.target != self.population: - # print("Event: ", event) - go_ahead = False + + print_chains = False df_before = [] - # Only print events relevant to modules of interest - # Do not want to compare before/after in births because it may expand the pop dataframe - print_output = True - if print_output: - if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): - #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + if self.generate_event_chains: + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): go_ahead = True if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -288,7 +290,7 @@ def simulate(self, *, end_date): self.fire_single_event(event, date) - if print_output: + if go_ahead: if go_ahead == True: if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -299,18 +301,6 @@ def simulate(self, *, end_date): self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) else: df_after = self.population.props.copy() - # if not df_before.columns.equals(df_after.columns): - # print("Number of columns in pop dataframe", len(self.population.props.columns)) - # print("Before", df_before.columns) - # print("After", df_after.columns#) - # exit(-1) - # if not df_before.index.equals(df_after.index): - # print("Number of indices in pop dataframe", len(self.population.props.index)) - # print("----> ", event) - # print("Before", df_before.index#) - # print("After", df_after.index) - # exit(-1) - change = df_before.compare(df_after) if ~change.empty: indices = change.index @@ -385,6 +375,13 @@ def 
do_birth(self, mother_id): child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id def find_events_for_person(self, person_id: int): diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py new file mode 100644 index 0000000000..1f6333bbfe --- /dev/null +++ b/tests/test_data_generation.py @@ -0,0 +1,85 @@ +import os +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation +from tlo.methods import ( + care_of_women_during_pregnancy, + demography, + depression, + enhanced_lifestyle, + epi, + epilepsy, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + cardio_metabolic_disorders, + labour, + newborn_outcomes, + postnatal_supervisor, + pregnancy_helper_functions, + pregnancy_supervisor, + depression, + tb, + contraception, +# simplified_births, + symptommanager, +) +from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt + +# create simulation parameters +start_date = Date(2010, 1, 1) +end_date = Date(2015, 1, 1) +popsize = 100 + +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation to print all individual events of specific individuals + """ + + module_of_interest = 'Hiv' + # create sim object + sim = create_basic_sim(popsize, seed) + + dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) + + # Check that all dependencies are included + for dep in dependencies_list: + if dep not in sim.modules: + print("WARNING: dependency ", dep, "not included") + exit(-1) + + # run simulation + sim.simulate(end_date=end_date, generate_event_chains = True) + + +def create_basic_sim(population_size, seed): + # create the basic outline of an rti simulation object + sim = Simulation(start_date=start_date, seed=seed) + resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + sim.register(demography.Demography(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), + cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), + depression.Depression(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + ) + + sim.make_initial_population(n=population_size) + return sim + From ba81487a3fa003e2f10206e435a1d64f170f14e3 Mon Sep 17 
00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:08:50 +0200 Subject: [PATCH 04/97] Add chains in mode 2 too and clean up in simuation --- src/tlo/methods/healthsystem.py | 40 ++++++++++++++++++------ src/tlo/simulation.py | 55 ++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 203ca10985..54cb976b26 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2034,18 +2034,20 @@ def run_individual_level_events_in_mode_0_or_1(self, f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." print_chains = False - if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): - print_chains = True - row = self.sim.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = self.sim.date - row['when'] = 'Before' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + # Print individual info after event if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -2445,8 +2447,28 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: # Expected appt footprint before running event _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT - # Run event & get actual footprint + + print_chains = False + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if print_chains: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call if actual_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d055d6e367..616e159453 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -277,7 +277,7 @@ def simulate(self, *, end_date, generate_event_chains = False): if self.generate_event_chains: # Only print event if it belongs to modules of interest and 
if it is not in the list of events to ignore if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): - go_ahead = True + print_chains = True if event.target != self.population: row = self.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -290,33 +290,32 @@ def simulate(self, *, end_date, generate_event_chains = False): self.fire_single_event(event, date) - if go_ahead: - if go_ahead == True: - if event.target != self.population: - row = self.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - else: - df_after = self.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = event - new_rows_before['event_date'] = date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = event - new_rows_after['event_date'] = date - new_rows_after['when'] = 'After' - - self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) + if print_chains: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. 
if self.show_progress_bar: From b1c907c12bfa54621983415b560381d1737afc9a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:36:06 +0200 Subject: [PATCH 05/97] Fix issue with tests by ensuring standard Polling and infection is maintained is generate_event_chains is None --- src/tlo/methods/hiv.py | 6 +++--- src/tlo/methods/hsi_event.py | 14 ++++++++------ src/tlo/methods/tb.py | 10 ++++++---- src/tlo/simulation.py | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 36b1a4bd6e..391cf587a8 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains: + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 470794bcdd..785f27b7a6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -193,10 +193,12 @@ def run(self, squeeze_factor): print_chains = False df_before = [] - + if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: row = self.sim.population.props.iloc[[self.target]] @@ -204,7 +206,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'Before' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -219,7 +221,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() change = df_before.compare(df_after) @@ -236,8 +238,8 @@ def run(self, squeeze_factor): 
new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 57ccd97368..4c170944d2 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -832,8 +832,9 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT + # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,10 +870,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is False: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - else: + if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) + else: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + # 2) log at the end of the year # Optional: Schedule the scale-up of programs diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 794bfef98e..4aff23c9d7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -105,6 +105,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = None + self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.end_date = None @@ -298,10 +299,11 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.end_date = end_date # store the end_date so that others can reference it self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + self.generate_event_chains_overwrite_epi = False if self.generate_event_chains: # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) From cfb4264f0133fccbc0a82a6c9d3f51479d19038f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:51:37 +0200 Subject: [PATCH 
06/97] Switch iloc for loc --- src/tlo/events.py | 5 ++--- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/simulation.py | 9 ++++++--- tests/test_data_generation.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 78b828091d..a50832a58d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -74,7 +74,7 @@ def run(self): if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -83,13 +83,12 @@ def run(self): else: df_before = self.sim.population.props.copy() - self.apply(self.target) self.post_apply_hook() if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 785f27b7a6..cffeb32992 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4aff23c9d7..42a2a288d3 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -298,14 +298,17 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains = generate_event_chains if self.generate_event_chains: + # Eventually this can be made an option + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + else: + # If not using to print chains, cannot ignore epi + self.generate_event_chains_overwrite_epi = False - #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 1f6333bbfe..8dd92513f9 100644 --- 
a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -32,7 +32,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2015, 1, 1) +end_date = Date(2014, 1, 1) popsize = 100 @pytest.mark.slow From e0327de6b6f850ac871a2308271f6863333f173e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:55:57 +0200 Subject: [PATCH 07/97] Change syntax of if statement --- src/tlo/events.py | 2 +- src/tlo/methods/hsi_event.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index a50832a58d..2eef87ba3f 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,7 +71,7 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index cffeb32992..805c9584fb 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,9 +196,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] From fceee02e68722e29314c3d9efe35983709a78deb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 9 Oct 2024 09:27:54 +0100 Subject: [PATCH 08/97] Change syntax of if statement and print string of event --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2eef87ba3f..2a7871c2c8 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,12 +71,12 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -90,7 +90,7 @@ def run(self): if self.target != self.sim.population: row = 
self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 805c9584fb..ea9066bc8b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,12 +196,12 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) From eaeae626a4b37c024db38abf82bdb7c2e723ffe2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:45:41 +0100 Subject: [PATCH 09/97] Focus on rti and print footprint --- src/tlo/events.py | 16 +++++++++++++--- src/tlo/methods/hsi_event.py | 36 ++++++++++++++++------------------- src/tlo/methods/rti.py | 8 ++++++-- src/tlo/simulation.py | 6 +++--- tests/test_data_generation.py | 31 ++++++++++++++++-------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2a7871c2c8..76e1b9a117 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,14 +71,19 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -88,11 +93,12 @@ def run(self): if print_chains: 
if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() @@ -104,11 +110,15 @@ def run(self): new_rows_before['event'] = self new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' + new_rows_before['appt_footprint'] = 'N/A' + new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index new_rows_after['event'] = self new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' + new_rows_after['appt_footprint'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ea9066bc8b..f8e8738543 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,14 +196,19 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): +# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -212,32 +217,23 @@ def run(self, squeeze_factor): self.post_apply_hook() self._run_after_hsi_event() + footprint = self.EXPECTED_APPT_FOOTPRINT + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = str(footprint) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - 
new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + print("Error, I shouldn't be here") + exit(-1) + return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..1c12e7162b 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1)) + super().__init__(module, frequency=DateOffset(months=1000)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,9 +2864,13 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - pred = eq.predict(df.loc[rt_current_non_ind]) + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1 + else: + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 42a2a288d3..a8ecf14cc6 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -303,8 +303,8 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 8dd92513f9..af3c4f0ae9 100644 --- a/tests/test_data_generation.py +++ 
b/tests/test_data_generation.py @@ -25,15 +25,16 @@ depression, tb, contraception, -# simplified_births, + simplified_births, + rti, symptommanager, ) from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 100 +end_date = Date(2012, 1, 1) +popsize = 200 @pytest.mark.slow def test_data_harvesting(seed): @@ -41,7 +42,7 @@ def test_data_harvesting(seed): This test runs a simulation to print all individual events of specific individuals """ - module_of_interest = 'Hiv' + module_of_interest = 'RTI' # create sim object sim = create_basic_sim(popsize, seed) @@ -55,29 +56,31 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - + exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), + # contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + # epi.Epi(resourcefilepath=resourcefilepath), + # hiv.Hiv(resourcefilepath=resourcefilepath), + # tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + # labour.Labour(resourcefilepath=resourcefilepath), + #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From c7bd9d058cea79fad0f8471830766f5c335a7df1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:57:21 +0100 Subject: [PATCH 10/97] Only store change in individual properties, not entire property row. Log changes to logger. 
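
Rough sketch of the per-person comparison this patch introduces (the function name
here is illustrative; the patch implements the same idea inline in
compare_population_dataframe and store_chains_to_do_after_event):

    import pandas as pd

    def changed_properties(row_before: pd.Series, row_after: pd.Series) -> dict:
        """Return only the properties whose value changed between the two snapshots."""
        # Fill NaNs with a sentinel first so that NaN -> NaN is not reported as a change,
        # mirroring the fillna(-99999) used in the patch.
        before = row_before.fillna(-99999)
        after = row_after.fillna(-99999)
        return {col: row_after[col] for col in before.index if before[col] != after[col]}

Each logged "link" is then just the event name, the event date and this dictionary of
changed properties, which keeps the record much smaller than writing out the full
population-properties row before and after every event.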
--- src/tlo/events.py | 204 ++++++++++++++++++++++++++-------- src/tlo/methods/hsi_event.py | 134 ++++++++++++++++------ src/tlo/simulation.py | 2 +- tests/test_data_generation.py | 22 ++-- 4 files changed, 268 insertions(+), 94 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 76e1b9a117..436a01a97c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,13 +4,20 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset +from tlo import DateOffset, logging if TYPE_CHECKING: from tlo import Simulation import pandas as pd +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +logger_summary = logging.getLogger(f"{__name__}.summary") +logger_summary.setLevel(logging.INFO) + +debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -62,66 +69,167 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - - def run(self): - """Make the event happen.""" + def compare_population_dataframe(self,df_before, df_after): + """ This function compares the population dataframe before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + # Create an empty list to store changes for each of the individuals + chain_links = {} + + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + # Create a dictionary for this person + # First add event info + link_info = { + #'person_ID': idx, + 'event': str(self), + 'event_date': self.sim.date, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + + # Append the event and changes to the individual key + chain_links = {idx : link_info} + + return chain_links + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + + # Initialise these variables print_chains = False df_before = [] + row_before = pd.Series() - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. 
+ if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual + if self.target != self.sim.population: + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - self.apply(self.target) - self.post_apply_hook() + else: + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the pop dataframe before the event has occurred. + df_before = self.sim.population.props.copy() + + return print_chains, row_before, df_before + + def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + + chain_links = {} + if print_chains: + + # Target is single individual if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store event for this individual + link_info = { + #'person_ID' : self.target, + 'event' : str(self), + 'event_date' : self.sim.date, + } + # Store property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_before['appt_footprint'] = 'N/A' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - new_rows_after['appt_footprint'] = 'N/A' - - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = 
pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe(df_before, df_after) + + if debug_chains: + # Or print entire rows + change = df_before.compare(df_after) + if not change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = self + new_rows_before['event_date'] = self.sim.date + new_rows_before['when'] = 'Before' + + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = self + new_rows_after['event_date'] = self.sim.date + new_rows_after['when'] = 'After' + + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + + return chain_links + + def run(self): + """Make the event happen.""" + + # Collect relevant information before event takes place + if self.sim.generate_event_chains: + print_chains, row_before, df_before = self.store_chains_to_do_before_event() + + self.apply(self.target) + self.post_apply_hook() + + # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' + # in the individual's event chain. + if self.sim.generate_event_chains: + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + + # Log chain_links here + if len(chain_links)>0: + logger.info(key='event_chains', + data= chain_links, + description='Links forming chains of events for simulated individuals') + + #print("Chain events ", chain_links) + class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f8e8738543..1c727f014b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -16,12 +16,19 @@ from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem +# Pointing to the logger in events +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) +debug_chains = True + + # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -187,54 +194,113 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def run(self, squeeze_factor): - """Make the event happen.""" + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" + # Initialise these variables print_chains = False - df_before = [] - - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): -# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - updated_appt_footprint = self.apply(self.target, squeeze_factor) - self.post_apply_hook() - self._run_after_hsi_event() + row_before = pd.Series() - footprint = self.EXPECTED_APPT_FOOTPRINT - if updated_appt_footprint is not None: - footprint = updated_appt_footprint + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if print_chains: + # Will eventually use this once I can actually GET THE NAME OF THE SELF + # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + + # In the case of HSI events, only individual events should exist and therefore be logged + print_chains = True + + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = str(footprint) + row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: + # Many of our HealthSystem implementations rely on the assumption that print("Error, I shouldn't be here") exit(-1) + + return print_chains, row_before + + def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + if print_chains: + # For HSI event, this will only ever occur for individual events + + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes. 
+ + # Add event details + link_info = { + 'event' : str(self), + 'event_date' : self.sim.date, + 'appt_footprint' : str(footprint), + 'level' : self.facility_info.level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = footprint + row['level'] = self.facility_info.level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + return chain_links + + + def run(self, squeeze_factor): + """Make the event happen.""" + + + if self.sim.generate_event_chains: + print_chains, row_before = self.store_chains_to_do_before_event() + + footprint = self.EXPECTED_APPT_FOOTPRINT + updated_appt_footprint = self.apply(self.target, squeeze_factor) + self.post_apply_hook() + self._run_after_hsi_event() + + + if self.sim.generate_event_chains: + + # If the footprint has been updated when the event ran, change it here + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + + if len(chain_links)>0: + logger_chains.info(key='event_chains', + data = chain_links, + description='Links forming chains of events for simulated individuals') + #print(chain_links) + return updated_appt_footprint + def get_consumables( self, diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index a8ecf14cc6..20b3a4898f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index af3c4f0ae9..39f2b022aa 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -33,7 +33,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) +end_date = Date(2011, 1, 1) popsize = 200 @pytest.mark.slow @@ -63,24 +63,24 @@ def create_basic_sim(population_size, seed): sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - # contraception.Contraception(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - 
simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - # epi.Epi(resourcefilepath=resourcefilepath), - # hiv.Hiv(resourcefilepath=resourcefilepath), - # tb.Tb(resourcefilepath=resourcefilepath), + # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - # labour.Labour(resourcefilepath=resourcefilepath), - #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From 769aaeca44aaedc324bd3da2f5f338bb47e02106 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:03:22 +0100 Subject: [PATCH 11/97] Style fixes --- src/tlo/methods/tb.py | 2 +- src/tlo/simulation.py | 4 ++-- tests/test_data_generation.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 4c170944d2..9dc05ff301 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -1393,7 +1393,7 @@ def apply(self, population): & (df.tb_inf != "active") ].index - n_susceptible = len(susc_idx) + len(susc_idx) middle_index = len(susc_idx) // 2 diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 20b3a4898f..75dfa76429 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,7 +8,7 @@ import time from collections import OrderedDict from pathlib import Path -from typing import Dict, Optional, Union +from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd @@ -374,7 +374,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. 
""" - f = open('output.txt', mode='a') + open('output.txt', mode='a') if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 39f2b022aa..c94618a77d 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import pandas as pd import pytest from tlo import Date, Simulation @@ -11,7 +10,6 @@ depression, enhanced_lifestyle, epi, - epilepsy, healthburden, healthseekingbehaviour, healthsystem, @@ -20,16 +18,13 @@ labour, newborn_outcomes, postnatal_supervisor, - pregnancy_helper_functions, pregnancy_supervisor, depression, tb, contraception, - simplified_births, rti, symptommanager, ) -from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) From 757cee36b0ae611f1f7ae31d25799fc0d6e7daa1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sun, 13 Oct 2024 11:15:17 +0100 Subject: [PATCH 12/97] Include printing of individual properties at the beginning and at birth, label what is only used for ddebugging and will be later removed --- src/tlo/events.py | 5 +++-- src/tlo/methods/hsi_event.py | 7 ++++--- src/tlo/methods/rti.py | 2 +- src/tlo/simulation.py | 28 ++++++++++++++++++++++++++++ tests/test_data_generation.py | 5 ++--- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 436a01a97c..03bf7c72fa 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -98,7 +98,6 @@ def compare_population_dataframe(self,df_before, df_after): for col in changed_cols: link_info[col] = df_after.at[idx, col] - # Append the event and changes to the individual key chain_links = {idx : link_info} @@ -127,7 +126,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) if debug_chains: - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -166,6 +165,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = {self.target : link_info} + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births @@ -185,6 +185,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe(df_before, df_after) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
if debug_chains: # Or print entire rows change = df_before.compare(df_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 1c727f014b..0c3bc16072 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -217,6 +217,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -228,8 +229,8 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: else: # Many of our HealthSystem implementations rely on the assumption that - print("Error, I shouldn't be here") - exit(-1) + raise RuntimeError("Cannot have population-wide HSI events") + return print_chains, row_before @@ -258,7 +259,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : link_info} - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 1c12e7162b..3642365976 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,7 +2865,7 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1 + pred = 1.0 else: pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 75dfa76429..582fb4ba1c 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -37,6 +37,9 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -111,6 +114,8 @@ def __init__( self.end_date = None self.output_file = None self.population: Optional[Population] = None + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar @@ -281,7 +286,16 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
+ if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -392,6 +406,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date + + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: @@ -449,13 +465,25 @@ def do_birth(self, mother_id: int) -> int: child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + prop_dict = self.population.props.loc[child_id].to_dict() + + child_dict = {child_id : prop_dict} + logger_chains.info(key='event_chains', + data = child_dict, + description='Links forming chains of events for simulated individuals') + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.population.props.iloc[[child_id]] row['person_ID'] = child_id row['event'] = 'Birth' row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index c94618a77d..d9885c1fab 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -28,8 +28,8 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2011, 1, 1) -popsize = 200 +end_date = Date(2012, 1, 1) +popsize = 100 @pytest.mark.slow def test_data_harvesting(seed): @@ -51,7 +51,6 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object From 22a5e44312ad4d2f1d955b70399ae9569efb13c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:00:22 +0100 Subject: [PATCH 13/97] Log everything to simulation, as events logger doesn't seem to be visible to all modules. For now add person_ID to the dict of info printed as the outer dictionary key logging seems to have a problem. 
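
Sketch of what a single logged link is intended to look like after this change, assuming
one person whose property changed during an event (the person id, event name, date and
property values below are made up for illustration):

    from tlo import logging

    logger_chain = logging.getLogger('tlo.simulation')
    logger_chain.setLevel(logging.INFO)

    link_info = {
        'person_ID': 42,               # now carried inside the record itself
        'event': 'RTIPollingEvent',    # illustrative event name
        'event_date': '2010-03-01',    # illustrative date
        'rt_road_traffic_inc': True,   # only the properties that changed are included
    }
    logger_chain.info(key='event_chains',
                      data={42: link_info},
                      description='Links forming chains of events for simulated individuals')

Logging through the 'tlo.simulation' logger means every module's chain records end up
under the same key, rather than being split across per-module loggers.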
--- src/tlo/events.py | 13 +++++++++---- src/tlo/methods/hsi_event.py | 3 ++- src/tlo/simulation.py | 25 +++++++++++++++++-------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 03bf7c72fa..98832faecb 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,9 +11,13 @@ import pandas as pd + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chain = logging.getLogger('tlo.simulation') +logger_chain.setLevel(logging.INFO) + logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) @@ -89,7 +93,7 @@ def compare_population_dataframe(self,df_before, df_after): # Create a dictionary for this person # First add event info link_info = { - #'person_ID': idx, + 'person_ID': idx, 'event': str(self), 'event_date': self.sim.date, } @@ -152,13 +156,14 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if self.target != self.sim.population: row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - # Create and store event for this individual + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, + 'person_ID' : self.target, 'event' : str(self), 'event_date' : self.sim.date, } - # Store property changes as a result of the event for this individual + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] @@ -225,7 +230,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: - logger.info(key='event_chains', + logger_chain.info(key='event_chains', data= chain_links, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 0c3bc16072..6651a8704a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -17,7 +17,7 @@ from tlo.methods.healthsystem import HealthSystem # Pointing to the logger in events -logger_chains = logging.getLogger("tlo.methods.event") +logger_chains = logging.getLogger("tlo.simulation") logger_chains.setLevel(logging.INFO) logger = logging.getLogger(__name__) @@ -246,6 +246,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # Add event details link_info = { + 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, 'appt_footprint' : str(footprint), diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 582fb4ba1c..fd9fade215 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -107,7 +107,7 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_event_chains = None + self.generate_event_chains = True self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] @@ -292,15 +292,23 @@ def make_initial_population(self, *, n: int) -> None: # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') - logger_chains.info(key='event_chains', + + print(pop_dict) + print(pop_dict.keys()) + for key in pop_dict.keys(): + pop_dict[key]['person_ID'] = key + print("Length of properties", len(pop_dict[0].keys())) + #exit(-1) + logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - def initialise(self, *, end_date: Date, generate_event_chains) -> None: + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow initialising data structures which may depend (in size for example) on the @@ -312,7 +320,7 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains + #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True @@ -413,7 +421,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: if self.show_progress_bar: progress_bar.stop() - def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: + def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date :param end_date: When to stop simulating. Only events strictly before this @@ -421,7 +429,7 @@ def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: clarity. """ start = time.time() - self.initialise(end_date=end_date, generate_event_chains=generate_event_chains) + self.initialise(end_date=end_date) self.run_simulation_to(to_date=end_date) self.finalise(time.time() - start) @@ -470,9 +478,10 @@ def do_birth(self, mother_id: int) -> int: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. 
prop_dict = self.population.props.loc[child_id].to_dict() - + prop_dict['event'] = 'Birth' + prop_dict['event_date'] = self.date child_dict = {child_id : prop_dict} - logger_chains.info(key='event_chains', + logger.info(key='event_chains', data = child_dict, description='Links forming chains of events for simulated individuals') From 7faa81783dc43e434e26ef8c95717480cebd3816 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:07:46 +0200 Subject: [PATCH 14/97] Consider all modules included as of interest --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index fd9fade215..15be1622e8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -325,7 +325,7 @@ def initialise(self, *, end_date: Date) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi From 7232f976831054ed541d59d8da20c91289fa79e6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:29:43 +0200 Subject: [PATCH 15/97] Remove pop-wide HSI warning and make epi default even when printing chains --- src/tlo/methods/hsi_event.py | 38 ++++++++++++++++++++++++++---------- src/tlo/simulation.py | 2 +- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 6651a8704a..d0cdb5bbdd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -223,13 +223,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Many of our HealthSystem implementations rely on the assumption that - raise RuntimeError("Cannot have population-wide HSI events") + # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error + # raise RuntimeError("Cannot have population-wide HSI events") + logger.debug( + key="message", + data=( + f"Cannot have population-wide HSI events" + ), + ) return print_chains, row_before @@ -245,12 +255,20 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # will be stored regardless of whether individual experienced property changes. 
# Add event details + + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' + link_info = { 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, - 'appt_footprint' : str(footprint), - 'level' : self.facility_info.level, + 'appt_footprint' : record_footprint, + 'level' : record_level, } # Add changes to properties @@ -266,8 +284,8 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' - row['appt_footprint'] = footprint - row['level'] = self.facility_info.level + row['appt_footprint'] = record_footprint + row['level'] = record_level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links @@ -277,7 +295,7 @@ def run(self, squeeze_factor): """Make the event happen.""" - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: print_chains, row_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -287,7 +305,7 @@ def run(self, squeeze_factor): self._run_after_hsi_event() - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: # If the footprint has been updated when the event ran, change it here if updated_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 15be1622e8..0c70b164d9 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -323,7 +323,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] From a6def2d22c0d291ce775afef561b580847ad36cf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:39:24 +0200 Subject: [PATCH 16/97] Style fix --- src/tlo/methods/hsi_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d0cdb5bbdd..041ab9cf08 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -237,7 +237,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: logger.debug( key="message", data=( - f"Cannot have population-wide HSI events" + "Cannot have population-wide HSI events" ), ) From ecea532a2843d312580accf97383cd62c457fd04 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:51:39 +0200 Subject: [PATCH 17/97] Remove data generation test, which wasn't really a test --- tests/test_data_generation.py | 82 ----------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 tests/test_data_generation.py diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py deleted file mode 100644 index d9885c1fab..0000000000 --- 
a/tests/test_data_generation.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from pathlib import Path - -import pytest - -from tlo import Date, Simulation -from tlo.methods import ( - care_of_women_during_pregnancy, - demography, - depression, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - cardio_metabolic_disorders, - labour, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - depression, - tb, - contraception, - rti, - symptommanager, -) - -# create simulation parameters -start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) -popsize = 100 - -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation to print all individual events of specific individuals - """ - - module_of_interest = 'RTI' - # create sim object - sim = create_basic_sim(popsize, seed) - - dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) - - # Check that all dependencies are included - for dep in dependencies_list: - if dep not in sim.modules: - print("WARNING: dependency ", dep, "not included") - exit(-1) - - # run simulation - sim.simulate(end_date=end_date, generate_event_chains = True) - -def create_basic_sim(population_size, seed): - # create the basic outline of an rti simulation object - sim = Simulation(start_date=start_date, seed=seed) - resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' - sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), - enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), - healthburden.HealthBurden(resourcefilepath=resourcefilepath), - symptommanager.SymptomManager(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), - rti.RTI(resourcefilepath=resourcefilepath), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), - cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), - depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), - ) - - sim.make_initial_population(n=population_size) - return sim - From ae7a44cb5f72063c48555e3b21d5d6dd4400ee97 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:29:03 +0200 Subject: [PATCH 18/97] Change dict of properties to string in logging, and add analysis files --- .../analysis_extract_data.py | 370 ++++++++++++++++++ .../postprocess_events_chain.py | 156 ++++++++ .../scenario_generate_chains.py | 115 ++++++ src/tlo/events.py | 23 +- src/tlo/methods/hsi_event.py | 13 +- src/tlo/simulation.py | 29 +- 6 files changed, 684 insertions(+), 22 deletions(-) create mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py create mode 100644 
src/scripts/analysis_data_generation/postprocess_events_chain.py create mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py new file mode 100644 index 0000000000..2cfba5315b --- /dev/null +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -0,0 +1,370 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd + +from tlo import Date +from tlo.analysis.utils import extract_results +from datetime import datetime + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def all_columns(_df): + return pd.Series(_df.all()) + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column='0', + #column = str(i), + #custom_generate_series=get_num_dalys_by_year, + do_scaling=False + ) + # print(event_chains.loc[0,(0, 0)]) + + eval_env = { + 'datetime': datetime, # Add the datetime class to the eval environment + 'pd': pd, # Add pandas to handle Timestamp + 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval + 'NaT': pd.NaT, + 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) + } + + for item,row in event_chains.iterrows(): + value = event_chains.loc[item,(0, 0)] + if value !='': + print('') + print(value) + exit(-1) + #dict = {} + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # dict[i] = [] + + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # event_chains = extract_results( + # results_folder, + # module='tlo.simulation'#, + # key='event_chains', + # column = str(i), + # #custom_generate_series=get_num_dalys_by_year, + # do_scaling=False + # ) + # print(event_chains) + # print(event_chains.index) + # print(event_chains.columns.levels) + + # for index, row in event_chains.iterrows(): + # if event_chains.iloc[index,0] is not None: + # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): + # dict[i].append(event_chains.iloc[index,0]) + #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): + #print(event_chains.iloc[index,0]['de_depr']) + # exit(-1) + #for item in dict[0]: + # print(item) + + #exit(-1) + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from 
`Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + 
print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. 
+ + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory 
to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_generate_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py new file mode 100644 index 0000000000..96c27a04b1 --- /dev/null +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -0,0 +1,156 @@ +import pandas as pd +from dateutil.relativedelta import relativedelta + +# Remove from every individual's event chain all events that were fired after death +def cut_off_events_after_death(df): + + events_chain = df.groupby('person_ID') + + filtered_data = pd.DataFrame() + + for name, group in events_chain: + + # Find the first non-NaN 'date_of_death' and its index + first_non_nan_index = group['date_of_death'].first_valid_index() + + if first_non_nan_index is not None: + # Filter out all rows after the first non-NaN index + filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index + filtered_data = pd.concat([filtered_data, filtered_group]) + else: + # If there are no non-NaN values, keep the original group + filtered_data = pd.concat([filtered_data, group]) + + return filtered_data + +# Load into DataFrame +def load_csv_to_dataframe(file_path): + try: + # Load raw chains into df + df = pd.read_csv(file_path) + print("Raw event chains loaded successfully!") + return df + except FileNotFoundError: + print(f"Error: The file '{file_path}' was not found.") + except Exception as e: + print(f"An error occurred: {e}") + +file_path = 'output.csv' # Replace with the path to your CSV file + +output = load_csv_to_dataframe(file_path) + +# Some of the dates appeared not to be in datetime format. Correct here. 
+output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') +output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') +if 'hv_date_inf' in output.columns: + output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') + + +date_start = pd.to_datetime('2010-01-01') +if 'Other' in output['cause_of_death'].values: + print("ERROR: 'Other' was included in sim as possible cause of death") + exit(-1) + +# Choose which columns in individual properties to visualise +columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] +#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] + +# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison +columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] + +# If considering epidemiology consistent with sim, add check here. +check_ages_of_those_HIV_inf = False +if check_ages_of_those_HIV_inf: + for index, row in output.iterrows(): + if pd.isna(row['hv_date_inf']): + continue # Skip this iteration + diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) + if diff.years > 1 and diff.years<15: + print("Person contracted HIV infection at age younger than 15", diff) + +# Remove events after death +filtered_data = cut_off_events_after_death(output) + +print_raw_events = True # Print raw chain of events for each individual +print_selected_changes = False +print_all_changes = True +person_ID_of_interest = 494 + +pd.set_option('display.max_rows', None) + +for name, group in filtered_data.groupby('person_ID'): + list_of_dob = group['date_of_birth'] + + # Select individuals based on when they were born + if list_of_dob.iloc[0].year<2010: + + # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: + all_identical_dob = group['date_of_birth'].nunique() == 1 + all_identical_sex = group['sex'].nunique() == 1 + if all_identical_dob is False or all_identical_sex is False: + print("Immutable properties are changing! 
This is not chain for single individual") + print(group) + exit(-1) + + print("----------------------------------------------------------------------") + print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) + print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event + number_of_events =len(group)/2 + number_of_changes=0 + if print_raw_events: + print(group) + + if print_all_changes: + # Check each row + comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) + + # Iterate over rows where any column has changed + for idx, row_changed in comparison.iloc[1:].iterrows(): + if row_changed.any(): # Check if any column changed in this row + number_of_changes+=1 + changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred + print(f"Row {idx} - Changes detected in columns: {changed_columns}") + columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns + print(group.loc[idx, columns_output]) # Print only the changed columns + if group.loc[idx, 'when'] == 'Before': + print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') + #print(group.loc[idx,columns_to_print]) + print() # For better readability + print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") + + if print_selected_changes: + tb_inf_condition = ( + ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | + ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) + ) + + alive_condition = ( + (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) + ) + # Combine conditions for rows of interest + transition_condition = tb_inf_condition | alive_condition + + if list_of_dob.iloc[0].year >= 2010: + print("DETECTED OF INTEREST") + print(group[group['event'] == 'Birth'][columns_to_print]) + + # Filter the DataFrame based on the condition + filtered_transitions = group[transition_condition] + if not filtered_transitions.empty: + if list_of_dob.iloc[0].year < 2010: + print("DETECTED OF INTEREST") + print(filtered_transitions[columns_to_print]) + + +print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) + + + diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py new file mode 100644 index 0000000000..6bdcd02d90 --- /dev/null +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -0,0 +1,115 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + 
src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 120 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 1 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2019 + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "no_scaling", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. 
""" + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "default", + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/events.py b/src/tlo/events.py index 98832faecb..00a6fe4e7d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -83,13 +85,14 @@ def compare_population_dataframe(self,df_before, df_after): # Create an empty list to store changes for each of the individuals chain_links = {} - + len_of_diff = len(diff_mask) + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() - + if changed_cols: # Proceed only if there are changes in the row - # Create a dictionary for this person # First add event info link_info = { @@ -103,7 +106,7 @@ def compare_population_dataframe(self,df_before, df_after): link_info[col] = df_after.at[idx, col] # Append the event and changes to the individual key - chain_links = {idx : link_info} + chain_links[idx] = str(link_info) return chain_links @@ -168,7 +171,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: @@ -228,14 +231,18 @@ def run(self): if self.sim.generate_event_chains: chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + # Create empty logger for entire pop + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + + pop_dict.update(chain_links) + # Log chain_links here if len(chain_links)>0: logger_chain.info(key='event_chains', - data= chain_links, - description='Links forming chains of events for simulated individuals') + data= pop_dict, + description='Links forming chains of events for simulated individuals') #print("Chain events ", chain_links) - class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 041ab9cf08..d657e9d3a0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + if TYPE_CHECKING: from tlo import Module, Simulation @@ -276,7 +278,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links = {self.target : str(link_info)} # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
row = self.sim.population.props.loc[[abs(self.target)]] @@ -314,10 +316,15 @@ def run(self, squeeze_factor): chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) if len(chain_links)>0: + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals + + pop_dict.update(chain_links) + logger_chains.info(key='event_chains', - data = chain_links, + data = pop_dict, description='Links forming chains of events for simulated individuals') - #print(chain_links) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 0c70b164d9..d9ba62c43a 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -40,6 +40,8 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) +FACTOR_POP_DICT = 5000 + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -294,17 +296,18 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - print(pop_dict) - print(pop_dict.keys()) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key - print("Length of properties", len(pop_dict[0].keys())) - #exit(-1) + pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later + + pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict_full.update(pop_dict) + + print("Size for full sim", len(pop_dict_full)) + logger.info(key='event_chains', - data = pop_dict, + data = pop_dict_full, description='Links forming chains of events for simulated individuals') - end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -323,7 +326,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] @@ -480,9 +483,13 @@ def do_birth(self, mother_id: int) -> int: prop_dict = self.population.props.loc[child_id].to_dict() prop_dict['event'] = 'Birth' prop_dict['event_date'] = self.date - child_dict = {child_id : prop_dict} + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + + print("Length at birth", len(pop_dict)) logger.info(key='event_chains', - data = child_dict, + data = pop_dict, description='Links forming chains of events for simulated individuals') # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
@@ -492,7 +499,7 @@ def do_birth(self, mother_id: int) -> int: row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: From 16299a21f43862a188f41ea6117b81c2c11d72ab Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:37:29 +0000 Subject: [PATCH 19/97] Include debugging option, final set-up of scenario to print data, analysis file now collects all relevant info and prints them --- .../analysis_extract_data.py | 157 ++++++++++++++++-- .../scenario_generate_chains.py | 53 +++++- src/tlo/events.py | 10 +- src/tlo/methods/hsi_event.py | 50 +++--- src/tlo/methods/rti.py | 17 +- src/tlo/simulation.py | 39 +++-- src/tlo/util.py | 1 + 7 files changed, 252 insertions(+), 75 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 2cfba5315b..6eb6408830 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -8,10 +8,14 @@ from typing import Tuple import pandas as pd +import matplotlib.pyplot as plt from tlo import Date from tlo.analysis.utils import extract_results from datetime import datetime +from collections import Counter +import ast + # Range of years considered min_year = 2010 @@ -28,17 +32,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column='0', - #column = str(i), - #custom_generate_series=get_num_dalys_by_year, - do_scaling=False - ) - # print(event_chains.loc[0,(0, 0)]) - + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp @@ -46,13 +40,144 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'NaT': pd.NaT, 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } + + initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + + # Will be added through computation: age at time of RTI + + # Will be added through computation: total duration of event + + initial_rt_event_properties = set() + + num_individuals = 1000 + num_runs = 50 + record = [] + + + for i in range(0,num_individuals): - for item,row in event_chains.iterrows(): - value = event_chains.loc[item,(0, 0)] - if value !='': - print('') - print(value) + individual_event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column=str(i), + do_scaling=False + ) + + #print(individual_event_chains) + + + for r in range(0,num_runs): + + print("AT RUN = ", r) + + initial_properties = {} + progression_properties = {} + key_first_event = {} + key_last_event = {} + first_event = {} + last_event = {} + properties = {} + + + #ind_Counter = Counter() + ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : 
Counter()} + # Count total appts + + list_for_individual = [] + for item,row in individual_event_chains.iterrows(): + value = individual_event_chains.loc[item,(0, r)] + # print("The value is", value, "at run ", r) + if value !='' and isinstance(value, str): + evaluated = eval(value, eval_env) + list_for_individual.append(evaluated) + # elif not isinstance(value,str): + # print(value) + + initial_properties = list_for_individual[0] + print(initial_properties) + + # Initialise first event by gathering parameters of interest from initial_properties + first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + + progression_properties = {} + for i in list_for_individual: + if 'event' in i: + print("") + print(i) + if 'RTIPolling' in i['event']: + #print("I'm in polling event") + #print(i) + + # Keep track of which properties are changed during polling events + for key,value in i.items(): + if 'rt_' in key: + initial_rt_event_properties.add(key) + + # Retain a copy of Polling event + polling_event = i.copy() + + # Update parameters of interest following RTI + key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} + + # Calculate age of individual at time of event + key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days + + # Keep track of evolution in individual's properties + progression_properties = initial_properties.copy() + progression_properties.update(i) + + else: + # Progress properties of individual, even if this event is a death + progression_properties.update(i) + + #print(progression_properties) + # Update footprint + if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': + footprint = i['appt_footprint'] + if 'Counter' in footprint: + footprint = footprint[len("Counter("):-1] + apply = eval(footprint, eval_env) + ind_Counter[i['level']].update(Counter(apply)) + + if 'is_alive' in i and i['is_alive'] is False: + print("Death", i) + print("-------Total footprint", ind_Counter) + break + + + # Compute final properties of individual + key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] + key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days + key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + key_last_event.update({'total_footprint': ind_Counter}) + + #print("-------Total footprint", ind_Counter) + #for key, value in key_first_event.items(): + # if 'rt_' in key or 'alive' in key: + # print(f"{key}: {value}") + #print(#) + #for key, value in key_last_event.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + # print(f"{key}: {value}") + + #print(key_first_event) + #print(key_last_event) + print(initial_rt_event_properties) + properties = key_first_event | key_last_event + record.append(properties) + for key, value in properties.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + print(f"{key}: {value}") + + df = pd.DataFrame(record) + df.to_csv("raw_data.csv", index=False) + + print(df) + print(initial_rt_event_properties) exit(-1) + #print(i) + #dict = {} #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: # dict[i] = [] diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6bdcd02d90..79df3f55b6 100644 --- 
a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -22,18 +22,42 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario - +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 120 + self.end_date = self.start_date + pd.DateOffset(months=13) + self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 50 self.generate_event_chains = True def log_configuration(self): @@ -51,10 +75,23 @@ def log_configuration(self): } def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: diff --git a/src/tlo/events.py b/src/tlo/events.py index 00a6fe4e7d..ba8024f621 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -FACTOR_POP_DICT = 5000 +from tlo.util import FACTOR_POP_DICT logger = logging.getLogger(__name__) @@ -132,7 +132,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - if debug_chains: + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target @@ -142,6 +142,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: + # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. 
df_before = self.sim.population.props.copy() @@ -174,7 +175,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target @@ -194,7 +195,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = self.compare_population_dataframe(df_before, df_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Or print entire rows change = df_before.compare(df_after) if not change.empty: @@ -233,7 +234,6 @@ def run(self): # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) # Log chain_links here diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d657e9d3a0..bdf597fba4 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,10 +8,9 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population - +from tlo.util import FACTOR_POP_DICT import pandas as pd -FACTOR_POP_DICT = 5000 if TYPE_CHECKING: @@ -219,19 +218,21 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'Before' + + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error @@ -280,15 +281,16 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : str(link_info)} - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
- row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = record_footprint + row['level'] = record_level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 3642365976..1ca2749af7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) + super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,10 +2864,12 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 - else: - pred = eq.predict(df.loc[rt_current_non_ind]) + #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1.0 + #else: + # pred = eq.predict(df.loc[rt_current_non_ind]) + + random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] @@ -4852,6 +4854,7 @@ def __init__(self, module, person_id): self.treated_code = 'none' def apply(self, person_id, squeeze_factor): + self._number_of_times_this_event_has_run += 1 df = self.sim.population.props rng = self.module.rng @@ -4900,10 +4903,12 @@ def apply(self, person_id, squeeze_factor): # injury is being treated in this surgery # find untreated injury codes that are treated with major surgery relevant_codes = np.intersect1d(injuries_to_be_treated, surgically_treated_codes) + # check that the person sent here has an appropriate code(s) assert len(relevant_codes) > 0 # choose a code at random self.treated_code = rng.choice(relevant_codes) + if request_outcome: # check the people sent here hasn't died due to rti, have had their injuries diagnosed and been through # RTI_Med @@ -4990,7 +4995,9 @@ def apply(self, person_id, squeeze_factor): # ------------------------------------- Perm disability from amputation ------------------------------------ codes = ['782', '782a', '782b', '782c', '783', '882', '883', '884'] + if self.treated_code in codes: + # Track whether they are permanently disabled df.at[person_id, 'rt_perm_disability'] = True # Find the column and code where the permanent injury is stored diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d9ba62c43a..bb766562a0 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -11,8 +11,9 @@ from typing import Optional from typing import TYPE_CHECKING, 
Optional import pandas as pd - +import tlo.population import numpy as np +from tlo.util import FACTOR_POP_DICT try: import dill @@ -40,8 +41,6 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) -FACTOR_POP_DICT = 5000 - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,12 +112,15 @@ def __init__( self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.debug_generate_event_chains = False self.end_date = None self.output_file = None self.population: Optional[Population] = None - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optinoal[Population] = None + + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains: Optional[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -288,8 +290,9 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. @@ -329,7 +332,7 @@ def initialise(self, *, end_date: Date) -> None: self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -418,8 +421,9 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.fire_single_event(event, date) self.date = to_date - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. - self.event_chains.to_csv('output.csv', index=False) + if self.debug_generate_event_chains: + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. 
+ self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -492,13 +496,14 @@ def do_birth(self, mother_id: int) -> int: data = pop_dict, description='Links forming chains of events for simulated individuals') - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index 168b1d41a1..f8dc67d471 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -12,6 +12,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 +FACTOR_POP_DICT = 1000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 0dd862f2a9b485a33933e185e3c59ad64ed33ed9 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:30 +0000 Subject: [PATCH 20/97] Change label of person when iterating --- .../analysis_extract_data.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 6eb6408830..4c8e7d8197 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -41,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } - initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI @@ -54,13 +54,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No record = [] - for i in range(0,num_individuals): + for p in range(0,num_individuals): + + print("At person = ", p) individual_event_chains = extract_results( results_folder, module='tlo.simulation', key='event_chains', - column=str(i), + column=str(p), do_scaling=False ) @@ -69,7 +71,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No for r in range(0,num_runs): - print("AT RUN = ", r) + initial_properties = {} progression_properties = {} @@ -78,7 
+80,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No first_event = {} last_event = {} properties = {} - + average_disability = 0 + prev_disability_incurred = 0 #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} @@ -95,7 +98,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # print(value) initial_properties = list_for_individual[0] - print(initial_properties) + # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} @@ -103,8 +106,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = {} for i in list_for_individual: if 'event' in i: - print("") - print(i) + #print("") + #print(i) if 'RTIPolling' in i['event']: #print("I'm in polling event") #print(i) @@ -126,10 +129,26 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # Keep track of evolution in individual's properties progression_properties = initial_properties.copy() progression_properties.update(i) + + # dalys incurred + if 'rt_disability' in i: + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) + + # If disability has changed as a result of this, recalculate + if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + # Update variables + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + + #print(progression_properties) # Update footprint @@ -141,34 +160,33 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No ind_Counter[i['level']].update(Counter(apply)) if 'is_alive' in i and i['is_alive'] is False: - print("Death", i) - print("-------Total footprint", ind_Counter) + #print("Death", i) + #print("-------Total footprint", ind_Counter) break # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] + else: + key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - - #print("-------Total footprint", ind_Counter) - #for key, value in key_first_event.items(): - # if 'rt_' in key or 'alive' in key: - # print(f"{key}: {value}") - #print(#) - #for key, value in key_last_event.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - # print(f"{key}: {value}") - #print(key_first_event) - #print(key_last_event) - print(initial_rt_event_properties) + #print("Average disability", key_last_event['rt_disability_average']) + properties = key_first_event | key_last_event + + if 
not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): + print("Error in computed average for individual ", p, r ) + record.append(properties) - for key, value in properties.items(): + #for key, value in properties.items(): #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - print(f"{key}: {value}") + #print(f"{key}: {value}") + # print("Initial event properties", initial_rt_event_properties) df = pd.DataFrame(record) df.to_csv("raw_data.csv", index=False) From 84f826322ba13f6fa1631d639944c2bac50667f6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:55:03 +0000 Subject: [PATCH 21/97] Correctly retrieve event name --- src/tlo/events.py | 12 ++++++------ src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index ba8024f621..f67b54458a 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -97,7 +97,7 @@ def compare_population_dataframe(self,df_before, df_after): # First add event info link_info = { 'person_ID': idx, - 'event': str(self), + 'event': type(self).__name__, 'event_date': self.sim.date, } @@ -136,7 +136,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -164,7 +164,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> link_info = { #'person_ID' : self.target, 'person_ID' : self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, } # Store (if any) property changes as a result of the event for this individual @@ -179,7 +179,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -202,13 +202,13 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> indices = change.index new_rows_before = df_before.loc[indices] new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self + new_rows_before['event'] = type(self).__name__ new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self + new_rows_after['event'] = type(self).__name__ new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f267181b56..978b26d7c5 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -222,7 +222,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # TO BE REMOVED This is currently just used for debugging. 
Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ #str(self.event_name) row['event_date'] = self.sim.date row['when'] = 'Before' @@ -268,7 +268,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> link_info = { 'person_ID': self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, @@ -285,7 +285,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' row['appt_footprint'] = record_footprint From a490d1995c12ac20beda2fbd16271d22f0e4f8fe Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 20 Jan 2025 11:34:02 +0000 Subject: [PATCH 22/97] Modify scenario file such that can exclude specific services, and corrected analysis file such as for small number of cases where the DALYs are not explicitly resolved the average DALYs are still computed correctly [skip ci] --- .../analysis_extract_data.py | 105 ++++++++++-------- .../scenario_generate_chains.py | 58 +++++++--- 2 files changed, 103 insertions(+), 60 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 4c8e7d8197..3afad7adcc 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -16,6 +16,9 @@ from collections import Counter import ast +# Time simulated to collect data +start_date = Date(2010, 1, 1) +end_date = start_date + pd.DateOffset(months=13) # Range of years considered min_year = 2010 @@ -25,6 +28,13 @@ def all_columns(_df): return pd.Series(_df.all()) +def check_if_beyond_time_range_considered(progression_properties): + matching_keys = [key for key in progression_properties.keys() if "rt_date_to_remove_daly" in key] + if matching_keys: + for key in matching_keys: + if progression_properties[key] > end_date: + print("Beyond time range considered, need at least ",progression_properties[key]) + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. 
@@ -44,19 +54,21 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI - # Will be added through computation: total duration of event initial_rt_event_properties = set() - + num_individuals = 1000 num_runs = 50 record = [] - + # Include results folder in output file name + name_tag = str(results_folder).replace("outputs/", "") + + for p in range(0,num_individuals): - print("At person = ", p) + print("At person = ", p, " out of ", num_individuals) individual_event_chains = extract_results( results_folder, @@ -66,51 +78,41 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No do_scaling=False ) - #print(individual_event_chains) - - for r in range(0,num_runs): - - - initial_properties = {} - progression_properties = {} key_first_event = {} key_last_event = {} first_event = {} last_event = {} properties = {} average_disability = 0 + total_dt_included = 0 + dt_in_prev_disability = 0 prev_disability_incurred = 0 - - #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} # Count total appts list_for_individual = [] for item,row in individual_event_chains.iterrows(): value = individual_event_chains.loc[item,(0, r)] - # print("The value is", value, "at run ", r) if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - # elif not isinstance(value,str): - # print(value) + # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] - # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + # The changing or adding of properties from the first_event will be stored in progression_properties progression_properties = {} + for i in list_for_individual: + # Skip the initial_properties, or in other words only consider these if they are 'proper' events if 'event' in i: - #print("") #print(i) if 'RTIPolling' in i['event']: - #print("I'm in polling event") - #print(i) # Keep track of which properties are changed during polling events for key,value in i.items(): @@ -130,67 +132,80 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = initial_properties.copy() progression_properties.update(i) - # dalys incurred + # Initialise chain of Dalys incurred if 'rt_disability' in i: prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) - # If disability has changed as a result of this, recalculate - if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + # If disability has changed as a result of this, recalculate and add previous to rolling average + if 'rt_disability' in i: + dt_in_prev_disability = (i['event_date'] - prev_date).days + #print("Detected change in disability", 
i['rt_disability'], "after dt=", dt_in_prev_disability) + #print("Adding the following to the average", prev_disability_incurred, " x ", dt_in_prev_disability ) average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability # Update variables prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - - - #print(progression_properties) - # Update footprint + # Update running footprint if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': footprint = i['appt_footprint'] if 'Counter' in footprint: footprint = footprint[len("Counter("):-1] apply = eval(footprint, eval_env) ind_Counter[i['level']].update(Counter(apply)) - + + # If the individual has died, ensure chain of event is interrupted here and update rolling average of DALYs if 'is_alive' in i and i['is_alive'] is False: - #print("Death", i) - #print("-------Total footprint", ind_Counter) + if ((i['event_date'] - polling_event['rt_date_inj']).days) > total_dt_included: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability break - - + + # check_if_beyond_time_range_considered(progression_properties) + # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + + # If individual didn't die and the key_last_event didn't result in a final change in DALYs, ensure that the last change is recorded here + if not key_first_event['rt_imm_death'] and (total_dt_included < key_last_event['duration_days']): + #print("Number of events", len(list_for_individual)) + #for i in list_for_individual: + # if 'event' in i: + # print(i) + dt_in_prev_disability = (progression_properties['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability + + # Now calculate the average disability incurred, and store any permanent disability and total footprint + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0: key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] else: key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - #print("Average disability", key_last_event['rt_disability_average']) + if key_last_event['duration_days']!=total_dt_included: + print("The duration of event and total_dt_included don't match", key_last_event['duration_days'], total_dt_included) + exit(-1) properties = key_first_event | key_last_event - - if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): - print("Error in computed average for individual ", p, r ) record.append(properties) - #for key, value in properties.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - #print(f"{key}: {value}") - # print("Initial event properties", initial_rt_event_properties) - - df = pd.DataFrame(record) - df.to_csv("raw_data.csv", index=False) + + df = pd.DataFrame(record) + df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) + print(df) 
print(initial_rt_event_properties) exit(-1) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 79df3f55b6..822bf13ad8 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -18,7 +18,7 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario @@ -92,7 +92,35 @@ def modules(self): # fullmodel(resourcefilepath=self.resources) # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] # ) + """ + def draw_parameters(self, draw_number, rng): + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'HealthSystem': { + 'Service_Availability': list(self._scenarios.values())[draw_number], + }, + } + ) + def _get_scenarios(self) -> Dict[str, list[str]]: + Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. + The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. + + # Generate list of TREATMENT_IDs and filter to the resolution needed + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] + + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*", "Nothing": []}) + #service_availability.update( + # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + #) + + return service_availability + + """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] @@ -107,20 +135,27 @@ def draw_parameters(self, draw_number, rng): # case 6: gfHE = 0.030, factor = 1.07326 def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - """ + #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] - self.YEAR_OF_CHANGE = 2019 + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*"], "Nothing": []}) + service_availability.update( + {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + ) + print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { "HealthSystem": { - "yearly_HR_scaling_mode": "no_scaling", + "Service_Availability": service_availability["No Rti_BurnManagement*"], }, } ), @@ -128,20 +163,13 @@ def _get_scenarios(self) -> Dict[str, Dict]: } def _baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. """ + #Return the Dict with values for the parameter changes that define the baseline scenario. 
return mix_scenarios( get_parameters_for_status_quo(), { "HealthSystem": { "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH - "year_mode_switch": self.YEAR_OF_CHANGE, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "cons_availability": "default", + "cons_availability": "all", } }, ) From 08a5d9a29c9e2e8af7832ca49bfca1cb75f6d8d6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 12 Apr 2025 11:34:07 +0100 Subject: [PATCH 23/97] Change seed in scenario file --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 822bf13ad8..3bc75978d2 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -51,7 +51,7 @@ class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() - self.seed = 0 + self.seed = 42 self.start_date = Date(2010, 1, 1) self.end_date = self.start_date + pd.DateOffset(months=13) self.pop_size = 1000 From 3dda343f65c49e429c677b89d1536531fa83833a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 14 Apr 2025 18:06:08 +0200 Subject: [PATCH 24/97] latest scenario --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 3bc75978d2..1297c6b18b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_BurnManagement*"], + "Service_Availability": service_availability["No Rti_FractureCast*"], }, } ), From d9e3f66138c0e372b2b0fa0ac10e7393457bcaf8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 09:35:47 +0100 Subject: [PATCH 25/97] Latest scenario version --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 1297c6b18b..b4ad946154 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_FractureCast*"], + "Service_Availability": service_availability["No Rti_MinorSurgeries*"], }, } ), From ddf6f689b6b9184e3f09ac1906417e6fa0495a7f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 15:44:41 +0100 Subject: [PATCH 26/97] Latest version of scenario file --- 
.../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index b4ad946154..35b7d75e1c 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_MinorSurgeries*"], + "Service_Availability": service_availability["No Rti_ShockTreatment*"], }, } ), From 0e38408d5e37ccb4f894bb89c4d3c93673ae09a3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:20:35 +0100 Subject: [PATCH 27/97] Ensure changes to mni dataframe are captured as well --- .../scenario_generate_chains.py | 30 ++-- src/tlo/events.py | 164 ++++++++++++++++-- src/tlo/methods/hsi_event.py | 112 ++++++++---- src/tlo/methods/pregnancy_helper_functions.py | 50 +----- src/tlo/methods/pregnancy_supervisor.py | 50 ++++++ src/tlo/methods/rti.py | 4 +- src/tlo/simulation.py | 13 +- src/tlo/util.py | 2 +- 8 files changed, 314 insertions(+), 111 deletions(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 35b7d75e1c..64fa70d055 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=13) + self.end_date = self.start_date + pd.DateOffset(months=36) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 50 + self.runs_per_draw = 1 self.generate_event_chains = True def log_configuration(self): @@ -77,21 +77,31 @@ def log_configuration(self): def modules(self): # MODIFY # Here instead of running full module + """ return [demography.Demography(resourcefilepath=self.resources), enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), - rti.RTI(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, + #rti.RTI(resourcefilepath=self.resources), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), + labour.Labour(resourcefilepath=self.resources), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), + contraception.Contraception(resourcefilepath=self.resources), + newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), + hiv.Hiv(resourcefilepath=self.resources), + tb.Tb(resourcefilepath=self.resources), + epi.Epi(resourcefilepath=self.resources), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=self.resources, 
mode_appt_constraints=1, cons_availability='all')] - - # return ( - # fullmodel(resourcefilepath=self.resources) - # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - # ) + """ + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) """ def draw_parameters(self, draw_number, rng): return mix_scenarios( diff --git a/src/tlo/events.py b/src/tlo/events.py index f67b54458a..3a8f4f58c7 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -13,6 +13,7 @@ from tlo.util import FACTOR_POP_DICT +import copy logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -76,23 +77,85 @@ def apply(self, target): """ raise NotImplementedError - def compare_population_dataframe(self,df_before, df_after): + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + """ + will_pause = False + + target_attribute = 'hcw_not_avail' + if len(entire_mni_after)>0: + print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + person = next(iter(entire_mni_after)) + entire_mni_after[person][target_attribute] = True + will_pause = True + print("Default target value after", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + + + if will_pause: + print("Reprint") + print(entire_mni_before) + print(entire_mni_after) + print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + """ + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + if len(diffs)>0: + print("DIfferences for ", diffs) + return diffs + + def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): """ This function compares the population dataframe before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals chain_links = {} len_of_diff = len(diff_mask) # Loop through each row of the mask + persons_changed = [] for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) # Create a dictionary for this person # First add event info link_info = { @@ -104,19 +167,47 @@ def compare_population_dataframe(self,df_before, df_after): # Store the new values from df_after for the changed columns for col in changed_cols: link_info[col] = df_after.at[idx, col] - + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + # Append the event and changes to the individual key chain_links[idx] = str(link_info) - + + # Check individuals + if len(diff_mni)>0: + print("Non-zero changes in mni") + for key in diff_mni: + if key not in persons_changed: + print("Individual ", key, "is changing in mni alone") + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'person_ID': key, + 'event': type(self).__name__, + 'event_date': self.sim.date, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = str(link_info) + print("Change for ", key, " is ", str(link_info)) + return chain_links - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + # Initialise these variables print_chains = False df_before = [] row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} + entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. @@ -129,9 +220,16 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Target is single individual if self.target != self.sim.population: + # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
row = self.sim.population.props.loc[[abs(self.target)]] @@ -139,6 +237,13 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -146,20 +251,30 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. df_before = self.sim.population.props.copy() + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - return print_chains, row_before, df_before + return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - + + if print_chains: # Target is single individual if self.target != self.sim.population: + + mni_instances_after = False + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, @@ -167,11 +282,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> 'event' : type(self).__name__, 'event_date' : self.sim.date, } + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + print("INDIVIDUAL WAS ADDED") + exit(-1) + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just 
used for debugging. Will be removed from final version of PR. @@ -182,6 +321,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -190,9 +330,10 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Population frame after event df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after) + chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if self.sim.debug_generate_event_chains: @@ -222,7 +363,7 @@ def run(self): # Collect relevant information before event takes place if self.sim.generate_event_chains: - print_chains, row_before, df_before = self.store_chains_to_do_before_event() + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) self.post_apply_hook() @@ -230,7 +371,7 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -238,6 +379,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: + print(chain_links) logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 978b26d7c5..41342f117e 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -195,65 +195,83 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ # Initialise these variables print_chains = False row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. 
if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF - # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual if self.target != self.sim.population: - # In the case of HSI events, only individual events should exist and therefore be logged - print_chains = True - # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = type(self).__name__ #str(self.event_name) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' - - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error - # raise RuntimeError("Cannot have population-wide HSI events") - logger.debug( - key="message", - data=( - "Cannot have population-wide HSI events" - ), - ) - + print("ERROR: there shouldn't be pop-wide HSI event") - return print_chains, row_before + return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ if print_chains: # For HSI event, this will only ever occur for individual events - + chain_links = {} + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni_instances_after = False + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes. 
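As context for the hunk that follows: a minimal sketch of the before/after row comparison used to record only the properties an HSI event actually changed for its target person. The function name and example values are illustrative; the comparison assumes both rows have already had NaN replaced by a sentinel (as the fillna(-99999) calls above do), so missing values on both sides count as "no change".

    # Sketch only: build a link_info dict containing the event metadata plus any columns
    # whose value differs between the pre-event and post-event snapshots of one person.
    import pandas as pd

    def diff_person_row(row_before: pd.Series, row_after: pd.Series, base_info: dict) -> dict:
        link_info = dict(base_info)                  # e.g. person_ID, event, event_date
        for key in row_before.index:
            if row_before[key] != row_after[key]:    # NaNs already replaced by a sentinel
                link_info[key] = row_after[key]
        return link_info

    # Hypothetical example values, not taken from the model:
    before = pd.Series({"is_alive": True, "rt_disability": 0.0}).fillna(-99999)
    after = pd.Series({"is_alive": True, "rt_disability": 0.2}).fillna(-99999)
    print(diff_person_row(before, after, {"person_ID": 7, "event": "ExampleHSI"}))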
@@ -278,8 +296,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - - chain_links = {self.target : str(link_info)} + + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + + + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + chain_links[self.target] = str(link_info) if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -300,7 +345,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before = self.store_chains_to_do_before_event() + print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -315,10 +360,9 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 8f7faa0503..79483cddaa 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -542,55 +542,7 @@ def update_mni_dictionary(self, individual_id): if self == self.sim.modules['PregnancySupervisor']: - mni[individual_id] = {'delete_mni': False, # if True, mni deleted in report_daly_values function - 'didnt_seek_care': False, - 'cons_not_avail': False, - 'comp_not_avail': False, - 'hcw_not_avail': False, - 'ga_anc_one': 0, - 'anc_ints': [], - 'abortion_onset': pd.NaT, - 'abortion_haem_onset': pd.NaT, - 'abortion_sep_onset': pd.NaT, - 'eclampsia_onset': pd.NaT, - 'mild_mod_aph_onset': pd.NaT, - 'severe_aph_onset': pd.NaT, - 'chorio_onset': pd.NaT, - 'chorio_in_preg': False, # use in predictor in newborn linear models - 'ectopic_onset': pd.NaT, - 'ectopic_rupture_onset': pd.NaT, - 'gest_diab_onset': pd.NaT, - 'gest_diab_diagnosed_onset': pd.NaT, - 
'gest_diab_resolution': pd.NaT, - 'mild_anaemia_onset': pd.NaT, - 'mild_anaemia_resolution': pd.NaT, - 'moderate_anaemia_onset': pd.NaT, - 'moderate_anaemia_resolution': pd.NaT, - 'severe_anaemia_onset': pd.NaT, - 'severe_anaemia_resolution': pd.NaT, - 'mild_anaemia_pp_onset': pd.NaT, - 'mild_anaemia_pp_resolution': pd.NaT, - 'moderate_anaemia_pp_onset': pd.NaT, - 'moderate_anaemia_pp_resolution': pd.NaT, - 'severe_anaemia_pp_onset': pd.NaT, - 'severe_anaemia_pp_resolution': pd.NaT, - 'hypertension_onset': pd.NaT, - 'hypertension_resolution': pd.NaT, - 'obstructed_labour_onset': pd.NaT, - 'sepsis_onset': pd.NaT, - 'uterine_rupture_onset': pd.NaT, - 'mild_mod_pph_onset': pd.NaT, - 'severe_pph_onset': pd.NaT, - 'secondary_pph_onset': pd.NaT, - 'vesicovaginal_fistula_onset': pd.NaT, - 'vesicovaginal_fistula_resolution': pd.NaT, - 'rectovaginal_fistula_onset': pd.NaT, - 'rectovaginal_fistula_resolution': pd.NaT, - 'test_run': False, # used by labour module when running some model tests - 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset - 'new_onset_spe': False, - 'cs_indication': 'none' - } + mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: labour_variables = {'labour_state': None, diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index 7dd8819ab6..f634d9b971 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -61,6 +61,56 @@ def __init__(self, name=None, resourcefilepath=None): # This variable will store a Bitset handler for the property ps_abortion_complications self.abortion_complications = None + + self.default_mni_values = {'delete_mni': False, # if True, mni deleted in report_daly_values function + 'didnt_seek_care': False, + 'cons_not_avail': False, + 'comp_not_avail': False, + 'hcw_not_avail': False, + 'ga_anc_one': 0, + 'anc_ints': [], + 'abortion_onset': pd.NaT, + 'abortion_haem_onset': pd.NaT, + 'abortion_sep_onset': pd.NaT, + 'eclampsia_onset': pd.NaT, + 'mild_mod_aph_onset': pd.NaT, + 'severe_aph_onset': pd.NaT, + 'chorio_onset': pd.NaT, + 'chorio_in_preg': False, # use in predictor in newborn linear models + 'ectopic_onset': pd.NaT, + 'ectopic_rupture_onset': pd.NaT, + 'gest_diab_onset': pd.NaT, + 'gest_diab_diagnosed_onset': pd.NaT, + 'gest_diab_resolution': pd.NaT, + 'mild_anaemia_onset': pd.NaT, + 'mild_anaemia_resolution': pd.NaT, + 'moderate_anaemia_onset': pd.NaT, + 'moderate_anaemia_resolution': pd.NaT, + 'severe_anaemia_onset': pd.NaT, + 'severe_anaemia_resolution': pd.NaT, + 'mild_anaemia_pp_onset': pd.NaT, + 'mild_anaemia_pp_resolution': pd.NaT, + 'moderate_anaemia_pp_onset': pd.NaT, + 'moderate_anaemia_pp_resolution': pd.NaT, + 'severe_anaemia_pp_onset': pd.NaT, + 'severe_anaemia_pp_resolution': pd.NaT, + 'hypertension_onset': pd.NaT, + 'hypertension_resolution': pd.NaT, + 'obstructed_labour_onset': pd.NaT, + 'sepsis_onset': pd.NaT, + 'uterine_rupture_onset': pd.NaT, + 'mild_mod_pph_onset': pd.NaT, + 'severe_pph_onset': pd.NaT, + 'secondary_pph_onset': pd.NaT, + 'vesicovaginal_fistula_onset': pd.NaT, + 'vesicovaginal_fistula_resolution': pd.NaT, + 'rectovaginal_fistula_onset': pd.NaT, + 'rectovaginal_fistula_resolution': pd.NaT, + 'test_run': False, # used by labour module when running some model tests + 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset + 'new_onset_spe': False, + 'cs_indication': 'none' + } INIT_DEPENDENCIES = {'Demography'} diff --git 
a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index c79b26314d..e772366d57 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,9 +2865,9 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 + #pred = 1.0 #else: - # pred = eq.predict(df.loc[rt_current_non_ind]) + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index bb766562a0..045e86bdd8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,7 +109,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = None + self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.debug_generate_event_chains = False @@ -299,6 +299,12 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') + + #if "PregnancySupervisor" in self.modules: + # print("I found it!") + # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) + # exit(-1) + for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later @@ -329,10 +335,10 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -491,7 +497,6 @@ def do_birth(self, mother_id: int) -> int: pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length - print("Length at birth", len(pop_dict)) logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/util.py b/src/tlo/util.py index e246fcf05b..c9130e3f07 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. 
DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 1000 +FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 9b8f01ff383bdb0954146b93849c6c7a18008b2d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:24:39 +0100 Subject: [PATCH 28/97] Tidy up --- .../analysis_extract_data.py | 2 +- src/tlo/events.py | 199 +++++++----------- src/tlo/methods/hiv.py | 32 ++- src/tlo/methods/hsi_event.py | 165 ++++++--------- src/tlo/methods/tb.py | 5 +- src/tlo/simulation.py | 41 +--- 6 files changed, 151 insertions(+), 293 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 3afad7adcc..8068db203a 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -59,7 +59,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_rt_event_properties = set() num_individuals = 1000 - num_runs = 50 + num_runs = 1 record = [] # Include results folder in output file name name_tag = str(results_folder).replace("outputs/", "") diff --git a/src/tlo/events.py b/src/tlo/events.py index 3a8f4f58c7..9f762fd3c6 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -77,7 +77,7 @@ def apply(self, target): """ raise NotImplementedError - def values_differ(self, v1, v2): + def mni_values_differ(self, v1, v2): if isinstance(v1, list) and isinstance(v2, list): return v1 != v2 # simple element-wise comparison @@ -111,14 +111,14 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] @@ -126,7 +126,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): else: # person is in both # Compare properties for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] @@ -135,13 +135,12 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): print("DIfferences for ", diffs) return diffs - def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe before/after a population-wide event has occurred. 
+ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals @@ -176,12 +175,10 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en # Append the event and changes to the individual key chain_links[idx] = str(link_info) - # Check individuals + # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: - print("Non-zero changes in mni") for key in diff_mni: if key not in persons_changed: - print("Individual ", key, "is changing in mni alone") # If individual hadn't been previously added due to changes in pop df, add it here link_info = { 'person_ID': key, @@ -193,7 +190,6 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = str(link_info) - print("Change for ", key, " is ", str(link_info)) return chain_links @@ -210,7 +206,6 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF @@ -224,140 +219,88 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is already in mni dictionary, if so copy her original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the pop dataframe before the event has occurred. + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
df_before = self.sim.population.props.copy() entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - - - if print_chains: - - # Target is single individual - if self.target != self.sim.population: + + # Target is single individual + if self.target != self.sim.population: + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - mni_instances_after = False + # Check if individual is in mni after the event + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: - - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - print("INDIVIDUAL WAS ADDED") - exit(-1) - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - chain_links[self.target] = str(link_info) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Print entire row - row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - - else: - # Target is entire population. 
Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Or print entire rows - change = df_before.compare(df_after) - if not change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = type(self).__name__ - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = type(self).__name__ - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + #'person_ID' : self.target, + 'person_ID' : self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + } + + # Store (if any) property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[self.target] = str(link_info) + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) return chain_links + def run(self): """Make the event happen.""" @@ -370,8 +313,8 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. - if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if self.sim.generate_event_chains and print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -384,7 +327,7 @@ def run(self): data= pop_dict, description='Links forming chains of events for simulated individuals') - #print("Chain events ", chain_links) + print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8487eaa467..0a80f8b41b 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,12 +631,11 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -906,16 +905,10 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: - print("Should be generating data") - sim.schedule_event( - HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) - ) - else: - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1901,12 +1894,11 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + # 
Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 41342f117e..dbca98da5c 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -216,8 +216,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): @@ -230,112 +229,75 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is in mni dictionary before the event, if so store its original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: print("ERROR: there shouldn't be pop-wide HSI event") + exit(-1) return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - if print_chains: - # For HSI event, this will only ever occur for individual events - chain_links = {} - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes. 
+ # For HSI event, this will only ever occur for individual events + chain_links = {} - # Add event details + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes or not. + + # Add event details + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: + link_info = { + 'person_ID': self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + 'appt_footprint' : record_footprint, + 'level' : record_level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - - - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - chain_links[self.target] = str(link_info) - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
- row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + + chain_links[self.target] = str(link_info) return chain_links @@ -360,17 +322,16 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) + if print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals - - pop_dict.update(chain_links) - - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + if len(chain_links)>0: + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict.update(chain_links) + + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 33edeb63c8..fe5d19c964 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -890,10 +890,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: - sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) - else: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) # 2) log at the end of the year diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 045e86bdd8..8356424901 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,19 +109,13 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] - self.debug_generate_event_chains = False self.end_date = None self.output_file = None 
self.population: Optional[Population] = None - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optional[Population] = None - + self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -289,21 +283,12 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - #if "PregnancySupervisor" in self.modules: - # print("I found it!") - # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) - # exit(-1) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key @@ -311,12 +296,11 @@ def make_initial_population(self, *, n: int) -> None: pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} pop_dict_full.update(pop_dict) - - print("Size for full sim", len(pop_dict_full)) logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -334,15 +318,9 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: - # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] - else: - # If not using to print chains, cannot ignore epi - self.generate_event_chains_overwrite_epi = False - + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) @@ -426,10 +404,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date - - if self.debug_generate_event_chains: - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. 
- self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -500,15 +474,6 @@ def do_birth(self, mother_id: int) -> int: logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id From 3b81de6546cb498938ff9918c852e39369b29ca3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:32:50 +0100 Subject: [PATCH 29/97] All fixes made --- .../analysis_extract_data.py | 8 +++- .../scenario_generate_chains.py | 2 +- src/tlo/events.py | 33 +++---------- src/tlo/methods/hsi_event.py | 4 +- src/tlo/methods/pregnancy_helper_functions.py | 46 ++++--------------- src/tlo/methods/pregnancy_supervisor.py | 40 ++++++++++++++++ 6 files changed, 64 insertions(+), 69 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 8068db203a..7fe15f0eb4 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -98,7 +98,11 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - + + for i in list_for_individual: + print(i) + + """ # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] @@ -201,7 +205,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No properties = key_first_event | key_last_event record.append(properties) - + """ df = pd.DataFrame(record) df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 64fa70d055..e9291a50ce 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,7 +53,7 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=36) + self.end_date = self.start_date + pd.DateOffset(months=18) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) diff --git a/src/tlo/events.py b/src/tlo/events.py index 9f762fd3c6..993c27090c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -88,40 +88,23 @@ def mni_values_differ(self, v1, v2): def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs = {} - """ - will_pause = False - - target_attribute = 'hcw_not_avail' - if len(entire_mni_after)>0: - print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - person = next(iter(entire_mni_after)) - entire_mni_after[person][target_attribute] = True - will_pause = True - print("Default target value after", 
self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - - if will_pause: - print("Reprint") - print(entire_mni_before) - print(entire_mni_after) - print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - """ all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] else: # person is in both # Compare properties @@ -131,8 +114,6 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] - if len(diffs)>0: - print("DIfferences for ", diffs) return diffs def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): @@ -272,13 +253,13 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in mni_row_before: if self.mni_values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.mni_values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] @@ -322,12 +303,10 @@ def run(self): # Log chain_links here if len(chain_links)>0: - print(chain_links) + logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - - print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index dbca98da5c..85ac6da3e2 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -286,13 +286,13 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in 
mni_row_before: if self.values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 79483cddaa..2456f57e8b 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -545,40 +545,12 @@ def update_mni_dictionary(self, individual_id): mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: - labour_variables = {'labour_state': None, - # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) - 'birth_weight': 'normal_birth_weight', - 'birth_size': 'average_for_gestational_age', - 'delivery_setting': None, # home_birth, health_centre, hospital - 'twins': df.at[individual_id, 'ps_multiple_pregnancy'], - 'twin_count': 0, - 'twin_one_comps': False, - 'pnc_twin_one': 'none', - 'bf_status_twin_one': 'none', - 'eibf_status_twin_one': False, - 'an_placental_abruption': df.at[individual_id, 'ps_placental_abruption'], - 'corticosteroids_given': False, - 'clean_birth_practices': False, - 'abx_for_prom_given': False, - 'abx_for_pprom_given': False, - 'endo_pp': False, - 'retained_placenta': False, - 'uterine_atony': False, - 'amtsl_given': False, - 'cpd': False, - 'mode_of_delivery': 'vaginal_delivery', - 'neo_will_receive_resus_if_needed': False, - # vaginal_delivery, instrumental, caesarean_section - 'hsi_cant_run': False, # True (T) or False (F) - 'sought_care_for_complication': False, # True (T) or False (F) - 'sought_care_labour_phase': 'none', - 'referred_for_cs': False, # True (T) or False (F) - 'referred_for_blood': False, # True (T) or False (F) - 'received_blood_transfusion': False, # True (T) or False (F) - 'referred_for_surgery': False, # True (T) or False (F)' - 'death_in_labour': False, # True (T) or False (F) - 'single_twin_still_birth': False, # True (T) or False (F) - 'will_receive_pnc': 'none', - 'passed_through_week_one': False} - - mni[individual_id].update(labour_variables) + + labour_default = self.sim.modules['PregnancySupervisor'].default_labour_values.copy() + mni[individual_id].update(labour_default) + + # Update from default based on individual case + mni[individual_id]['twins'] = df.at[individual_id, 'ps_multiple_pregnancy'] + mni[individual_id]['an_placental_abruption'] = df.at[individual_id, 'ps_placental_abruption'] + + diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index f634d9b971..5d747d44c2 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -82,6 +82,8 @@ def __init__(self, name=None, resourcefilepath=None): 'gest_diab_onset': pd.NaT, 'gest_diab_diagnosed_onset': pd.NaT, 'gest_diab_resolution': pd.NaT, + 'none_anaemia_onset': pd.NaT, + 'none_anaemia_resolution': pd.NaT, 'mild_anaemia_onset': pd.NaT, 'mild_anaemia_resolution': pd.NaT, 'moderate_anaemia_onset': pd.NaT, @@ -111,6 +113,44 @@ def __init__(self, name=None, resourcefilepath=None): 'new_onset_spe': False, 'cs_indication': 'none' } + self.default_labour_values = 
{'labour_state': None, + # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) + 'birth_weight': 'normal_birth_weight', + 'birth_size': 'average_for_gestational_age', + 'delivery_setting': None, # home_birth, health_centre, hospital + 'twins': None, + 'twin_count': 0, + 'twin_one_comps': False, + 'pnc_twin_one': 'none', + 'bf_status_twin_one': 'none', + 'eibf_status_twin_one': False, + 'an_placental_abruption': None, + 'corticosteroids_given': False, + 'clean_birth_practices': False, + 'abx_for_prom_given': False, + 'abx_for_pprom_given': False, + 'endo_pp': False, + 'retained_placenta': False, + 'uterine_atony': False, + 'amtsl_given': False, + 'cpd': False, + 'mode_of_delivery': 'vaginal_delivery', + 'neo_will_receive_resus_if_needed': False, + # vaginal_delivery, instrumental, caesarean_section + 'hsi_cant_run': False, # True (T) or False (F) + 'sought_care_for_complication': False, # True (T) or False (F) + 'sought_care_labour_phase': 'none', + 'referred_for_cs': False, # True (T) or False (F) + 'referred_for_blood': False, # True (T) or False (F) + 'received_blood_transfusion': False, # True (T) or False (F) + 'referred_for_surgery': False, # True (T) or False (F)' + 'death_in_labour': False, # True (T) or False (F) + 'single_twin_still_birth': False, # True (T) or False (F) + 'will_receive_pnc': 'none', + 'passed_through_week_one': False} + + self.default_all_mni_values = self.default_mni_values + self.default_all_mni_values.update(self.default_labour_values) INIT_DEPENDENCIES = {'Demography'} From bc61e1efbf7c79c4b85273b5b3c893c0030b362d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 17 Nov 2025 10:21:22 +0000 Subject: [PATCH 30/97] Cleaned and [skip ci] --- src/tlo/simulation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 8356424901..ef2fe4518e 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -108,9 +108,11 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_event_chains = True self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.end_date = None self.output_file = None self.population: Optional[Population] = None From e084e3949c03a8e19bc49f42aea56a154d09dabf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 14:38:07 +0000 Subject: [PATCH 31/97] Start logging data in EAV format --- src/tlo/events.py | 17 ++++++++++------- src/tlo/simulation.py | 11 ++++++++++- src/tlo/util.py | 23 +++++++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 993c27090c..9e9865cdad 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav import copy @@ -233,12 +233,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } + link_info = {} + # #'person_ID' : self.target, + # 'person_ID' : self.target, + # 'event' : type(self).__name__, + # 
'event_date' : self.sim.date, + #} # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,6 +265,9 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything + eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) + print(eav) + exit(-1) # Add individual to the chain links chain_links[self.target] = str(link_info) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef2fe4518e..ef27fa6381 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, df_to_eav try: import dill @@ -290,6 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: + print(len(self.population.props), n) + # EAV structure to capture status of individuals at the start of the simulation + eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') + + """ pop_dict = self.population.props.to_dict(orient='index') for key in pop_dict.keys(): @@ -302,6 +307,10 @@ def make_initial_population(self, *, n: int) -> None: logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + """ + logger.info(key='event_chains', + data = eav.to_dict(), + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/src/tlo/util.py b/src/tlo/util.py index c9130e3f07..e83e19baab 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -94,6 +94,29 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states +def df_to_eav(df, date, event_name): + """Function to convert dataframe into EAV""" + eav = df.stack().reset_index() + eav.columns = ['E', 'A', 'V'] + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[["E", "Date", "NameEvent", "A", "V"]] + + return eav + + +def convert_dict_into_eav(link_info, target, date, event_name): + "Function to convert link info in the form of dictionary into an EAV" + eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) + eav.columns = ['A', 'V'] + eav['E'] = target + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] + + return eav + + def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities that are specific to each individual. 
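The two helpers above perform the wide-to-long conversion behind the EAV log. A minimal sketch of the same idea on toy data (the person id, property values and event names are illustrative, not the real TLO properties):

import pandas as pd

# Toy wide frame: one row per person (entity), one column per property (attribute).
props = pd.DataFrame({"age": [30, 2], "is_alive": [True, True]}, index=[0, 1])

# Wide -> long (EAV): stack() turns each (person, property) cell into its own row,
# as df_to_eav does for the whole population at the start of the simulation.
eav = props.stack().reset_index()
eav.columns = ["E", "A", "V"]
eav["Date"] = pd.Timestamp("2010-01-01")
eav["NameEvent"] = "StartOfSimulation"
print(eav[["E", "Date", "NameEvent", "A", "V"]])

# A per-person dict of changed properties can be flattened the same way,
# mirroring convert_dict_into_eav for a single event.
link_info = {"hv_inf": True, "hv_date_inf": pd.Timestamp("2010-06-01")}
eav_link = pd.DataFrame(list(link_info.items()), columns=["A", "V"])
eav_link["E"] = 7                              # hypothetical person id
eav_link["Date"] = pd.Timestamp("2010-06-01")
eav_link["NameEvent"] = "HivInfectionEvent"    # hypothetical event name
print(eav_link[["E", "Date", "NameEvent", "A", "V"]])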
From ac617e80ff416976229b3f3bdd915198a26da96c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:15:07 +0000 Subject: [PATCH 32/97] Log event chains via EAV approach --- .../analysis_extract_data.py | 27 ++++- .../scenario_generate_chains.py | 4 +- src/tlo/analysis/utils.py | 111 ++++++++++++++++++ src/tlo/events.py | 41 +++---- src/tlo/methods/hsi_event.py | 21 ++-- src/tlo/simulation.py | 40 ++----- src/tlo/util.py | 30 +++-- 7 files changed, 200 insertions(+), 74 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 7fe15f0eb4..9ee37cabef 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -11,7 +11,7 @@ import matplotlib.pyplot as plt from tlo import Date -from tlo.analysis.utils import extract_results +from tlo.analysis.utils import extract_results, extract_event_chains from datetime import datetime from collections import Counter import ast @@ -35,6 +35,27 @@ def check_if_beyond_time_range_considered(progression_properties): if progression_properties[key] > end_date: print("Beyond time range considered, need at least ",progression_properties[key]) +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. 
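extract_event_chains, added to tlo.analysis.utils in this commit, returns one DataFrame per draw. A hedged usage sketch for pulling out a single individual's chain, assuming the 'E', 'EventDate', 'EventName', 'Info' layout this commit produces and a hypothetical results folder:

from pathlib import Path

from tlo.analysis.utils import extract_event_chains

# Hypothetical output folder produced by scenario_generate_chains.py.
results_folder = Path("outputs/scenario_generate_chains")

chains_by_draw = extract_event_chains(results_folder)

# Chain of events for one (hypothetical) individual in draw 0, in date order.
draw0 = chains_by_draw[0]
person = draw0[draw0["E"] == 42].sort_values("EventDate")
for _, link in person.iterrows():
    print(link["EventDate"], link["EventName"], link["Info"])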
@@ -43,6 +64,10 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) + individual_event_chains = extract_event_chains(results_folder) + print_filtered_df(individual_event_chains[0]) + exit(-1) + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index e9291a50ce..6cfbd040fa 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=18) + self.end_date = self.start_date + pd.DateOffset(months=1) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 3 self.generate_event_chains = True def log_configuration(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index e605400332..f762f1eb92 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -341,6 +341,117 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat + + +import pandas as pd + +def unpack_dict_rows(df): + """ + Reconstruct a full dataframe from rows whose columns contain dictionaries + mapping local-row-index → value. Preserves original column order. + """ + original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine how many rows this block has (using the first dict column) + first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) + block_length = len(row[first_dict_col]) + + # Build each reconstructed row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if not isinstance(cell, dict): + raise ValueError(f"Column {col} does not contain a dictionary") + new_row[col] = cell.get(str(i)) + reconstructed_rows.append(new_row) + + # Build DataFrame and enforce the original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + return out.reset_index(drop=True) + + +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + +def extract_event_chains(results_folder: Path, + ) -> dict: + """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. 
+ Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. + """ + module = 'tlo.simulation' + key = 'event_chains' + + # get number of draws and numbers of runs + info = get_scenario_info(results_folder) + + # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df. + res = dict() + + for draw in range(info['number_of_draws']): + + # All individuals in same draw will be combined across runs, so their ID will be offset. + dfs_from_runs = [] + ID_offset = 0 + + for run in range(info['runs_per_draw']): + + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + del df['date'] + recon = unpack_dict_rows(df) + # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. + recon['V'] = recon['V'].apply(str) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) + df_collapsed = ( + recon.groupby(['E', 'EventDate', 'EventName']) + .apply(lambda g: dict(zip(g['A'], g['V']))) + .reset_index(name='Info') + ) + df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + birth_count = (df_final['EventName'] == 'Birth').sum() + + print("Birth count for run ", run, "is ", birth_count) + df_final['E'] = df_final['E'] + ID_offset + + # Calculate ID offset for next run + ID_offset = (max(df_final['E']) + 1) + + # Append these chains to list + dfs_from_runs.append(df_final) + + except KeyError: + # Some logs could not be found - probably because this run failed. + # Simply to not append anything to the df collecting chains. 
+ print("Run failed") + + # Combine all dfs into a single DataFrame + res[draw] = pd.concat(dfs_from_runs, ignore_index=True) + + # Optionally, sort by 'E' and 'EventDate' after combining + res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + + return res def summarize(results: pd.DataFrame, only_mean: bool = False, collapse_columns: bool = False) -> pd.DataFrame: diff --git a/src/tlo/events.py b/src/tlo/events.py index 9e9865cdad..ba91218dbc 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav +from tlo.util import convert_chain_links_into_EAV import copy @@ -139,9 +139,8 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'person_ID': idx, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } # Store the new values from df_after for the changed columns @@ -154,7 +153,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[col] = diff_mni[idx][key] # Append the event and changes to the individual key - chain_links[idx] = str(link_info) + chain_links[idx] = link_info # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -162,15 +161,14 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'person_ID': key, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } for key_prop in diff_mni[key]: link_info[key_prop] = diff_mni[key][key_prop] - chain_links[key] = str(link_info) + chain_links[key] = link_info return chain_links @@ -233,12 +231,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = {} - # #'person_ID' : self.target, - # 'person_ID' : self.target, - # 'event' : type(self).__name__, - # 'event_date' : self.sim.date, - #} + link_info = { + 'EventDate' : self.sim.date, + 'EventName' : type(self).__name__, + } # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,11 +261,8 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything - eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) - print(eav) - exit(-1) # Add individual to the chain links - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info else: # Target is entire population. 
Identify individuals for which properties have changed @@ -300,6 +293,14 @@ def run(self): if self.sim.generate_event_chains and print_chains: chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger_chain.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + """ # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict.update(chain_links) @@ -310,7 +311,7 @@ def run(self): logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - + """ class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 85ac6da3e2..59b7b1f60a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,7 +8,7 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population -from tlo.util import FACTOR_POP_DICT +from tlo.util import convert_chain_links_into_EAV import pandas as pd @@ -266,9 +266,8 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, record_level = 'N/A' link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, + 'EventName' : type(self).__name__, + 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } @@ -297,7 +296,7 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info return chain_links @@ -325,13 +324,13 @@ def run(self, squeeze_factor): if print_chains: chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict.update(chain_links) + if chain_links: - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + logger_chain.info(key='event_chains', + data = ednav, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef27fa6381..da55d42efc 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT, df_to_eav +from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -290,26 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
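Because only deltas are logged after the initial snapshot, a person's full state at any date can be rebuilt by folding the chain links over that snapshot. A minimal sketch of that reconstruction, on hypothetical records with illustrative event names:

import pandas as pd

# Hypothetical chain for one person: initial snapshot followed by per-event deltas.
chain = [
    {"EventDate": pd.Timestamp("2010-01-01"), "EventName": "StartOfSimulation",
     "is_alive": True, "hv_inf": False, "hv_diagnosed": False},
    {"EventDate": pd.Timestamp("2010-06-01"), "EventName": "HivInfectionEvent",  # illustrative
     "hv_inf": True},
    {"EventDate": pd.Timestamp("2011-01-01"), "EventName": "HivTestEvent",       # illustrative
     "hv_diagnosed": True},
]

def state_at(chain, when):
    """Replay the snapshot plus all deltas dated on or before `when`."""
    state = {}
    for link in sorted(chain, key=lambda l: l["EventDate"]):
        if link["EventDate"] > when:
            break
        state.update({k: v for k, v in link.items() if k not in ("EventDate", "EventName")})
    return state

# By mid-2010 the infection delta has been applied but not the diagnosis delta.
print(state_at(chain, pd.Timestamp("2010-07-01")))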
if self.generate_event_chains: - print(len(self.population.props), n) - # EAV structure to capture status of individuals at the start of the simulation - eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') - - """ - pop_dict = self.population.props.to_dict(orient='index') - - for key in pop_dict.keys(): - pop_dict[key]['person_ID'] = key - pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later - - pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict_full.update(pop_dict) - - logger.info(key='event_chains', - data = pop_dict_full, - description='Links forming chains of events for simulated individuals') - """ + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') + logger.info(key='event_chains', - data = eav.to_dict(), + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') end = time.time() @@ -475,15 +460,16 @@ def do_birth(self, mother_id: int) -> int: if self.generate_event_chains: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. - prop_dict = self.population.props.loc[child_id].to_dict() - prop_dict['event'] = 'Birth' - prop_dict['event_date'] = self.date - - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + link_info = self.population.props.loc[child_id].to_dict() + link_info['EventName'] = 'Birth' + link_info['EventDate'] = self.date + chain_links = {} + chain_links[child_id] = link_info # Convert to string to avoid issue of length + ednav = convert_chain_links_into_EAV(chain_links) + logger.info(key='event_chains', - data = pop_dict, + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index e83e19baab..ee29445e9a 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,6 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. 
DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): @@ -94,25 +93,30 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states -def df_to_eav(df, date, event_name): +def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[["E", "Date", "NameEvent", "A", "V"]] + eav['EventDate'] = date + eav['EventName'] = event_name + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav -def convert_dict_into_eav(link_info, target, date, event_name): - "Function to convert link info in the form of dictionary into an EAV" - eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) - eav.columns = ['A', 'V'] - eav['E'] = target - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] +def convert_chain_links_into_EAV(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") + id_cols = ["EventDate", "EventName"] + + eav = df.reset_index().melt( + id_vars=["index"] + id_cols, # index = person ID + var_name="A", + value_name="V" + ) + + eav.rename(columns={"index": "E"}, inplace=True) + + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav From 5234550934fd0bf156e43603d593945c66d888c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:44:52 +0000 Subject: [PATCH 33/97] No need to store EventDate since this is already stored in logger by default --- src/tlo/analysis/utils.py | 62 ++++++++++++++++++++++++++++++++---- src/tlo/events.py | 3 -- src/tlo/methods/hsi_event.py | 1 - src/tlo/simulation.py | 1 - src/tlo/util.py | 7 ++-- 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index f762f1eb92..00a297030b 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -345,7 +345,7 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: import pandas as pd -def unpack_dict_rows(df): +def old_unpack_dict_rows(df): """ Reconstruct a full dataframe from rows whose columns contain dictionaries mapping local-row-index → value. Preserves original column order. @@ -372,6 +372,54 @@ def unpack_dict_rows(df): out = pd.DataFrame(reconstructed_rows)[original_cols] return out.reset_index(drop=True) + +def unpack_dict_rows(df, non_dict_cols=None): + """ + Reconstruct a full DataFrame from rows where most columns are dictionaries. + Non-dict columns (e.g., 'date') are propagated to all reconstructed rows. 
+ + Parameters: + df: pd.DataFrame + non_dict_cols: list of columns that are NOT dictionaries + """ + if non_dict_cols is None: + non_dict_cols = [] + + original_cols = ['E', 'date', 'EventName', 'A', 'V'] + + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine dict columns for this row + dict_cols = [col for col in original_cols if col not in non_dict_cols] + + if not dict_cols: + # No dict columns, just append row + reconstructed_rows.append(row.to_dict()) + continue + + # Use the first dict column to get the block length + first_dict_col = dict_cols[0] + block_length = len(row[first_dict_col]) + + # Build each expanded row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if col in dict_cols: + # Access the dict using string or integer keys + new_row[col] = cell.get(str(i), cell.get(i)) + else: + # Propagate non-dict value + new_row[col] = cell + reconstructed_rows.append(new_row) + + # Build DataFrame in original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + + return out.reset_index(drop=True) + def print_filtered_df(df): """ @@ -418,17 +466,19 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - del df['date'] - recon = unpack_dict_rows(df) + + recon = unpack_dict_rows(df, ['date']) + print(recon) + #del recon['EventDate'] # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. recon['V'] = recon['V'].apply(str) # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - recon.groupby(['E', 'EventDate', 'EventName']) + recon.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) birth_count = (df_final['EventName'] == 'Birth').sum() print("Birth count for run ", run, "is ", birth_count) @@ -449,7 +499,7 @@ def extract_event_chains(results_folder: Path, res[draw] = pd.concat(dfs_from_runs, ignore_index=True) # Optionally, sort by 'E' and 'EventDate' after combining - res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + res[draw] = res[draw].sort_values(by=['E', 'date']).reset_index(drop=True) return res diff --git a/src/tlo/events.py b/src/tlo/events.py index ba91218dbc..4b62c16932 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -139,7 +139,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -161,7 +160,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -232,7 +230,6 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Create and store event for this individual, regardless of whether any property change occurred link_info = { - 'EventDate' : self.sim.date, 'EventName' : type(self).__name__, } diff --git a/src/tlo/methods/hsi_event.py 
b/src/tlo/methods/hsi_event.py index 59b7b1f60a..d59f8e2404 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -267,7 +267,6 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info = { 'EventName' : type(self).__name__, - 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index da55d42efc..35f6818f66 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -462,7 +462,6 @@ def do_birth(self, mother_id: int) -> int: # changes that this individual will undergo as a result of events taking place. link_info = self.population.props.loc[child_id].to_dict() link_info['EventName'] = 'Birth' - link_info['EventDate'] = self.date chain_links = {} chain_links[child_id] = link_info # Convert to string to avoid issue of length diff --git a/src/tlo/util.py b/src/tlo/util.py index ee29445e9a..d678aa09ef 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -97,16 +97,15 @@ def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['EventDate'] = date eav['EventName'] = event_name - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav def convert_chain_links_into_EAV(chain_links): df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["EventDate", "EventName"] + id_cols = ["EventName"] eav = df.reset_index().melt( id_vars=["index"] + id_cols, # index = person ID @@ -116,7 +115,7 @@ def convert_chain_links_into_EAV(chain_links): eav.rename(columns={"index": "E"}, inplace=True) - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav From 2f20cb392a9aaee1c8d004a82e4f31957d2130b8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:45:16 +0000 Subject: [PATCH 34/97] Check if PregnancySupervisor is included before considering in chain of events production --- src/tlo/events.py | 101 ++++++++++++++++++++--------------- src/tlo/methods/hsi_event.py | 53 +++++++++--------- 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 4b62c16932..f03f150f92 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -122,7 +122,10 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] # Create an empty list to store changes for each of the individuals chain_links = {} @@ -154,19 +157,20 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] + if 
'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } - chain_links[key] = link_info + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info return chain_links @@ -197,17 +201,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is already in mni dictionary, if so copy her original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + else: + mni_row_before = None else: # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. df_before = self.sim.population.props.copy() - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_before = None return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before @@ -224,9 +234,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Check if individual is in mni after the event mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True + else: + mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred link_info = { @@ -237,26 +250,27 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - 
if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything # Add individual to the chain links chain_links[self.target] = link_info @@ -267,7 +281,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Population frame after event df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 7d960077fc..edb5d3df3b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -239,10 +239,11 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is in mni dictionary before the event, if so store its original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() else: print("ERROR: there shouldn't be pop-wide HSI event") @@ -259,9 +260,10 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes or not. 
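As an aside, the before/after row comparison that both the Event and HSI variants of store_chains_to_do_after_event rely on can be sketched in isolation as follows; the helper name diff_row and the example columns are illustrative only (not part of the TLO API), and only the diffing logic mirrors the code above.

import pandas as pd

def diff_row(row_before: pd.Series, row_after: pd.Series, event_name: str) -> dict:
    # Start the chain link with the event name; only properties that changed are added on top.
    link_info = {'EventName': event_name}
    for key in row_before.index:
        # Rows are fillna(-99999)-ed beforehand in the real code, so NaN-vs-NaN noise is avoided.
        if row_before[key] != row_after[key]:
            link_info[key] = row_after[key]
    return link_info

# Example: only the column whose value changed ends up in the link.
before = pd.Series({'is_alive': True, 'hv_inf': False})
after = pd.Series({'is_alive': True, 'hv_inf': True})
print(diff_row(before, after, 'HivInfectionEvent'))  # {'EventName': 'HivInfectionEvent', 'hv_inf': True}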
@@ -285,24 +287,25 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] + if 'PregnancySupervisor' in self.sim.modules: + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] chain_links[self.target] = link_info From 1b838235e2ca27e82412f37d302ff40adccbeba0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:42:59 +0000 Subject: [PATCH 35/97] Remove old util fnc --- src/tlo/analysis/utils.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index bc8784ae66..fc0d374fd1 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -364,36 +364,6 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat - - -import pandas as pd - -def old_unpack_dict_rows(df): - """ - Reconstruct a full dataframe from rows whose columns contain dictionaries - mapping local-row-index → value. Preserves original column order. 
- """ - original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] - reconstructed_rows = [] - - for _, row in df.iterrows(): - # Determine how many rows this block has (using the first dict column) - first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) - block_length = len(row[first_dict_col]) - - # Build each reconstructed row - for i in range(block_length): - new_row = {} - for col in original_cols: - cell = row[col] - if not isinstance(cell, dict): - raise ValueError(f"Column {col} does not contain a dictionary") - new_row[col] = cell.get(str(i)) - reconstructed_rows.append(new_row) - - # Build DataFrame and enforce the original column order - out = pd.DataFrame(reconstructed_rows)[original_cols] - return out.reset_index(drop=True) def unpack_dict_rows(df, non_dict_cols=None): From f4cf120a60bf6da13fecb2d66dfe05fd4f495aff Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:44:08 +0000 Subject: [PATCH 36/97] Overwrite any changes to hiv and tb file --- src/tlo/methods/hiv.py | 35 --------------- src/tlo/methods/tb.py | 96 ++++++++++-------------------------------- 2 files changed, 23 insertions(+), 108 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 7ecc741c25..8b40e37a34 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -1682,37 +1682,6 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- -class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): - """ The HIV Polling Events for Data Generation - * Ensures that - """ - - def __init__(self, module): - super().__init__( - module, frequency=DateOffset(years=120) - ) # repeats every 12 months, but this can be changed - - def apply(self, population): - - df = population.props - - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & ~df.hv_inf - ].index - - n_susceptible = len(susc_idx) - print("Number of individuals susceptible", n_susceptible) - # Schedule the date of infection for each new infection: - for i in susc_idx: - date_of_infection = self.sim.date + pd.DateOffset( - # Ensure that individual will be infected before end of sim - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - self.sim.schedule_event( - HivInfectionEvent(self.module, i), date_of_infection - ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1734,7 +1703,6 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls - # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1810,7 +1778,6 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) - # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1935,8 +1902,6 @@ def vmmc_for_child(): vmmc_for_child() - - # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py 
b/src/tlo/methods/tb.py index 71361a7951..d9ba7309e0 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -864,31 +864,29 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT - # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -901,10 +899,8 @@ def initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - # 2) log at the end of the year # Optional: Schedule the scale-up of programs if self.parameters["type_of_scaleup"] != 'none': @@ -1406,53 +1402,6 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- -class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): - """The Tb Regular Poll Event for Data Generation for assigning active infections - * selects everyone to develop an active infection and schedules onset of active tb - sometime during the simulation - """ - - def __init__(self, module): - super().__init__(module, frequency=DateOffset(years=120)) - - def apply(self, population): - - df = population.props - now = self.sim.date - rng = self.module.rng - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & (df.tb_inf != "active") - ].index - - len(susc_idx) - - middle_index = len(susc_idx) // 2 - - # Will equally split two strains among the population - list_ds = susc_idx[:middle_index] - list_mdr = susc_idx[middle_index:] - - # schedule onset of active tb. This will be equivalent to the "Onset", so it - # doesn't matter how long after we have decided which infection this is. 
- for person_id in list_ds: - date_progression = now + pd.DateOffset( - # At some point during their lifetime, this person will develop TB - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "ds" - - for person_id in list_mdr: - date_progression = now + pd.DateOffset( - days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "mdr" - class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1527,6 +1476,7 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() # note also culture test used in target/max scale-up in place of clinical dx + class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period From 29dd543c2c182a724c7c9099bdeb5cf5ec439363 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:45:09 +0000 Subject: [PATCH 37/97] Overwrite any changes to demography file --- src/tlo/methods/demography.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index b8fa40b7df..2acaad75eb 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -324,10 +324,9 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_event_chains is False: - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): From 33f1143e1b2c46113c498a3fde5fe0799a2a6be7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 18:00:52 +0000 Subject: [PATCH 38/97] Remove outdated test related to RTI data harvesting --- tests/test_rti.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_rti.py b/tests/test_rti.py index b696a249f5..711215b8cf 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,17 +25,6 @@ end_date = Date(2012, 1, 1) popsize = 1000 -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation with a functioning health system with full service availability and no set - constraints - """ - # create sim object - sim = create_basic_rti_sim(popsize, seed) - # run simulation - sim.simulate(end_date=end_date) - exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't From af477c29485ee7b2d4d380753d9846b7d93c19c5 Mon Sep 
17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 09:51:58 +0000 Subject: [PATCH 39/97] Add a very simple synchronous notification dispatcher - avoided using the more typical naming `event` or `signal` because they are already used. --- src/tlo/notify.py | 64 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_notify.py | 23 ++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/tlo/notify.py create mode 100644 tests/test_notify.py diff --git a/src/tlo/notify.py b/src/tlo/notify.py new file mode 100644 index 0000000000..28765f5afd --- /dev/null +++ b/src/tlo/notify.py @@ -0,0 +1,64 @@ +""" +A dead simple synchronous notification dispatcher. + +Usage +----- +# In the notifying class/module +from tlo.notify import notifier + +notifier.dispatch("simulation.on_start", data={"one": 1, "two": 2}) + +# In the listening class/module +from tlo.notify import notifier + +def on_notification(data): + print("Received notification:", data) + +notifier.add_listener("simulation.on_start", on_notification) +""" + +class Notifier: + """ + A simple synchronous notification dispatcher supporting listeners. + """ + + def __init__(self): + self.listeners = {} + + def add_listener(self, notification_key, listener): + """ + Register a listener for a specific notification. + + :param notification_key: The identifier to listen for. + :param listener: A callable to be invoked when the notification is dispatched. + """ + if notification_key not in self.listeners: + self.listeners[notification_key] = [] + self.listeners[notification_key].append(listener) + + def remove_listener(self, notification_key, listener): + """ + Remove a previously registered listener for a notification. + + :param notification_key: The identifier. + :param listener: The listener callable to remove. + """ + if notification_key in self.listeners: + self.listeners[notification_key].remove(listener) + if not self.listeners[notification_key]: + del self.listeners[notification_key] + + def dispatch(self, notification_key, data=None): + """ + Dispatch a notification to all registered listeners. + + :param notification_key: The identifier. + :param data: Optional data to pass to each listener. 
+ """ + if notification_key in self.listeners: + for listener in list(self.listeners[notification_key]): + listener(data) + +# Create a global dispatcher instance +notifier = Notifier() + diff --git a/tests/test_notify.py b/tests/test_notify.py new file mode 100644 index 0000000000..e71e2acb9a --- /dev/null +++ b/tests/test_notify.py @@ -0,0 +1,23 @@ +from tlo.notify import notifier + +def test_notifier(): + # in listening code + received_data = [] + + def callback(data): + received_data.append(data) + + notifier.add_listener("test.signal", callback) + + # in emitting code + notifier.dispatch("test.signal", data={"value": 42}) + + assert len(received_data) == 1 + assert received_data[0] == {"value": 42} + + # Unsubscribe and test no further calls + notifier.remove_listener("test.signal", callback) + notifier.dispatch("test.signal", data={"value": 100}) + + assert len(received_data) == 1 # No new data + From 01e35d0079877dd7d12cdbd2cb6f7b285fef863f Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:02:59 +0000 Subject: [PATCH 40/97] Fix comment --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 28765f5afd..325131a1c7 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -59,6 +59,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) -# Create a global dispatcher instance +# Create a global notifier instance notifier = Notifier() From 9f23fcbeb46e2af5b6a1c6334aa579574ec18b66 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:23:00 +0000 Subject: [PATCH 41/97] Fix formatting --- src/tlo/notify.py | 3 ++- tests/test_notify.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 325131a1c7..2906fa712a 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -17,6 +17,7 @@ def on_notification(data): notifier.add_listener("simulation.on_start", on_notification) """ + class Notifier: """ A simple synchronous notification dispatcher supporting listeners. @@ -59,6 +60,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) + # Create a global notifier instance notifier = Notifier() - diff --git a/tests/test_notify.py b/tests/test_notify.py index e71e2acb9a..ad5e828bbf 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,5 +1,6 @@ from tlo.notify import notifier + def test_notifier(): # in listening code received_data = [] @@ -20,4 +21,3 @@ def callback(data): notifier.dispatch("test.signal", data={"value": 100}) assert len(received_data) == 1 # No new data - From 5ff53bb7e104e46969199dbfefc15e3fccc02eec Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 12:23:49 +0000 Subject: [PATCH 42/97] Remove unnecessary list wrap --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 2906fa712a..48c46b82b4 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -57,7 +57,7 @@ def dispatch(self, notification_key, data=None): :param data: Optional data to pass to each listener. 
""" if notification_key in self.listeners: - for listener in list(self.listeners[notification_key]): + for listener in self.listeners[notification_key]: listener(data) From 16f5e6701b03e826830352eeef8657991eae94bd Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:08:51 +0000 Subject: [PATCH 43/97] Use broadcaster to collect events. Need to expand to include HSI events --- src/tlo/events.py | 11 + src/tlo/methods/collect_event_chains.py | 281 ++++++++++++++++++++++++ src/tlo/methods/fullmodel.py | 2 + src/tlo/simulation.py | 5 + 4 files changed, 299 insertions(+) create mode 100644 src/tlo/methods/collect_event_chains.py diff --git a/src/tlo/events.py b/src/tlo/events.py index f03f150f92..dce44656bd 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,7 @@ import pandas as pd +from tlo.notify import notifier from tlo.util import convert_chain_links_into_EAV import copy @@ -296,7 +297,12 @@ def run(self): """Make the event happen.""" # Collect relevant information before event takes place + # If statement outside or inside dispatch notification? if self.sim.generate_event_chains: + + # Dispatch notification that event is about to run + notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) @@ -305,6 +311,11 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains and print_chains: + + print("About to pass") + # Dispatch notification that event is about to run + notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) if chain_links: diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py new file mode 100644 index 0000000000..553e095b0b --- /dev/null +++ b/src/tlo/methods/collect_event_chains.py @@ -0,0 +1,281 @@ +from tlo.notify import notifier + +from pathlib import Path +from typing import Optional +from tlo import Module, logging, population +from tlo.population import Population +import pandas as pd + +from tlo.util import convert_chain_links_into_EAV + +import copy + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class CollectEventChains(Module): + + def __init__(self, name=None): + super().__init__(name) + + # This is how I am passing data from fnc taking place before event to the one after + # It doesn't seem very elegant but not sure how else to go about it + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + def initialise_simulation(self, sim): + notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + pass + + def initialise_population(self, population): + pass + + def on_birth(self, mother, child): + pass + + def on_notification_sim_about_to_start(self,data): + pass + + def 
on_notification_event_about_to_run(self, data): + """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stores the required information before the event has occurred. """ + print("This is the data I received ", data) + + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + print("My Modules") + print(self.sim.modules.keys()) + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + self.print_chains = True + + # Target is single individual + if not isinstance(data["target"], Population): + + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + + else: + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the whole pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None + + return + + + def on_notification_event_has_just_ran(self, data): + """ If print_chains=True, this function logs the event and identifies and logs any property changes that have occurred to one or multiple individuals as a result of the event taking place.
""" + print("This is the data I received ", data) + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + 'EventName' : data['EventName'], + } + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + return + + def mni_values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + return diffs + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] + + # Create an empty list to store changes for each of the individuals + chain_links = {} + len_of_diff = len(diff_mask) + + # Loop through each row of the mask + persons_changed = [] + + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) + # Create a dictionary for this person + # First add event info + link_info = { + 'EventName': type(self).__name__, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + + # Append the event and changes to the individual key + chain_links[idx] = link_info + + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info + + return chain_links + + + diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 3f0c79434e..3c710c7dd2 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,6 +8,7 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, + collect_event_chains, contraception, copd, demography, @@ -116,6 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, + collect_event_chains.CollectEventChains, ] return [ module_class( diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index b0c95683c1..eac1bbdc89 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,6 +13,10 @@ import pandas as pd import tlo.population import numpy as np +import tlo.methods.collect_event_chains + +from tlo.notify import notifier +from tlo.methods.collect_event_chains import CollectEventChains from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: @@ -148,6 +152,7 @@ def __init__( # Whether simulation has been initialised self._initialised = False + def _configure_logging( self, From ebe0ebc6644f3a96bac01c7efb9f3ad47378048a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:28:02 +0000 Subject: [PATCH 44/97] Use broadcasting in HSI events too --- .../parameter_values.csv | 3 + src/tlo/events.py | 276 +--------------- src/tlo/methods/collect_event_chains.py | 297 ++++++++++-------- src/tlo/methods/hsi_event.py | 155 ++------- src/tlo/simulation.py | 61 +--- 5 files changed, 209 insertions(+), 583 deletions(-) create mode 100644 resources/ResourceFile_GenerateEventChains/parameter_values.csv diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv new file mode 100644 index 0000000000..82394e590b --- /dev/null +++ 
b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 +size 102 diff --git a/src/tlo/events.py b/src/tlo/events.py index dce44656bd..dba2f33cd5 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,28 +4,12 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset, logging +from tlo import DateOffset if TYPE_CHECKING: from tlo import Simulation -import pandas as pd - from tlo.notify import notifier -from tlo.util import convert_chain_links_into_EAV - -import copy - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -logger_chain = logging.getLogger('tlo.simulation') -logger_chain.setLevel(logging.INFO) - -logger_summary = logging.getLogger(f"{__name__}.summary") -logger_summary.setLevel(logging.INFO) - -debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -39,7 +23,6 @@ def __lt__(self, other): return self.value < other.value return NotImplemented - class Event: """Base event class, from which all others inherit. @@ -78,265 +61,20 @@ def apply(self, target): """ raise NotImplementedError - def mni_values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): - diffs = {} - - all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - - for person in all_individuals: - if person not in entire_mni_before: # but is afterward - for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - elif person not in entire_mni_after: # but is beforehand - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] - - else: # person is in both - # Compare properties - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - return diffs - - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" - - # Create a mask of where values are different - diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - if 'PregnancySupervisor' in self.sim.modules: - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) - else: - diff_mni = [] - - # Create an empty list to store changes for each of the individuals - chain_links = {} - len_of_diff = len(diff_mask) - - # Loop through each row of the mask - persons_changed = [] - - for idx, row in diff_mask.iterrows(): - changed_cols = row.index[row].tolist() - - if changed_cols: # Proceed only if there are changes in the row - persons_changed.append(idx) - # Create a dictionary for this person - # First add event info - link_info = { - 'EventName': type(self).__name__, - } - - # Store the new values from df_after for the changed columns - for col in changed_cols: - link_info[col] = df_after.at[idx, col] - - if idx in diff_mni: - # This person has also undergone changes in the mni dictionary, so add these here - for key in diff_mni[idx]: - link_info[col] = diff_mni[idx][key] - - # Append the event and changes to the individual key - chain_links[idx] = link_info - - if 'PregnancySupervisor' in self.sim.modules: - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] - - chain_links[key] = link_info - - return chain_links - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - - # Initialise these variables - print_chains = False - df_before = [] - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - entire_mni_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - else: - mni_row_before = None - - else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
- df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_before = None - - return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - chain_links = {} - - # Target is single individual - if self.target != self.sim.population: - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : type(self).__name__, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[self.target] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) - - return chain_links - def run(self): """Make the event happen.""" - # Collect relevant information before event takes place - # If statement outside or inside dispatch notification? 
- if self.sim.generate_event_chains: - - # Dispatch notification that event is about to run - notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) - - print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() + + # Dispatch notification that event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() - # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' - # in the individual's event chain. - if self.sim.generate_event_chains and print_chains: - - print("About to pass") - # Dispatch notification that event is about to run - notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) - - chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger_chain.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - """ - # Create empty logger for entire pop - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) - - # Log chain_links here - if len(chain_links)>0: - - logger_chain.info(key='event_chains', - data= pop_dict, - description='Links forming chains of events for simulated individuals') - """ + # Dispatch notification that event has just ran + notifier.dispatch("event.has_just_ran", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 553e095b0b..7fb946c524 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -2,11 +2,11 @@ from pathlib import Path from typing import Optional -from tlo import Module, logging, population +from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd -from tlo.util import convert_chain_links_into_EAV +from tlo.util import df_to_EAV, convert_chain_links_into_EAV, read_csv_files import copy @@ -27,156 +27,207 @@ def __init__(self, name=None): self.mni_row_before = {} self.entire_mni_before = {} + PARAMETERS = { + # Options within module + "generate_event_chains": Parameter( + Types.BOOL, "Whether or not we want to collect chains of events for individuals" + ), + "modules_of_interest": Parameter( + Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" + ), + "events_to_ignore": Parameter( + Types.LIST, "Events to be ignored when collecting chains" + ), + } + def initialise_simulation(self, sim): - notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) - notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + notifier.add_listener("simulation.pop_has_been_initialised", self.on_notification_pop_has_been_initialised) + notifier.add_listener("simulation.on_birth", self.on_notification_of_birth) + notifier.add_listener("event.about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - pass + #print("resource file path", resourcefilepath) + #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + self.parameters["generate_event_chains"] = True + self.parameters["modules_of_interest"] = self.sim.modules + + self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + def initialise_population(self, population): pass def on_birth(self, mother, child): + # Could the notification of birth simply take place here? pass - def on_notification_sim_about_to_start(self,data): - pass + def on_notification_pop_has_been_initialised(self, data): + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + if self.parameters['generate_event_chains']: + + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + + def on_notification_of_birth(self, data): + + if self.parameters['generate_event_chains']: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + link_info = data['link_info'] + link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + chain_links = {} + chain_links[data['target']] = link_info + + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - print("This is the data I received ", data) - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + p = self.parameters - print("My Modules") - print(self.sim.modules.keys()) - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if p['generate_event_chains']: - self.print_chains = True + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} - # Target is single individual - if not isinstance(data["target"], Population): + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + self.print_chains = True - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - - else: + # Target is single individual + if not isinstance(data['target'], Population): - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + else: - self.entire_mni_before = None + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
+ self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - print("This is the data I received ", data) + + p = self.parameters - chain_links = {} - - # Target is single individual - if not isinstance(data["target"], Population): - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : data['EventName'], - } - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[data['target']] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if p['generate_event_chains'] and self.print_chains: + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = data['link_info'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edb5d3df3b..01bd826f2d 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -7,28 +7,19 @@ from tlo import Date, logging from tlo.events import Event -from tlo.population import Population -from tlo.util import convert_chain_links_into_EAV -import pandas as pd +from tlo.notify import notifier if TYPE_CHECKING: from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem -# Pointing to the logger in events -logger_chains = logging.getLogger("tlo.simulation") -logger_chains.setLevel(logging.INFO) - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) -debug_chains = True - - # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -204,144 +195,34 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - - # Initialise these variables - print_chains = False - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is in mni dictionary before the event, if so store its original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - - else: - print("ERROR: there shouldn't be pop-wide HSI event") - exit(-1) - - return print_chains, row_before, mni_row_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - # For HSI event, this will only ever occur for individual events - chain_links = {} - - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes or not. 
- - # Add event details - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'EventName' : type(self).__name__, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } - - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - - chain_links[self.target] = link_info - - return chain_links def run(self, squeeze_factor): """Make the event happen.""" - - if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() - - footprint = self.EXPECTED_APPT_FOOTPRINT + # Dispatch notification that HSI event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() self._run_after_hsi_event() - - - if self.sim.generate_event_chains and self.target != self.sim.population: - # If the footprint has been updated when the event ran, change it here - if updated_appt_footprint is not None: - footprint = updated_appt_footprint - - if print_chains: - chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) + # Dispatch notification that HSI event has just ran + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + else: + footprint = self.EXPECTED_APPT_FOOTPRINT + try: + level = self.facility_info.level + except: + level = "N/A" - if chain_links: - - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - logger_chain.info(key='event_chains', - data = ednav, - description='Links forming chains of events for simulated individuals') + notifier.dispatch("event.has_just_ran", + data={"target": self.target, + "link_info" : {"EventName": type(self).__name__, + "footprint": footprint, + "level": level + }}) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index eac1bbdc89..17016f5fc7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,16 +8,10 @@ import time from collections import Counter, OrderedDict from pathlib import Path -from typing import Optional from typing import TYPE_CHECKING, Optional -import pandas as pd -import tlo.population import numpy as np -import 
tlo.methods.collect_event_chains from tlo.notify import notifier -from tlo.methods.collect_event_chains import CollectEventChains -from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -42,9 +36,6 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -logger_chains = logging.getLogger("tlo.methods.event") -logger_chains.setLevel(logging.INFO) - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,16 +104,9 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - - self.generate_event_chains = True - self.generate_event_chains_modules_of_interest = [] - self.generate_event_chains_ignore_events = [] - self.end_date = None self.output_file = None self.population: Optional[Population] = None - - self.show_progress_bar = show_progress_bar self.resourcefilepath = Path(resourcefilepath) @@ -152,7 +136,6 @@ def __init__( # Whether simulation has been initialised self._initialised = False - def _configure_logging( self, @@ -299,21 +282,13 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. - if self.generate_event_chains: - - # EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + + # Dispatch notification that pop has been initialised + notifier.dispatch("simulation.pop_has_been_initialised", data={}) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow @@ -326,15 +301,6 @@ def initialise(self, *, end_date: Date) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - #self.generate_event_chains = generate_event_chains - if self.generate_event_chains: - # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] - - # Reorder columns to place the new columns at the front - pd.set_option('display.max_columns', None) - for module in self.modules.values(): module.initialise_simulation(self) self._initialised = True @@ -403,8 +369,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. 
""" - open('output.txt', mode='a') - if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg) @@ -463,7 +427,6 @@ def fire_single_event(self, event: Event, date: Date) -> None: """ self.date = date event.run() - def do_birth(self, mother_id: int) -> int: """Create a new child person. @@ -478,22 +441,12 @@ def do_birth(self, mother_id: int) -> int: for module in self.modules.values(): module.on_birth(mother_id, child_id) - if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the chain of property - # changes that this individual will undergo as a result of events taking place. - link_info = self.population.props.loc[child_id].to_dict() - link_info['EventName'] = 'Birth' - chain_links = {} - chain_links[child_id] = link_info # Convert to string to avoid issue of length - - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + # Dispatch notification that birth is about to occur + notifier.dispatch("simulation.on_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) return child_id + def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: """Find the events in the queue for a particular person. From e617aa9a1885a260c28dfc47db5c72cac09fdcdd Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 25 Nov 2025 13:39:35 +0000 Subject: [PATCH 45/97] Clear listeners in the global notifier instance at the start of simulation --- src/tlo/notify.py | 7 +++++++ src/tlo/simulation.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 48c46b82b4..b1b4434ba9 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -60,6 +60,13 @@ def dispatch(self, notification_key, data=None): for listener in self.listeners[notification_key]: listener(data) + def clear_listeners(self): + """ + Clear all registered listeners. Essential because the notifier is a global singleton. + e.g. if you are running multiple tests or simulations in the same process. 
+ """ + self.listeners.clear() + # Create a global notifier instance notifier = Notifier() diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d2560f92d9..b0bd733234 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -26,6 +26,7 @@ topologically_sort_modules, ) from tlo.events import Event, IndividualScopeEventMixin +from tlo.notify import notifier from tlo.progressbar import ProgressBar if TYPE_CHECKING: @@ -116,6 +117,8 @@ def __init__( self._custom_log_levels = None self._log_filepath = self._configure_logging(**log_config) + # clear notifier listeners from any previous simulation in this process + notifier.clear_listeners() # random number generator seed_from = "auto" if seed is None else "user" From 4fe8e1f11d9e7fa142735290e3d2f249d73c90d3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:03:16 +0000 Subject: [PATCH 46/97] Correct log name in analysis file --- .../ResourceFile_GenerateEventChains/parameter_values.csv | 4 ++-- src/tlo/analysis/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 82394e590b..2fa792a63a 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 -size 102 +oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 +size 242 diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index fc0d374fd1..94bc541d30 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -440,7 +440,7 @@ def extract_event_chains(results_folder: Path, """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. 
""" - module = 'tlo.simulation' + module = 'tlo.collect_event_chains' key = 'event_chains' # get number of draws and numbers of runs From c1e60969688f50bfef1aabde122fdffe2dc6f151 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:34:45 +0000 Subject: [PATCH 47/97] Summarise checks on whether to collect event changes --- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 58 ++++++++++++------------- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index dba2f33cd5..e79074b333 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -65,9 +65,8 @@ def apply(self, target): def run(self): """Make the event happen.""" - # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7fb946c524..4ce38b43f8 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules + self.parameters["modules_of_interest"] = self.sim.modules.values() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] @@ -96,9 +96,13 @@ def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" p = self.parameters - - if p['generate_event_chains']: + # Only log event if + # 1) generate_event_chains is set to True + # 2) the event belongs to modules of interest and + # 3) the event is not in the list of events to ignore + if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + # Initialise these variables self.print_chains = False self.df_before = [] @@ -107,38 +111,32 @@ def on_notification_event_about_to_run(self, data): self.mni_row_before = {} self.entire_mni_before = {} - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + self.print_chains = True - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + # Target is single individual + if not isinstance(data['target'], Population): - self.print_chains = True + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - # Target is single individual - if not isinstance(data['target'], Population): - - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() else: + self.mni_row_before = None + + else: - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - self.entire_mni_before = None + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
+ self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 01bd826f2d..edc7ffb721 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 5e0720496b1a2572aee6e0b4bb30740152c26bc5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:02:14 +0000 Subject: [PATCH 48/97] Use module names rather than obj for ease of use --- src/tlo/events.py | 2 +- src/tlo/methods/collect_event_chains.py | 2 +- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index e79074b333..299fffa6ed 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -66,7 +66,7 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 4ce38b43f8..ce480aa97e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.values() + self.parameters["modules_of_interest"] = self.sim.modules.keys() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edc7ffb721..32620f6c28 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 
2ce9bbd9a79c83ffee33294a6c646da717d5fd30 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:24:10 +0000 Subject: [PATCH 49/97] Fix parameters initialisation --- .../parameter_values.csv | 4 ++-- src/tlo/methods/collect_event_chains.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 2fa792a63a..a84c77ab60 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 -size 242 +oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a +size 419 diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ce480aa97e..7f903e2035 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -47,13 +47,12 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - #print("resource file path", resourcefilepath) - #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.keys() - - self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + print("resource file path", resourcefilepath) + self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + # If modules of interest is '*', set by default to all modules included in the simulation + if self.parameters["modules_of_interest"] == ['*']: + self.parameters["modules_of_interest"] = self.sim.modules.keys() def initialise_population(self, population): pass From a786b2e1cc9c3932a90fa2f3b9c01b556e5d31d7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:29:43 +0000 Subject: [PATCH 50/97] Fix to type of parameter --- src/tlo/methods/collect_event_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7f903e2035..f1f36224a6 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -52,7 +52,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): # If modules of interest is '*', set by default to all modules included in the simulation if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = self.sim.modules.keys() + self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): pass From 7af8c70fbb5f367619d98741d38f7f6e7954a926 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:35:54 +0000 Subject: [PATCH 51/97] Give option to overwrite module parameters --- 
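This patch lets a scenario override the values read from ResourceFile_GenerateEventChains by
passing arguments to the module constructor. A minimal sketch of the intended usage
(illustrative only: the surrounding Simulation set-up and module registration are omitted, and
the module/event names are examples drawn from elsewhere in this series, not a prescribed
configuration):

    from tlo.methods.collect_event_chains import CollectEventChains

    # Arguments left as None fall back to the parameter file; anything supplied
    # here takes precedence when the module is initialised.
    chain_collector = CollectEventChains(
        generate_event_chains=True,
        modules_of_interest=["Hiv", "Tb"],            # module names (see PATCH 48)
        events_to_ignore=["HivRegularPollingEvent"],  # event class names to skip
    )
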
src/tlo/methods/collect_event_chains.py | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index f1f36224a6..ef5f04d639 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -1,7 +1,7 @@ from tlo.notify import notifier from pathlib import Path -from typing import Optional +from typing import Optional, List from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd @@ -15,8 +15,19 @@ class CollectEventChains(Module): - def __init__(self, name=None): + def __init__( + self, + name: Optional[str] = None, + generate_event_chains: Optional[bool] = None, + modules_of_interest: Optional[List[str]] = None, + events_to_ignore: Optional[List[str]] = None + + ): super().__init__(name) + + self.generate_event_chains = generate_event_chains + self.modules_of_interest = modules_of_interest + self.events_to_ignore = events_to_ignore # This is how I am passing data from fnc taking place before event to the one after # It doesn't seem very elegant but not sure how else to go about it @@ -57,6 +68,28 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): def initialise_population(self, population): pass + def initialise_simulation(self, sim): + # Use parameter file values by default, if not overwritten + self.generate_event_chains = self.parameters['generate_event_chains'] \ + if self.generate_event_chains is None \ + else self.generate_event_chains + + self.modules_of_interest = self.parameters['modules_of_interest'] \ + if self.modules_of_interest is None \ + else self.modules_of_interest + + self.events_to_ignore = self.parameters['events_to_ignore'] \ + if self.events_to_ignore is None \ + else self.events_to_ignore + + def get_generate_event_chains(self) -> bool: + """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but + overwrite with what was provided in argument if an argument was specified -- provided for backward + compatibility/debugging.)""" + return self.parameters['generate_event_chains'] \ + if self.arg_generate_event_chains is None \ + else self.arg_generate_event_chains + def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass From d8e6922a94eb5ecd398d20c886d9678dad3fb72f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:40:42 +0000 Subject: [PATCH 52/97] Correct use of parameters --- src/tlo/methods/collect_event_chains.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ef5f04d639..0dca32dec1 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -127,13 +127,11 @@ def on_notification_of_birth(self, data): def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - p = self.parameters - # Only log event if # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): # Initialise these variables self.print_chains = False @@ -175,10 +173,8 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - p = self.parameters - if p['generate_event_chains'] and self.print_chains: + if self.print_chains: chain_links = {} From fd761f77511513e18875bedaf3b6ab3100eb8170 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:45:45 +0000 Subject: [PATCH 53/97] Exit as soon as condition is not met --- src/tlo/methods/collect_event_chains.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 0dca32dec1..b3eb42a8b3 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -131,7 +131,9 @@ def on_notification_event_about_to_run(self, data): # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): + if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): + return + else: # Initialise these variables self.print_chains = False @@ -174,7 +176,9 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. 
""" - if self.print_chains: + if not self.print_chains: + return + else: chain_links = {} From edd9e0b8a3599b28ed91b87aa568180971aa9643 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 18:07:30 +0000 Subject: [PATCH 54/97] Allow option to overwrite parameter file --- .../parameter_values.csv | 4 +- .../scenario_generate_chains.py | 81 ++----------------- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 13 +-- 4 files changed, 12 insertions(+), 89 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index a84c77ab60..ebf20c5f79 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a -size 419 +oid sha256:172a0c24c859aaafbad29f6016433cac7a7324efc582e6c4b19c74b6b97436e7 +size 420 diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6cfbd040fa..0f53a1461b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -48,7 +48,7 @@ wasting, ) -class GenerateDataChains(BaseScenario): +class GenerateEventChains(BaseScenario): def __init__(self): super().__init__() self.seed = 42 @@ -71,101 +71,30 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, + 'tlo.methods.collect_event_chains': logging.INFO } } def modules(self): - # MODIFY - # Here instead of running full module - """ - return [demography.Demography(resourcefilepath=self.resources), - enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), - healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, - #rti.RTI(resourcefilepath=self.resources), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), - labour.Labour(resourcefilepath=self.resources), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), - contraception.Contraception(resourcefilepath=self.resources), - newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), - hiv.Hiv(resourcefilepath=self.resources), - tb.Tb(resourcefilepath=self.resources), - epi.Epi(resourcefilepath=self.resources), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=self.resources, - mode_appt_constraints=1, - cons_availability='all')] - """ return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + fullmodel() ) - """ - def draw_parameters(self, draw_number, rng): - return mix_scenarios( - get_parameters_for_status_quo(), - { - 'HealthSystem': { - 'Service_Availability': list(self._scenarios.values())[draw_number], - }, - } - ) - - def _get_scenarios(self) -> Dict[str, list[str]]: - Return the Dict with values 
for the parameter `Service_Availability` keyed by a name for the scenario. - The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. - # Generate list of TREATMENT_IDs and filter to the resolution needed - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*", "Nothing": []}) - #service_availability.update( - # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - #) - - return service_availability - - """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] else: return - # case 1: gfHE = -0.030, factor = 1.01074 - # case 2: gfHE = -0.020, factor = 1.02116 - # case 3: gfHE = -0.015, factor = 1.02637 - # case 4: gfHE = 0.015, factor = 1.05763 - # case 5: gfHE = 0.020, factor = 1.06284 - # case 6: gfHE = 0.030, factor = 1.07326 - def _get_scenarios(self) -> Dict[str, Dict]: - #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*"], "Nothing": []}) - service_availability.update( - {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - ) - print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { - "HealthSystem": { - "Service_Availability": service_availability["No Rti_ShockTreatment*"], + "CollectEventChains": { + "generate_event_chains": True, }, } ), diff --git a/src/tlo/events.py b/src/tlo/events.py index 299fffa6ed..56acb82f43 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -60,11 +60,10 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - def run(self): """Make the event happen.""" - + # Dispatch notification that event is about to run notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index b3eb42a8b3..712d8c045e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -58,17 +58,9 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - print("resource file path", resourcefilepath) self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - - # If modules of interest is '*', set by default to all modules included in the simulation - if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): - pass - - def initialise_simulation(self, sim): # Use parameter file values by default, if not overwritten self.generate_event_chains = 
self.parameters['generate_event_chains'] \ if self.generate_event_chains is None \ @@ -81,6 +73,10 @@ def initialise_simulation(self, sim): self.events_to_ignore = self.parameters['events_to_ignore'] \ if self.events_to_ignore is None \ else self.events_to_ignore + + # If modules of interest is '*', set by default to all modules included in the simulation + if self.modules_of_interest == ['*']: + self.modules_of_interest = list(self.sim.modules.keys()) def get_generate_event_chains(self) -> bool: """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but @@ -134,7 +130,6 @@ def on_notification_event_about_to_run(self, data): if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): return else: - # Initialise these variables self.print_chains = False self.df_before = [] From 24eacdbecb046a43cafa104eac46908e9e0d6380 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 08:41:09 +0000 Subject: [PATCH 55/97] No need for else if exiting function --- src/tlo/methods/collect_event_chains.py | 213 ++++++++++++------------ 1 file changed, 106 insertions(+), 107 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 712d8c045e..4406a77345 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -129,41 +129,41 @@ def on_notification_event_about_to_run(self, data): # 3) the event is not in the list of events to ignore if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): return - else: - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + self.print_chains = True + + # Target is single individual + if not isinstance(data['target'], Population): + + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - self.print_chains = True + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None - # Target is single individual - if not isinstance(data['target'], Population): + else: - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - + # This will be a population-wide event. 
In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - self.entire_mni_before = None + self.entire_mni_before = None return @@ -173,86 +173,85 @@ def on_notification_event_has_just_ran(self, data): if not self.print_chains: return + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = data['link_info'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + else: - - chain_links = {} - - # Target is single individual - if not isinstance(data["target"], Population): - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None - - 
# Create and store event for this individual, regardless of whether any property change occurred - link_info = data['link_info'] - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[data['target']] = link_info - + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} return From 017c6d239cab8f51fe49f56733f3cbabc42a660a Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 26 Nov 2025 11:30:29 +0000 Subject: [PATCH 56/97] Turn off ruff warnings. Far too frequent changes. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7c9e891fd..cf19215833 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,8 +125,8 @@ line-length = 120 exclude = ['src/tlo/_version.py'] [tool.ruff.lint] -select = ["E", "F", "I", "W"] -per-file-ignores = {"src/scripts/**" = ["E501", "W"]} +select = ["E", "F", "I"] +per-file-ignores = {"src/scripts/**" = ["E501"]} [tool.setuptools.packages.find] where = ["src"] From 2942701e9247ec221943ecf9580a42144c3ae2cb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 11:52:12 +0000 Subject: [PATCH 57/97] Include test for event chains collection --- src/tlo/analysis/utils.py | 33 ++++++++++++++----------- src/tlo/methods/collect_event_chains.py | 27 +++++++++----------- src/tlo/simulation.py | 11 ++++++--- src/tlo/util.py | 2 +- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 94bc541d30..0469dca438 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -413,6 +413,23 @@ def unpack_dict_rows(df, non_dict_cols=None): return out.reset_index(drop=True) +def reconstruct_event_chains(df): + + recon = unpack_dict_rows(df, ['date']) + + # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. + recon['V'] = recon['V'].apply(str) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. 
{a1:v1, a2:v2, a3:v3, ...} ) + df_collapsed = ( + recon.groupby(['E', 'date', 'EventName']) + .apply(lambda g: dict(zip(g['A'], g['V']))) + .reset_index(name='Info') + ) + df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) + #birth_count = (df_final['EventName'] == 'Birth').sum() + + return df_final + def print_filtered_df(df): """ @@ -460,21 +477,9 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - recon = unpack_dict_rows(df, ['date']) - print(recon) - #del recon['EventDate'] - # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. - recon['V'] = recon['V'].apply(str) - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) - df_collapsed = ( - recon.groupby(['E', 'date', 'EventName']) - .apply(lambda g: dict(zip(g['A'], g['V']))) - .reset_index(name='Info') - ) - df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) - birth_count = (df_final['EventName'] == 'Birth').sum() + df_final = reconstruct_event_chains(df) - print("Birth count for run ", run, "is ", birth_count) + # Offset person ID to account for the fact that we are collecting chains across runs df_final['E'] = df_final['E'] + ID_offset # Calculate ID offset for next run diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 4406a77345..289bd055dd 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -78,22 +78,15 @@ def initialise_population(self, population): if self.modules_of_interest == ['*']: self.modules_of_interest = list(self.sim.modules.keys()) - def get_generate_event_chains(self) -> bool: - """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but - overwrite with what was provided in argument if an argument was specified -- provided for backward - compatibility/debugging.)""" - return self.parameters['generate_event_chains'] \ - if self.arg_generate_event_chains is None \ - else self.arg_generate_event_chains - def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass def on_notification_pop_has_been_initialised(self, data): + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. - if self.parameters['generate_event_chains']: + if self.generate_event_chains: # EDNAV structure to capture status of individuals at the start of the simulation ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') @@ -105,7 +98,7 @@ def on_notification_pop_has_been_initialised(self, data): def on_notification_of_birth(self, data): - if self.parameters['generate_event_chains']: + if self.generate_event_chains: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. 
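For illustration, a minimal, self-contained sketch (toy data, not part of this series) of the collapse performed in reconstruct_event_chains: long-format E/A/V rows are grouped per person, date and event, and each group's attribute/value pairs are zipped into a single 'Info' dict.

import pandas as pd

# Toy EAV rows of the kind logged under key='event_chains'.
eav = pd.DataFrame({
    'E':         [0, 0, 0, 1],                                  # person ID
    'date':      pd.to_datetime(['2010-01-01', '2010-01-01',
                                 '2010-01-01', '2010-03-15']),
    'EventName': ['StartOfSimulation', 'StartOfSimulation',
                  'StartOfSimulation', 'Birth'],
    'A':         ['is_alive', 'sex', 'li_wealth', 'is_alive'],  # attribute
    'V':         [True, 'F', 3, True],                          # value
})

# Stringify values (as reconstruct_event_chains does), then collapse each
# (person, date, event) group into one row holding a dict of its A/V pairs.
eav['V'] = eav['V'].apply(str)
collapsed = (
    eav.groupby(['E', 'date', 'EventName'])
       .apply(lambda g: dict(zip(g['A'], g['V'])))
       .reset_index(name='Info')
       .sort_values(['E', 'date'])
       .reset_index(drop=True)
)
# One row per person/date/event, e.g. for person 0:
# Info == {'is_alive': 'True', 'sex': 'F', 'li_wealth': '3'}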
link_info = data['link_info'] @@ -237,13 +230,15 @@ def on_notification_event_has_just_ran(self, data): # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) + # Log chains + if chain_links: + + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') # Reset variables self.print_chains = False diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ded5960e6e..e1da725c53 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -266,7 +266,7 @@ def make_initial_population(self, *, n: int) -> None: a keyword parameter for clarity. """ start = time.time() - + # Collect information from all modules, that is required the population dataframe for module in self.modules.values(): module.pre_initialise_population() @@ -285,9 +285,6 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.pop_has_been_initialised", data={}) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -307,6 +304,12 @@ def initialise(self, *, end_date: Date) -> None: for module in self.modules.values(): module.initialise_simulation(self) self._initialised = True + + # Since CollectEventChains listeners are added to notified upon module initialisation, this can only be dispatched here. + # Otherwise, would have to add listener outside of CollectEventChains initialisation + + # Dispatch notification that pop has been initialised + notifier.dispatch("simulation.pop_has_been_initialised", data={"EventName" : "StartOfSimulation"}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
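The convert_chain_links_into_EAV helper called in the hunk above is imported from tlo.util, but its body is not shown in this part of the series; the sketch below is a hypothetical stand-in (names and behaviour assumed, not the project's implementation) for turning a chain_links dict of the form {person: {'EventName': ..., property: new_value, ...}} into the E/EventName/A/V rows that the analysis utilities expect.

import pandas as pd

def chain_links_to_eav_sketch(chain_links):
    # Hypothetical stand-in: one output row per changed property,
    # with the event name repeated on every row for that person.
    rows = []
    for person, link_info in chain_links.items():
        event_name = link_info['EventName']
        for attribute, value in link_info.items():
            if attribute == 'EventName':
                continue
            rows.append({'E': person, 'EventName': event_name,
                         'A': attribute, 'V': value})
    return pd.DataFrame(rows, columns=['E', 'EventName', 'A', 'V'])

# A single individual whose event changed two properties.
ednav = chain_links_to_eav_sketch(
    {7: {'EventName': 'ExampleTreatmentEvent',        # made-up event name
         'hv_art': 'on_VL_suppressed',
         'hv_date_last_ART': pd.Timestamp('2010-06-01')}}
)
# This is the shape then passed to the logger, e.g.
# logger.info(key='event_chains', data=ednav.to_dict(), description=...)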
diff --git a/src/tlo/util.py b/src/tlo/util.py index 189f994353..98b13e45fd 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -96,7 +96,7 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" - eav = df.stack().reset_index() + eav = df.stack(dropna=False).reset_index() eav.columns = ['E', 'A', 'V'] eav['EventName'] = event_name eav = eav[["E", "EventName", "A", "V"]] From 68b19615d91d3a00b48d6cbdc828cdaaecb5e747 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:44:38 +0000 Subject: [PATCH 58/97] Scenario file --- .../scenario_collect_event_chains.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 src/scripts/collect_event_chains/scenario_collect_event_chains.py diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/collect_event_chains/scenario_collect_event_chains.py new file mode 100644 index 0000000000..aec12f9c62 --- /dev/null +++ b/src/scripts/collect_event_chains/scenario_collect_event_chains.py @@ -0,0 +1,118 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) + +class GenerateEventChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 42 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 1000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 3 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + 'tlo.methods.collect_event_chains': logging.INFO + } + } + + def modules(self): + return ( + fullmodel() + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + def _get_scenarios(self) -> Dict[str, Dict]: + + return { + "Baseline": + mix_scenarios( + self._baseline(), + { + "CollectEventChains": { + "generate_event_chains": True, + }, + } + ), 
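A simplified variant of the df_to_EAV helper amended in the util.py hunk above (its date argument is dropped here for brevity), to show what stack(dropna=False) buys: with the default dropna=True, properties whose value happens to be missing (date_of_death for people who are alive, for instance) would silently vanish from the long-format output.

import pandas as pd

def df_to_eav_sketch(df, event_name):
    # Simplified sketch of the helper: keep NaN cells so that every
    # attribute of every individual appears in the output.
    eav = df.stack(dropna=False).reset_index()
    eav.columns = ['E', 'A', 'V']
    eav['EventName'] = event_name
    return eav[['E', 'EventName', 'A', 'V']]

props = pd.DataFrame(
    {'is_alive': [True, True], 'date_of_death': [pd.NaT, pd.NaT]},
    index=[0, 1],
)
eav = df_to_eav_sketch(props, 'StartOfSimulation')
assert len(eav) == 4   # 2 people x 2 properties; dropna=True would keep only 2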
+ + } + + def _baseline(self) -> Dict: + #Return the Dict with values for the parameter changes that define the baseline scenario. + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) From ba78a018b8e9f8623294821abbee4d6f0e4bc1b8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:33:00 +0000 Subject: [PATCH 59/97] Style fixes --- .../analysis_extract_data.py | 8 +-- .../postprocess_events_chain.py | 1 + .../scenario_generate_chains.py | 29 +------- .../scenario_collect_event_chains.py | 29 +------- src/tlo/analysis/utils.py | 10 ++- src/tlo/events.py | 8 ++- src/tlo/methods/collect_event_chains.py | 69 ++++++++++++------- src/tlo/methods/hsi_event.py | 11 +-- src/tlo/simulation.py | 5 +- 9 files changed, 75 insertions(+), 95 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 9ee37cabef..3063b6b425 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -4,17 +4,15 @@ # short tclose -> ideal case # long tclose -> status quo import argparse +from collections import Counter +from datetime import datetime from pathlib import Path from typing import Tuple import pandas as pd -import matplotlib.pyplot as plt from tlo import Date -from tlo.analysis.utils import extract_results, extract_event_chains -from datetime import datetime -from collections import Counter -import ast +from tlo.analysis.utils import extract_event_chains, extract_results # Time simulated to collect data start_date = Date(2010, 1, 1) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py index 96c27a04b1..3b4a00e110 100644 --- a/src/scripts/analysis_data_generation/postprocess_events_chain.py +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -1,6 +1,7 @@ import pandas as pd from dateutil.relativedelta import relativedelta + # Remove from every individual's event chain all events that were fired after death def cut_off_events_after_death(df): diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 0f53a1461b..90d0801e2f 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -18,35 +18,10 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.methods import ( - alri, - cardio_metabolic_disorders, - care_of_women_during_pregnancy, - contraception, - demography, - depression, - diarrhoea, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - rti, - labour, - malaria, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - stunting, - 
symptommanager, - tb, - wasting, -) + class GenerateEventChains(BaseScenario): def __init__(self): diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/collect_event_chains/scenario_collect_event_chains.py index aec12f9c62..f85e987487 100644 --- a/src/scripts/collect_event_chains/scenario_collect_event_chains.py +++ b/src/scripts/collect_event_chains/scenario_collect_event_chains.py @@ -18,35 +18,10 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.methods import ( - alri, - cardio_metabolic_disorders, - care_of_women_during_pregnancy, - contraception, - demography, - depression, - diarrhoea, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - rti, - labour, - malaria, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - stunting, - symptommanager, - tb, - wasting, -) + class GenerateEventChains(BaseScenario): def __init__(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 0469dca438..9d8b1d5696 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -419,7 +419,8 @@ def reconstruct_event_chains(df): # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. recon['V'] = recon['V'].apply(str) - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes + # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( recon.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) @@ -454,8 +455,11 @@ def truncate_dict(d): def extract_event_chains(results_folder: Path, ) -> dict: - """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. - Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. + """Utility function to collect chains of events. Individuals across runs of the same draw + will be combined into unique df. + Returns dictionary where keys are draws, and each draw is associated with a dataframe of + format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + A&Vs for a particular individual + date + event name combination. 
""" module = 'tlo.collect_event_chains' key = 'event_chains' diff --git a/src/tlo/events.py b/src/tlo/events.py index 56acb82f43..74c28a1ded 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,7 @@ from tlo.notify import notifier + class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" START_OF_DAY = 0 @@ -65,13 +66,16 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, + "module" : self.module.name, + "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran - notifier.dispatch("event.has_just_ran", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.has_just_ran", data={"target": self.target, + "link_info" : {"EventName": type(self).__name__}}) class RegularEvent(Event): diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 289bd055dd..6a31e868fc 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -1,14 +1,13 @@ -from tlo.notify import notifier - +import copy from pathlib import Path -from typing import Optional, List -from tlo import Module, Parameter, Types, logging, population -from tlo.population import Population -import pandas as pd +from typing import List, Optional -from tlo.util import df_to_EAV, convert_chain_links_into_EAV, read_csv_files +import pandas as pd -import copy +from tlo import Module, Parameter, Types, logging +from tlo.notify import notifier +from tlo.population import Population +from tlo.util import convert_chain_links_into_EAV, df_to_EAV logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -84,8 +83,11 @@ def on_birth(self, mother, child): def on_notification_pop_has_been_initialised(self, data): - # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + # When logging events for each individual to reconstruct chains, + # only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, + # we therefore want to store all of their properties + # at the start. if self.generate_event_chains: # EDNAV structure to capture status of individuals at the start of the simulation @@ -99,8 +101,9 @@ def on_notification_pop_has_been_initialised(self, data): def on_notification_of_birth(self, data): if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the chain of property - # changes that this individual will undergo as a result of events taking place. + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. 
link_info = data['link_info'] link_info.update(self.sim.population.props.loc[data['target']].to_dict()) chain_links = {} @@ -114,13 +117,19 @@ def on_notification_of_birth(self, data): def on_notification_event_about_to_run(self, data): - """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - + """Do this when notified that an event is about to run. + This function checks whether this event should be logged as part of the event chains, a + nd if so stored required information before the event has occurred. + """ + # Only log event if # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): + if (not self.generate_event_chains + or (data['module'] not in self.modules_of_interest) + or (data['link_info']['EventName'] in self.events_to_ignore) + ): return # Initialise these variables @@ -154,7 +163,8 @@ def on_notification_event_about_to_run(self, data): # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. self.df_before = self.sim.population.props.copy() if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + self.entire_mni_before = copy.deepcopy( + self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: self.entire_mni_before = None @@ -162,7 +172,9 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
+ """ if not self.print_chains: return @@ -228,7 +240,10 @@ def on_notification_event_has_just_ran(self, data): entire_mni_after = None # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + chain_links = self.compare_population_dataframe_and_mni(self.df_before, + df_after, + self.entire_mni_before, + entire_mni_after) # Log chains if chain_links: @@ -267,14 +282,16 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] @@ -290,8 +307,12 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + """ + This function compares the population dataframe and mni dictionary before/after a population-wide e + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. 
+ """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) @@ -300,9 +321,8 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be else: diff_mni = [] - # Create an empty list to store changes for each of the individuals + # Create an empty dict to store changes for each of the individuals chain_links = {} - len_of_diff = len(diff_mask) # Loop through each row of the mask persons_changed = [] @@ -344,7 +364,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = link_info - return chain_links diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 32620f6c28..085d80683c 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -9,7 +9,6 @@ from tlo.events import Event from tlo.notify import notifier - if TYPE_CHECKING: from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem @@ -201,7 +200,10 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", + data={"target": self.target, + "module" : self.module.name, + "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -212,9 +214,10 @@ def run(self, squeeze_factor): footprint = updated_appt_footprint else: footprint = self.EXPECTED_APPT_FOOTPRINT - try: + + if self.facility_info: level = self.facility_info.level - except: + else: level = "N/A" notifier.dispatch("event.has_just_ran", diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index e1da725c53..bde1c72b76 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -9,6 +9,7 @@ from collections import Counter, OrderedDict from pathlib import Path from typing import TYPE_CHECKING, Optional + import numpy as np from tlo.notify import notifier @@ -27,7 +28,6 @@ topologically_sort_modules, ) from tlo.events import Event, IndividualScopeEventMixin -from tlo.notify import notifier from tlo.progressbar import ProgressBar if TYPE_CHECKING: @@ -305,7 +305,8 @@ def initialise(self, *, end_date: Date) -> None: module.initialise_simulation(self) self._initialised = True - # Since CollectEventChains listeners are added to notified upon module initialisation, this can only be dispatched here. + # Since CollectEventChains listeners are added to notified upon module initialisation, + # this can only be dispatched here. 
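A toy illustration of the change detection in compare_population_dataframe_and_mni above: the mask flags cells that differ between the before and after frames, and the ~(isna & isna) term stops NaN-to-NaN cells from being reported (NaN != NaN evaluates to True in pandas). The two-person frame and its columns are just example data.

import pandas as pd

df_before = pd.DataFrame(
    {'is_alive': [True, True], 'hv_art': ['not', 'not'],
     'date_of_death': [pd.NaT, pd.NaT]},
    index=[0, 1],
)
df_after = df_before.copy()
df_after.loc[1, 'hv_art'] = 'on_VL_suppressed'   # person 1 changed by the event

# Cells that differ, ignoring cells that are missing on both sides.
diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna())

# Collect {person: {property: new_value}} for the people actually affected.
changed = {}
for person in diff_mask.index[diff_mask.any(axis=1)]:
    cols = diff_mask.columns[diff_mask.loc[person]]
    changed[person] = {col: df_after.at[person, col] for col in cols}

assert changed == {1: {'hv_art': 'on_VL_suppressed'}}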
# Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised From 44253a2c49e82211bf0483cb2b2d7e989acb923c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:47:19 +0000 Subject: [PATCH 60/97] Final style fixes --- .../analysis_extract_data.py | 555 ------------------ .../postprocess_events_chain.py | 157 ----- .../scenario_generate_chains.py | 94 --- .../analysis_extract_data.py | 83 +++ 4 files changed, 83 insertions(+), 806 deletions(-) delete mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py delete mode 100644 src/scripts/analysis_data_generation/postprocess_events_chain.py delete mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py create mode 100644 src/scripts/collect_event_chains/analysis_extract_data.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py deleted file mode 100644 index 3063b6b425..0000000000 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ /dev/null @@ -1,555 +0,0 @@ -"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when -running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" - -# short tclose -> ideal case -# long tclose -> status quo -import argparse -from collections import Counter -from datetime import datetime -from pathlib import Path -from typing import Tuple - -import pandas as pd - -from tlo import Date -from tlo.analysis.utils import extract_event_chains, extract_results - -# Time simulated to collect data -start_date = Date(2010, 1, 1) -end_date = start_date + pd.DateOffset(months=13) - -# Range of years considered -min_year = 2010 -max_year = 2040 - - -def all_columns(_df): - return pd.Series(_df.all()) - -def check_if_beyond_time_range_considered(progression_properties): - matching_keys = [key for key in progression_properties.keys() if "rt_date_to_remove_daly" in key] - if matching_keys: - for key in matching_keys: - if progression_properties[key] > end_date: - print("Beyond time range considered, need at least ",progression_properties[key]) - -def print_filtered_df(df): - """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. - """ - pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - - dict_cols = ["Info"] - max_items = 2 - # Step 2: Truncate dictionary columns for display - if dict_cols is not None: - for col in dict_cols: - def truncate_dict(d): - if isinstance(d, dict): - items = list(d.items())[:max_items] # keep only first `max_items` - return dict(items) - return d - filtered[col] = filtered[col].apply(truncate_dict) - print(filtered) - - -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): - """Produce standard set of plots describing the effect of each TREATMENT_ID. - - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. - - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. 
- """ - pd.set_option('display.max_rows', None) - pd.set_option('display.max_colwidth', None) - - individual_event_chains = extract_event_chains(results_folder) - print_filtered_df(individual_event_chains[0]) - exit(-1) - - eval_env = { - 'datetime': datetime, # Add the datetime class to the eval environment - 'pd': pd, # Add pandas to handle Timestamp - 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval - 'NaT': pd.NaT, - 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) - } - - initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] - - # Will be added through computation: age at time of RTI - # Will be added through computation: total duration of event - - initial_rt_event_properties = set() - - num_individuals = 1000 - num_runs = 1 - record = [] - # Include results folder in output file name - name_tag = str(results_folder).replace("outputs/", "") - - - - for p in range(0,num_individuals): - - print("At person = ", p, " out of ", num_individuals) - - individual_event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column=str(p), - do_scaling=False - ) - - for r in range(0,num_runs): - initial_properties = {} - key_first_event = {} - key_last_event = {} - first_event = {} - last_event = {} - properties = {} - average_disability = 0 - total_dt_included = 0 - dt_in_prev_disability = 0 - prev_disability_incurred = 0 - ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} - # Count total appts - - list_for_individual = [] - for item,row in individual_event_chains.iterrows(): - value = individual_event_chains.loc[item,(0, r)] - if value !='' and isinstance(value, str): - evaluated = eval(value, eval_env) - list_for_individual.append(evaluated) - - for i in list_for_individual: - print(i) - - """ - # These are the properties of the individual before the start of the chain of events - initial_properties = list_for_individual[0] - - # Initialise first event by gathering parameters of interest from initial_properties - first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} - - # The changing or adding of properties from the first_event will be stored in progression_properties - progression_properties = {} - - for i in list_for_individual: - # Skip the initial_properties, or in other words only consider these if they are 'proper' events - if 'event' in i: - #print(i) - if 'RTIPolling' in i['event']: - - # Keep track of which properties are changed during polling events - for key,value in i.items(): - if 'rt_' in key: - initial_rt_event_properties.add(key) - - # Retain a copy of Polling event - polling_event = i.copy() - - # Update parameters of interest following RTI - key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} - - # Calculate age of individual at time of event - key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days - - # Keep track of evolution in individual's properties - progression_properties = initial_properties.copy() - progression_properties.update(i) - - # Initialise chain of Dalys incurred - if 'rt_disability' in i: - prev_disability_incurred = i['rt_disability'] - 
prev_date = i['event_date'] - - else: - # Progress properties of individual, even if this event is a death - progression_properties.update(i) - - # If disability has changed as a result of this, recalculate and add previous to rolling average - if 'rt_disability' in i: - - dt_in_prev_disability = (i['event_date'] - prev_date).days - #print("Detected change in disability", i['rt_disability'], "after dt=", dt_in_prev_disability) - #print("Adding the following to the average", prev_disability_incurred, " x ", dt_in_prev_disability ) - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - # Update variables - prev_disability_incurred = i['rt_disability'] - prev_date = i['event_date'] - - # Update running footprint - if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': - footprint = i['appt_footprint'] - if 'Counter' in footprint: - footprint = footprint[len("Counter("):-1] - apply = eval(footprint, eval_env) - ind_Counter[i['level']].update(Counter(apply)) - - # If the individual has died, ensure chain of event is interrupted here and update rolling average of DALYs - if 'is_alive' in i and i['is_alive'] is False: - if ((i['event_date'] - polling_event['rt_date_inj']).days) > total_dt_included: - dt_in_prev_disability = (i['event_date'] - prev_date).days - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - break - - # check_if_beyond_time_range_considered(progression_properties) - - # Compute final properties of individual - key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] - key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - - # If individual didn't die and the key_last_event didn't result in a final change in DALYs, ensure that the last change is recorded here - if not key_first_event['rt_imm_death'] and (total_dt_included < key_last_event['duration_days']): - #print("Number of events", len(list_for_individual)) - #for i in list_for_individual: - # if 'event' in i: - # print(i) - dt_in_prev_disability = (progression_properties['event_date'] - prev_date).days - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - - # Now calculate the average disability incurred, and store any permanent disability and total footprint - if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0: - key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] - else: - key_last_event['rt_disability_average'] = 0.0 - - key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] - key_last_event.update({'total_footprint': ind_Counter}) - - if key_last_event['duration_days']!=total_dt_included: - print("The duration of event and total_dt_included don't match", key_last_event['duration_days'], total_dt_included) - exit(-1) - - properties = key_first_event | key_last_event - - record.append(properties) - """ - - df = pd.DataFrame(record) - df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) - - print(df) - print(initial_rt_event_properties) - exit(-1) - #print(i) - - #dict = {} - #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: - # dict[i] = [] - - #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: - # event_chains = extract_results( - # results_folder, - # module='tlo.simulation'#, - # key='event_chains', - # column = str(i), - # 
#custom_generate_series=get_num_dalys_by_year, - # do_scaling=False - # ) - # print(event_chains) - # print(event_chains.index) - # print(event_chains.columns.levels) - - # for index, row in event_chains.iterrows(): - # if event_chains.iloc[index,0] is not None: - # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): - # dict[i].append(event_chains.iloc[index,0]) - #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): - #print(event_chains.iloc[index,0]['de_depr']) - # exit(-1) - #for item in dict[0]: - # print(item) - - #exit(-1) - - TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) - - # Definitions of general helper functions - lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - - def target_period() -> str: - """Returns the target period as a string of the form YYYY-YYYY""" - return "-".join(str(t.year) for t in TARGET_PERIOD) - - def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" - from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( - ImpactOfHealthSystemMode, - ) - e = ImpactOfHealthSystemMode() - return tuple(e._scenarios.keys()) - - def get_num_deaths(_df): - """Return total number of Deaths (total within the TARGET_PERIOD) - """ - return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) - - def get_num_dalys(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - def get_num_dalys_by_cause(_df): - """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum() - ) - - def set_param_names_as_column_index_level_0(_df): - """Set the columns index (level 0) as the param_names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) - return _df - - def find_difference_relative_to_comparison(_ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1), relative to where draw = `comparison`. 
- The comparison is `X - COMPARISON`.""" - return _ser \ - .unstack(level=0) \ - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ - .drop(columns=([comparison] if drop_comparison else [])) \ - .stack() - - - def get_counts_of_hsi_by_treatment_id(_df): - """Get the counts of the short TREATMENT_IDs occurring""" - _counts_by_treatment_id = _df \ - .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ - .apply(pd.Series) \ - .sum() \ - .astype(int) - return _counts_by_treatment_id.groupby(level=0).sum() - - year_target = 2023 - def get_counts_of_hsi_by_treatment_id_by_year(_df): - """Get the counts of the short TREATMENT_IDs occurring""" - _counts_by_treatment_id = _df \ - .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ - .apply(pd.Series) \ - .sum() \ - .astype(int) - return _counts_by_treatment_id.groupby(level=0).sum() - - def get_counts_of_hsi_by_short_treatment_id(_df): - """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" - _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() - - def get_counts_of_hsi_by_short_treatment_id_by_year(_df): - """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" - _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() - - - # Obtain parameter names for this scenario file - param_names = get_parameter_names_from_scenario_file() - print(param_names) - - # ================================================================================================ - # TIME EVOLUTION OF TOTAL DALYs - # Plot DALYs averted compared to the ``No Policy'' policy - - year_target = 2023 # This global variable will be passed to custom function - def get_num_dalys_by_year(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year == year_target] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - this_min_year = 2010 - for year in range(this_min_year, max_year+1): - year_target = year - num_dalys_by_year = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = num_dalys_by_year - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) - concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) - dalys_by_year = concatenated_df - print(dalys_by_year) - dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) - - # ================================================================================================ - # Print population under each scenario - pop_model = extract_results(results_folder, - module="tlo.methods.demography", - key="population", - 
column="total", - index="date", - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - pop_model.index = pop_model.index.year - pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] - print(pop_model) - assert dalys_by_year.index.equals(pop_model.index) - assert all(dalys_by_year.columns == pop_model.columns) - pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) - - # ================================================================================================ - # DALYs BROKEN DOWN BY CAUSES AND YEAR - # DALYs by cause per year - # %% Quantify the health losses associated with all interventions combined. - - year_target = 2023 # This global variable will be passed to custom function - def get_num_dalys_by_year_and_cause(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year == year_target] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum() - ) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - this_min_year = 2010 - for year in range(this_min_year, max_year+1): - year_target = year - num_dalys_by_year = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys_by_year_and_cause, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - - df_total = concatenated_df - df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - for year in range(min_year, max_year+1): - year_target = year - - hsi_delivered_by_year = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='HSI_Event', - custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = hsi_delivered_by_year - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - HSI_ran_by_year = concatenated_df - - del ALL - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - for year in range(min_year, max_year+1): - year_target = year - - hsi_not_delivered_by_year = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Never_ran_HSI_Event', - custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = hsi_not_delivered_by_year - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - HSI_never_ran_by_year = concatenated_df - - HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( 
- HSI_ran_by_year = HSI_ran_by_year.fillna(0) - HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) - HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) - HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) - print(HSI_ran_by_year) - print(HSI_never_ran_by_year) - print(HSI_total_by_year) - -if __name__ == "__main__": - rfp = Path('resources') - - parser = argparse.ArgumentParser( - description="Produce plots to show the impact each set of treatments", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--output-path", - help=( - "Directory to write outputs to. If not specified (set to None) outputs " - "will be written to value of --results-path argument." - ), - type=Path, - default=None, - required=False, - ) - parser.add_argument( - "--resources-path", - help="Directory containing resource files", - type=Path, - default=Path('resources'), - required=False, - ) - parser.add_argument( - "--results-path", - type=Path, - help=( - "Directory containing results from running " - "src/scripts/analysis_data_generation/scenario_generate_chains.py " - ), - default=None, - required=False - ) - args = parser.parse_args() - assert args.results_path is not None - results_path = args.results_path - - output_path = results_path if args.output_path is None else args.output_path - - apply( - results_folder=results_path, - output_folder=output_path, - resourcefilepath=args.resources_path - ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py deleted file mode 100644 index 3b4a00e110..0000000000 --- a/src/scripts/analysis_data_generation/postprocess_events_chain.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -from dateutil.relativedelta import relativedelta - - -# Remove from every individual's event chain all events that were fired after death -def cut_off_events_after_death(df): - - events_chain = df.groupby('person_ID') - - filtered_data = pd.DataFrame() - - for name, group in events_chain: - - # Find the first non-NaN 'date_of_death' and its index - first_non_nan_index = group['date_of_death'].first_valid_index() - - if first_non_nan_index is not None: - # Filter out all rows after the first non-NaN index - filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index - filtered_data = pd.concat([filtered_data, filtered_group]) - else: - # If there are no non-NaN values, keep the original group - filtered_data = pd.concat([filtered_data, group]) - - return filtered_data - -# Load into DataFrame -def load_csv_to_dataframe(file_path): - try: - # Load raw chains into df - df = pd.read_csv(file_path) - print("Raw event chains loaded successfully!") - return df - except FileNotFoundError: - print(f"Error: The file '{file_path}' was not found.") - except Exception as e: - print(f"An error occurred: {e}") - -file_path = 'output.csv' # Replace with the path to your CSV file - -output = load_csv_to_dataframe(file_path) - -# Some of the dates appeared not to be in datetime format. Correct here. 
-output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') -output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') -if 'hv_date_inf' in output.columns: - output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') - - -date_start = pd.to_datetime('2010-01-01') -if 'Other' in output['cause_of_death'].values: - print("ERROR: 'Other' was included in sim as possible cause of death") - exit(-1) - -# Choose which columns in individual properties to visualise -columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] -#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] - -# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison -columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] - -# If considering epidemiology consistent with sim, add check here. -check_ages_of_those_HIV_inf = False -if check_ages_of_those_HIV_inf: - for index, row in output.iterrows(): - if pd.isna(row['hv_date_inf']): - continue # Skip this iteration - diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) - if diff.years > 1 and diff.years<15: - print("Person contracted HIV infection at age younger than 15", diff) - -# Remove events after death -filtered_data = cut_off_events_after_death(output) - -print_raw_events = True # Print raw chain of events for each individual -print_selected_changes = False -print_all_changes = True -person_ID_of_interest = 494 - -pd.set_option('display.max_rows', None) - -for name, group in filtered_data.groupby('person_ID'): - list_of_dob = group['date_of_birth'] - - # Select individuals based on when they were born - if list_of_dob.iloc[0].year<2010: - - # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: - all_identical_dob = group['date_of_birth'].nunique() == 1 - all_identical_sex = group['sex'].nunique() == 1 - if all_identical_dob is False or all_identical_sex is False: - print("Immutable properties are changing! 
This is not chain for single individual") - print(group) - exit(-1) - - print("----------------------------------------------------------------------") - print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) - print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event - number_of_events =len(group)/2 - number_of_changes=0 - if print_raw_events: - print(group) - - if print_all_changes: - # Check each row - comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) - - # Iterate over rows where any column has changed - for idx, row_changed in comparison.iloc[1:].iterrows(): - if row_changed.any(): # Check if any column changed in this row - number_of_changes+=1 - changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred - print(f"Row {idx} - Changes detected in columns: {changed_columns}") - columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns - print(group.loc[idx, columns_output]) # Print only the changed columns - if group.loc[idx, 'when'] == 'Before': - print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') - #print(group.loc[idx,columns_to_print]) - print() # For better readability - print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") - - if print_selected_changes: - tb_inf_condition = ( - ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | - ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | - ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | - ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | - ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | - ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) - ) - - alive_condition = ( - (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) - ) - # Combine conditions for rows of interest - transition_condition = tb_inf_condition | alive_condition - - if list_of_dob.iloc[0].year >= 2010: - print("DETECTED OF INTEREST") - print(group[group['event'] == 'Birth'][columns_to_print]) - - # Filter the DataFrame based on the condition - filtered_transitions = group[transition_condition] - if not filtered_transitions.empty: - if list_of_dob.iloc[0].year < 2010: - print("DETECTED OF INTEREST") - print(filtered_transitions[columns_to_print]) - - -print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) - - - diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py deleted file mode 100644 index 90d0801e2f..0000000000 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ /dev/null @@ -1,94 +0,0 @@ -"""This Scenario file run the model to generate event chans - -Run on the batch system using: -``` -tlo batch-submit - 
src/scripts/analysis_data_generation/scenario_generate_chains.py -``` - -or locally using: -``` - tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py -``` - -""" -from pathlib import Path -from typing import Dict - -import pandas as pd - -from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods.fullmodel import fullmodel -from tlo.scenario import BaseScenario - - -class GenerateEventChains(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 42 - self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 1000 - self._scenarios = self._get_scenarios() - self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 - self.generate_event_chains = True - - def log_configuration(self): - return { - 'filename': 'generate_event_chains', - 'directory': Path('./outputs'), # <- (specified only for local running) - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.events': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.collect_event_chains': logging.INFO - } - } - - def modules(self): - return ( - fullmodel() - ) - - def draw_parameters(self, draw_number, rng): - if draw_number < self.number_of_draws: - return list(self._scenarios.values())[draw_number] - else: - return - - def _get_scenarios(self) -> Dict[str, Dict]: - - return { - "Baseline": - mix_scenarios( - self._baseline(), - { - "CollectEventChains": { - "generate_event_chains": True, - }, - } - ), - - } - - def _baseline(self) -> Dict: - #Return the Dict with values for the parameter changes that define the baseline scenario. - return mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "cons_availability": "all", - } - }, - ) - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) diff --git a/src/scripts/collect_event_chains/analysis_extract_data.py b/src/scripts/collect_event_chains/analysis_extract_data.py new file mode 100644 index 0000000000..bef6540934 --- /dev/null +++ b/src/scripts/collect_event_chains/analysis_extract_data.py @@ -0,0 +1,83 @@ +import argparse +from pathlib import Path + +import pandas as pd + +from tlo.analysis.utils import extract_event_chains + + +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. 
+ """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Extract event chains + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + + individual_event_chains = extract_event_chains(results_folder) + print_filtered_df(individual_event_chains[0]) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_collect_event_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) From add05e9f91fedc4ee5fcd97868f0fa0090712e10 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:01:33 +0000 Subject: [PATCH 61/97] Track PR specific test file --- tests/test_collect_event_chains.py | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/test_collect_event_chains.py diff --git a/tests/test_collect_event_chains.py b/tests/test_collect_event_chains.py new file mode 100644 index 0000000000..d77bec85d9 --- /dev/null +++ b/tests/test_collect_event_chains.py @@ -0,0 +1,91 @@ +import os +from pathlib import Path + +import pytest + +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file, reconstruct_event_chains +from tlo.methods import ( + chronicsyndrome, + collect_event_chains, + demography, + enhanced_lifestyle, + healthseekingbehaviour, + healthsystem, + mockitis, + simplified_births, + symptommanager, +) + +resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + +start_date = Date(2010, 1, 1) +end_date = Date(2012, 1, 1) +popsize = 200 + +def check_dtypes(simulation): + # check types of columns + df = simulation.population.props + orig = simulation.population.new_row + assert (df.dtypes == orig.dtypes).all() + + +@pytest.mark.slow +def test_collection_of_event_chains(tmpdir, seed): + + # Establish the simulation object + sim = Simulation( + start_date=start_date, + seed=seed, + log_config={ + "filename": "log", + "directory": tmpdir, + "custom_levels": { + "tlo.methods.healthsystem": 
logging.DEBUG, + "tlo.methods.collect_event_chains": logging.INFO + } + }, resourcefilepath=resourcefilepath + ) + + # Register the core modules + sim.register(demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(), + collect_event_chains.CollectEventChains(generate_event_chains=True), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + mockitis.Mockitis(), + chronicsyndrome.ChronicSyndrome() + ) + + # Run the simulation + sim.make_initial_population(n=popsize) + sim.simulate(end_date=end_date) + check_dtypes(sim) + + # read the results + output = parse_log_file(sim.log_filepath, level=logging.DEBUG) + output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) + event_chains = reconstruct_event_chains(output_chains['tlo.methods.collect_event_chains']['event_chains']) + + # Check that we have a "StartOfSimulation" event for every individual in the initial population, + # and that this was logged at the start date + assert (event_chains['EventName'] == 'StartOfSimulation').sum() == popsize + assert (event_chains.loc[event_chains['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + + # Check that in the case of birth or start of simulation, all properties were logged + num_properties = len(sim.population.props.columns) + mask = event_chains["EventName"].isin(["Birth", "StartOfSimulation"]) + assert event_chains.loc[mask, "Info"].apply(len).eq(num_properties).all() + + # Assert that all HSI events that occurred were also collected in the event chains + HSIs_in_event_chains = event_chains["EventName"].str.contains('HSI', na=False).sum() + assert HSIs_in_event_chains == len(output['tlo.methods.healthsystem']['HSI_Event']) + + # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too + mask = (~event_chains["EventName"].isin(["StartOfSimulation", "Birth"])) & \ + (~event_chains["EventName"].str.contains("HSI", na=False)) + count = mask.sum() + assert count > 0 + From 0fe1d803fadb6a587a934d76cd421e462e98c98f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:38:49 +0000 Subject: [PATCH 62/97] Rename module and keys, and assume that if module is included then will want to track individual histories, i.e. 
remove option as module parameter --- .../parameter_values.csv | 3 - .../analysis_extract_data.py | 2 +- .../scenario_track_individual_histories.py} | 13 ++-- src/tlo/analysis/utils.py | 10 +-- src/tlo/methods/fullmodel.py | 4 +- ...hains.py => individual_history_tracker.py} | 61 ++++++++----------- ....py => test_individual_history_tracker.py} | 28 ++++----- 7 files changed, 52 insertions(+), 69 deletions(-) delete mode 100644 resources/ResourceFile_GenerateEventChains/parameter_values.csv rename src/scripts/{collect_event_chains => track_individual_histories}/analysis_extract_data.py (96%) rename src/scripts/{collect_event_chains/scenario_collect_event_chains.py => track_individual_histories/scenario_track_individual_histories.py} (85%) rename src/tlo/methods/{collect_event_chains.py => individual_history_tracker.py} (87%) rename tests/{test_collect_event_chains.py => test_individual_history_tracker.py} (65%) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv deleted file mode 100644 index ebf20c5f79..0000000000 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:172a0c24c859aaafbad29f6016433cac7a7324efc582e6c4b19c74b6b97436e7 -size 420 diff --git a/src/scripts/collect_event_chains/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py similarity index 96% rename from src/scripts/collect_event_chains/analysis_extract_data.py rename to src/scripts/track_individual_histories/analysis_extract_data.py index bef6540934..291a430ad1 100644 --- a/src/scripts/collect_event_chains/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -65,7 +65,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No type=Path, help=( "Directory containing results from running " - "src/scripts/analysis_data_generation/scenario_collect_event_chains.py " + "src/scripts/analysis_data_generation/scenario_track_individual_histories.py " ), default=None, required=False diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py similarity index 85% rename from src/scripts/collect_event_chains/scenario_collect_event_chains.py rename to src/scripts/track_individual_histories/scenario_track_individual_histories.py index f85e987487..2df7f28c44 100644 --- a/src/scripts/collect_event_chains/scenario_collect_event_chains.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -1,14 +1,14 @@ -"""This Scenario file run the model to generate event chans +"""This Scenario file run the model to track individual histories Run on the batch system using: ``` tlo batch-submit - src/scripts/analysis_data_generation/scenario_generate_chains.py + src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` or locally using: ``` - tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py + tlo scenario-run src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` """ @@ -37,7 +37,7 @@ def __init__(self): def log_configuration(self): return { - 'filename': 'generate_event_chains', + 'filename': 'track_individual_histories', 'directory': Path('./outputs'), # <- (specified only for local running) 'custom_levels': { '*': logging.WARNING, @@ -46,7 
+46,7 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.collect_event_chains': logging.INFO + 'tlo.methods.individual_history_tracker': logging.INFO } } @@ -68,9 +68,6 @@ def _get_scenarios(self) -> Dict[str, Dict]: mix_scenarios( self._baseline(), { - "CollectEventChains": { - "generate_event_chains": True, - }, } ), diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 9d8b1d5696..d862a4e359 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -413,7 +413,7 @@ def unpack_dict_rows(df, non_dict_cols=None): return out.reset_index(drop=True) -def reconstruct_event_chains(df): +def reconstruct_individual_histories(df): recon = unpack_dict_rows(df, ['date']) @@ -453,7 +453,7 @@ def truncate_dict(d): print(filtered) -def extract_event_chains(results_folder: Path, +def extract_individual_histories(results_folder: Path, ) -> dict: """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. @@ -461,8 +461,8 @@ def extract_event_chains(results_folder: Path, format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.collect_event_chains' - key = 'event_chains' + module = 'tlo.individual_history_tracker' + key = 'individual_histories' # get number of draws and numbers of runs info = get_scenario_info(results_folder) @@ -481,7 +481,7 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - df_final = reconstruct_event_chains(df) + df_final = reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs df_final['E'] = df_final['E'] + ID_offset diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 3c710c7dd2..83b30266c5 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,7 +8,7 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, - collect_event_chains, + individual_history_tracker, contraception, copd, demography, @@ -117,7 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, - collect_event_chains.CollectEventChains, + individual_history_tracker.IndividualHistoryTracker, ] return [ module_class( diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/individual_history_tracker.py similarity index 87% rename from src/tlo/methods/collect_event_chains.py rename to src/tlo/methods/individual_history_tracker.py index 6a31e868fc..e36a844fd8 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -12,19 +12,17 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -class CollectEventChains(Module): +class IndividualHistoryTracker(Module): def __init__( self, name: Optional[str] = None, - generate_event_chains: Optional[bool] = None, modules_of_interest: Optional[List[str]] = None, events_to_ignore: Optional[List[str]] = None ): super().__init__(name) - self.generate_event_chains = generate_event_chains self.modules_of_interest = modules_of_interest self.events_to_ignore = events_to_ignore @@ -39,9 +37,6 @@ def __init__( PARAMETERS = { # Options within module - "generate_event_chains": Parameter( - Types.BOOL, "Whether or 
not we want to collect chains of events for individuals" - ), "modules_of_interest": Parameter( Types.LIST, "Restrict the events collected to specific modules. If *, print for all modules" ), @@ -57,13 +52,10 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) def initialise_population(self, population): # Use parameter file values by default, if not overwritten - self.generate_event_chains = self.parameters['generate_event_chains'] \ - if self.generate_event_chains is None \ - else self.generate_event_chains self.modules_of_interest = self.parameters['modules_of_interest'] \ if self.modules_of_interest is None \ @@ -88,32 +80,30 @@ def on_notification_pop_has_been_initialised(self, data): # At the start of the simulation + when a new individual is born, # we therefore want to store all of their properties # at the start. - if self.generate_event_chains: - - # EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + logger.info(key='individual_histories', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') def on_notification_of_birth(self, data): - if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the - # chain of property changes that this individual will undergo - # as a result of events taking place. - link_info = data['link_info'] - link_info.update(self.sim.population.props.loc[data['target']].to_dict()) - chain_links = {} - chain_links[data['target']] = link_info + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. 
+ link_info = data['link_info'] + link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + chain_links = {} + chain_links[data['target']] = link_info - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='individual_histories', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') def on_notification_event_about_to_run(self, data): @@ -123,11 +113,10 @@ def on_notification_event_about_to_run(self, data): """ # Only log event if - # 1) generate_event_chains is set to True - # 2) the event belongs to modules of interest and - # 3) the event is not in the list of events to ignore - if (not self.generate_event_chains - or (data['module'] not in self.modules_of_interest) + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if ( + (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore) ): return @@ -251,7 +240,7 @@ def on_notification_event_has_just_ran(self, data): # Convert chain_links into EAV ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', + logger.info(key='individual_histories', data= ednav.to_dict(), description='Links forming chains of events for simulated individuals') diff --git a/tests/test_collect_event_chains.py b/tests/test_individual_history_tracker.py similarity index 65% rename from tests/test_collect_event_chains.py rename to tests/test_individual_history_tracker.py index d77bec85d9..110b3ffa93 100644 --- a/tests/test_collect_event_chains.py +++ b/tests/test_individual_history_tracker.py @@ -4,10 +4,10 @@ import pytest from tlo import Date, Simulation, logging -from tlo.analysis.utils import parse_log_file, reconstruct_event_chains +from tlo.analysis.utils import parse_log_file, reconstruct_individual_histories from tlo.methods import ( chronicsyndrome, - collect_event_chains, + individual_history_tracker, demography, enhanced_lifestyle, healthseekingbehaviour, @@ -31,7 +31,7 @@ def check_dtypes(simulation): @pytest.mark.slow -def test_collection_of_event_chains(tmpdir, seed): +def test_individual_history_tracker(tmpdir, seed): # Establish the simulation object sim = Simulation( @@ -42,7 +42,7 @@ def test_collection_of_event_chains(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.collect_event_chains": logging.INFO + "tlo.methods.individual_history_tracker": logging.INFO } }, resourcefilepath=resourcefilepath ) @@ -52,7 +52,7 @@ def test_collection_of_event_chains(tmpdir, seed): simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - collect_event_chains.CollectEventChains(generate_event_chains=True), + individual_history_tracker.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), mockitis.Mockitis(), @@ -67,25 +67,25 @@ def test_collection_of_event_chains(tmpdir, seed): # read the results output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) - event_chains = reconstruct_event_chains(output_chains['tlo.methods.collect_event_chains']['event_chains']) + individual_histories = 
reconstruct_individual_histories(output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date - assert (event_chains['EventName'] == 'StartOfSimulation').sum() == popsize - assert (event_chains.loc[event_chains['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize + assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) - mask = event_chains["EventName"].isin(["Birth", "StartOfSimulation"]) - assert event_chains.loc[mask, "Info"].apply(len).eq(num_properties).all() + mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) + assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_event_chains = event_chains["EventName"].str.contains('HSI', na=False).sum() - assert HSIs_in_event_chains == len(output['tlo.methods.healthsystem']['HSI_Event']) + HSIs_in_individual_histories = individual_histories["EventName"].str.contains('HSI', na=False).sum() + assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too - mask = (~event_chains["EventName"].isin(["StartOfSimulation", "Birth"])) & \ - (~event_chains["EventName"].str.contains("HSI", na=False)) + mask = (~individual_histories["EventName"].isin(["StartOfSimulation", "Birth"])) & \ + (~individual_histories["EventName"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 From 65522a1d4f13eb2a84d1d338fec022ac78173305 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:41:24 +0000 Subject: [PATCH 63/97] Revert changes in rti module --- src/tlo/methods/rti.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 92f79f7538..a5f31e71b0 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event + super().__init__(module, frequency=DateOffset(months=1)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) From c10e6aab8b333ea38d01dcbd1888c7d118870d75 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:43:40 +0000 Subject: [PATCH 64/97] Revert changes in rti module --- src/tlo/methods/rti.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index a5f31e71b0..4ec4fe18a5 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2864,12 +2864,8 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - #if self.sim.generate_event_chains is 
True and self.sim.generate_event_chains_overwrite_epi is True: - #pred = 1.0 - #else: + pred = eq.predict(df.loc[rt_current_non_ind]) - - random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] From ab975c2ad2e6b72635a37586a7f8f076f18a6818 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:59:33 +0000 Subject: [PATCH 65/97] change function names on listener's end --- src/tlo/events.py | 4 ++-- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 16 ++++++++-------- src/tlo/simulation.py | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 74c28a1ded..0fff320c3c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -66,7 +66,7 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, + notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) @@ -74,7 +74,7 @@ def run(self): self.post_apply_hook() # Dispatch notification that event has just ran - notifier.dispatch("event.has_just_ran", data={"target": self.target, + notifier.dispatch("event.post-run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 085d80683c..f3ee3c7a46 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -200,7 +200,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", + notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) @@ -220,7 +220,7 @@ def run(self, squeeze_factor): else: level = "N/A" - notifier.dispatch("event.has_just_ran", + notifier.dispatch("event.post-run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__, "footprint": footprint, diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index e36a844fd8..c69a4b16fe 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -46,10 +46,10 @@ def __init__( } def initialise_simulation(self, sim): - notifier.add_listener("simulation.pop_has_been_initialised", self.on_notification_pop_has_been_initialised) - notifier.add_listener("simulation.on_birth", self.on_notification_of_birth) - notifier.add_listener("event.about_to_run", self.on_notification_event_about_to_run) - notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) + notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) + notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) + notifier.add_listener("event.pre-run", self.on_event_pre_run) + notifier.add_listener("event.post-run", self.on_event_post_run) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) @@ -73,7 +73,7 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place 
here? pass - def on_notification_pop_has_been_initialised(self, data): + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, # only the changes in individual properties will be logged. @@ -89,7 +89,7 @@ def on_notification_pop_has_been_initialised(self, data): description='Links forming chains of events for simulated individuals') - def on_notification_of_birth(self, data): + def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo @@ -106,7 +106,7 @@ def on_notification_of_birth(self, data): description='Links forming chains of events for simulated individuals') - def on_notification_event_about_to_run(self, data): + def on_event_pre_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, a nd if so stored required information before the event has occurred. @@ -160,7 +160,7 @@ def on_notification_event_about_to_run(self, data): return - def on_notification_event_has_just_ran(self, data): + def on_event_post_run(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index bde1c72b76..4fec9ed36e 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.pop_has_been_initialised", data={"EventName" : "StartOfSimulation"}) + notifier.dispatch("simulation.post-initialise", data={"EventName" : "StartOfSimulation"}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
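For orientation, the hooks renamed above follow a simple publish/subscribe pattern: listeners are registered against a topic string with notifier.add_listener, and the simulation or event code fires them with notifier.dispatch. Below is a minimal, self-contained sketch of that pattern; the MiniNotifier class and the example payload values are illustrative assumptions, and only the topic names and the add_listener/dispatch call shapes come from the diffs above.

from collections import defaultdict

class MiniNotifier:
    """Toy stand-in for the tlo notifier; not the real implementation."""
    def __init__(self):
        self._listeners = defaultdict(list)

    def add_listener(self, topic, callback):
        # register a callback to run whenever `topic` is dispatched
        self._listeners[topic].append(callback)

    def dispatch(self, topic, data):
        # pass the payload dict to every callback registered for this topic
        for callback in self._listeners[topic]:
            callback(data)

notifier = MiniNotifier()

def on_event_pre_run(data):
    print(f"{data['EventName']} (module {data['module']}) about to run on {data['target']}")

notifier.add_listener("event.pre-run", on_event_pre_run)
notifier.dispatch("event.pre-run",
                  data={"target": 3, "module": "Mockitis", "EventName": "MockitisEvent"})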
@@ -449,7 +449,7 @@ def do_birth(self, mother_id: int) -> int: module.on_birth(mother_id, child_id) # Dispatch notification that birth is about to occur - notifier.dispatch("simulation.on_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) + notifier.dispatch("simulation.post-do_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) return child_id From 450f06cd5670c7ac2c2ee05cdc68ef57e1d7be89 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:14:19 +0000 Subject: [PATCH 66/97] Restructure data passed by dispatcher --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 10 +++++----- src/tlo/methods/individual_history_tracker.py | 13 ++++++++----- src/tlo/simulation.py | 4 ++-- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 0fff320c3c..8aba0069bc 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -67,15 +67,15 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, - "module" : self.module.name, - "link_info" : {"EventName": type(self).__name__}}) + "module" : self.module.name, + "EventName": type(self).__name__}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "link_info" : {"EventName": type(self).__name__}}) + "EventName": type(self).__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f3ee3c7a46..780b9afff6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -203,7 +203,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "link_info" : {"EventName": type(self).__name__}}) + "EventName": type(self).__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -222,10 +222,10 @@ def run(self, squeeze_factor): notifier.dispatch("event.post-run", data={"target": self.target, - "link_info" : {"EventName": type(self).__name__, - "footprint": footprint, - "level": level - }}) + "EventName": type(self).__name__, + "footprint": footprint, + "level": level + }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index c69a4b16fe..e6317bf942 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -94,10 +94,10 @@ def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. 
- link_info = data['link_info'] - link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + link_info = {'EventName': 'Birth'} + link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} - chain_links[data['target']] = link_info + chain_links[data['child_id']] = link_info ednav = convert_chain_links_into_EAV(chain_links) @@ -117,7 +117,7 @@ def on_event_pre_run(self, data): # 2) the event is not in the list of events to ignore if ( (data['module'] not in self.modules_of_interest) - or (data['link_info']['EventName'] in self.events_to_ignore) + or (data['EventName'] in self.events_to_ignore) ): return @@ -186,7 +186,10 @@ def on_event_post_run(self, data): mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred - link_info = data['link_info'] + link_info = {'EventName' : data['EventName']} + if 'footprint' in data.keys(): + link_info['footprint'] = data['footprint'] + link_info['level'] = data['level'] # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4fec9ed36e..71a90b04ff 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.post-initialise", data={"EventName" : "StartOfSimulation"}) + notifier.dispatch("simulation.post-initialise", data={}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
@@ -449,7 +449,7 @@ def do_birth(self, mother_id: int) -> int: module.on_birth(mother_id, child_id) # Dispatch notification that birth is about to occur - notifier.dispatch("simulation.post-do_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) + notifier.dispatch("simulation.post-do_birth", data={'child_id': child_id}) return child_id From 9e36395e0c4f3615c626c555fc323d7b20f7a69f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:20:47 +0000 Subject: [PATCH 67/97] Style fixes --- src/tlo/methods/fullmodel.py | 3 ++- tests/test_individual_history_tracker.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 83b30266c5..b0c7abeef1 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,7 +8,6 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, - individual_history_tracker, contraception, copd, demography, @@ -21,6 +20,7 @@ healthseekingbehaviour, healthsystem, hiv, + individual_history_tracker, labour, malaria, measles, @@ -117,6 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, + # - Track Individual histories individual_history_tracker.IndividualHistoryTracker, ] return [ diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 110b3ffa93..db460187d8 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -7,11 +7,11 @@ from tlo.analysis.utils import parse_log_file, reconstruct_individual_histories from tlo.methods import ( chronicsyndrome, - individual_history_tracker, demography, enhanced_lifestyle, healthseekingbehaviour, healthsystem, + individual_history_tracker, mockitis, simplified_births, symptommanager, @@ -67,12 +67,14 @@ def test_individual_history_tracker(tmpdir, seed): # read the results output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) - individual_histories = reconstruct_individual_histories(output_chains['tlo.methods.individual_history_tracker']['individual_histories']) + individual_histories = reconstruct_individual_histories( + output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize - assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', + 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) From 07beb70bab0570f9b74a55330dcf94bf2551928e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:33:26 +0000 Subject: [PATCH 68/97] Remove individual history tracker from the fullmodule --- .../scenario_track_individual_histories.py | 3 ++- src/tlo/methods/fullmodel.py | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py 
b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 2df7f28c44..69cd0438d9 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -20,6 +20,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel +from tlo.methods import individual_history_tracker from tlo.scenario import BaseScenario @@ -52,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel() + fullmodel()# + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index b0c7abeef1..3f0c79434e 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -20,7 +20,6 @@ healthseekingbehaviour, healthsystem, hiv, - individual_history_tracker, labour, malaria, measles, @@ -117,8 +116,6 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, - # - Track Individual histories - individual_history_tracker.IndividualHistoryTracker, ] return [ module_class( From 3644668eeb930b040f98f6c91f0240ebc1902099 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 12:02:32 +0000 Subject: [PATCH 69/97] Fix analysis scripts --- .../track_individual_histories/analysis_extract_data.py | 6 +++--- .../scenario_track_individual_histories.py | 6 +++--- src/tlo/analysis/utils.py | 7 +++++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index 291a430ad1..db733ab69f 100644 --- a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -3,7 +3,7 @@ import pandas as pd -from tlo.analysis.utils import extract_event_chains +from tlo.analysis.utils import extract_individual_histories def print_filtered_df(df): @@ -33,8 +33,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - individual_event_chains = extract_event_chains(results_folder) - print_filtered_df(individual_event_chains[0]) + individual_individual_histories = extract_individual_histories(results_folder) + print_filtered_df(individual_individual_histories[0]) if __name__ == "__main__": rfp = Path('resources') diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 69cd0438d9..5cc4d2eeeb 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,12 +19,12 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +#from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel -from tlo.methods import individual_history_tracker from tlo.scenario import BaseScenario -class GenerateEventChains(BaseScenario): +class TrackIndividualHistories(BaseScenario): def __init__(self): super().__init__() self.seed = 42 @@ -47,7 +47,7 @@ def log_configuration(self): 
'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.individual_history_tracker': logging.INFO + #'tlo.methods.individual_history_tracker': logging.INFO } } diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index d862a4e359..4f56fd9b37 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -461,7 +461,7 @@ def extract_individual_histories(results_folder: Path, format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.individual_history_tracker' + module = 'tlo.methods.individual_history_tracker' key = 'individual_histories' # get number of draws and numbers of runs @@ -478,9 +478,12 @@ def extract_individual_histories(results_folder: Path, for run in range(info['runs_per_draw']): + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + print(df) + try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - + print(df) df_final = reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs From 23f45e13acd48ab53d6d6af369bfba44335912ec Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:13:55 +0000 Subject: [PATCH 70/97] Fix retreival of class name --- src/tlo/events.py | 4 ++-- src/tlo/methods/hsi_event.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 8aba0069bc..1ceb30a576 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -68,14 +68,14 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 780b9afff6..ad1f92eedd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -203,7 +203,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -222,7 +222,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": type(self).__name__, + "EventName": self.__class__.__name__, "footprint": footprint, "level": level }) From e6f35cc42dd360cb87aee589eaa3dc8ddd50b001 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:56:09 +0000 Subject: [PATCH 71/97] Add resource file --- .../parameter_values.csv | 3 +++ .../scenario_track_individual_histories.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv diff --git 
a/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv b/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv new file mode 100644 index 0000000000..87a6ed9e99 --- /dev/null +++ b/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56ab1e989bf1133f8e52f81552cb55945d6bf14e1758ae1baa62b6e12b37ce2 +size 365 diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 5cc4d2eeeb..696612352f 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -#from tlo.methods import individual_history_tracker +from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel from tlo.scenario import BaseScenario @@ -53,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel()# + [individual_history_tracker.IndividualHistoryTracker()] + fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): From b4301cebdf89fa8d23e56452b318fb91b1ae1386 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:20:11 +0000 Subject: [PATCH 72/97] Fix EventName error and logging of EAV dataframe --- src/tlo/methods/individual_history_tracker.py | 44 ++++++++++--------- src/tlo/util.py | 1 + tests/test_individual_history_tracker.py | 1 - 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index e6317bf942..26a0ed8708 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -73,6 +73,14 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass + def log_EAV_dataframe_to_individual_histories(self, df): + + for idx, row in df.iterrows(): + print({"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}) + logger.info(key='individual_histories', + data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, + description='Links forming chains of events for simulated individuals') + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, @@ -82,12 +90,10 @@ def on_simulation_post_initialise(self, data): # at the start. 
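As a rough illustration of the record shape being logged here: df_to_EAV (defined in tlo/util.py and not shown in full in this patch) is expected to return one row per person-and-property in entity-attribute-value form, which log_EAV_dataframe_to_individual_histories then writes out row by row. A toy sketch of that shape, assuming a pandas melt-style conversion and using made-up property names:

import pandas as pd

# toy stand-in for sim.population.props; real property names differ
props = pd.DataFrame({"is_alive": [True, True], "age_years": [30, 2]})

eav = (
    props.reset_index()
         .rename(columns={"index": "E"})                    # E = entity (person id)
         .melt(id_vars="E", var_name="A", value_name="V")   # A = attribute, V = value
)
eav["EventName"] = "StartOfSimulation"
print(eav)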
# EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - - logger.info(key='individual_histories', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - + eav_plus_EventName = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + + return def on_simulation_post_do_birth(self, data): @@ -99,12 +105,10 @@ def on_simulation_post_do_birth(self, data): chain_links = {} chain_links[data['child_id']] = link_info - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='individual_histories', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + return def on_event_pre_run(self, data): """Do this when notified that an event is about to run. @@ -235,17 +239,15 @@ def on_event_post_run(self, data): chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, - entire_mni_after) + entire_mni_after, + data['EventName']) # Log chains if chain_links: - - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='individual_histories', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + # Convert chain_links into EAV-type dataframe + eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + # log it + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) # Reset variables self.print_chains = False @@ -298,7 +300,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): """ This function compares the population dataframe and mni dictionary before/after a population-wide e vent has occurred. 
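The comparison in compare_population_dataframe_and_mni boils down to diffing each affected person's row before and after the event and keeping only the properties whose value changed. A small self-contained sketch of that idea, with toy property names and event name, and without the NaN handling that the module applies separately:

import pandas as pd

row_before = pd.Series({"is_alive": True, "hv_inf": False, "age_years": 30})
row_after = pd.Series({"is_alive": True, "hv_inf": True, "age_years": 30})

# keep only the properties whose value changed as a result of the event
changed = row_after[row_after.ne(row_before)]

link_info = {"EventName": "SomeInfectionEvent", **changed.to_dict()}
print(link_info)  # {'EventName': 'SomeInfectionEvent', 'hv_inf': True}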
@@ -327,7 +329,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventName': type(self).__name__, + 'EventName': EventName, } # Store the new values from df_after for the changed columns diff --git a/src/tlo/util.py b/src/tlo/util.py index 98b13e45fd..e34a887e42 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -105,6 +105,7 @@ def df_to_EAV(df, date, event_name): def convert_chain_links_into_EAV(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") id_cols = ["EventName"] diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index db460187d8..619c062925 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -90,4 +90,3 @@ def test_individual_history_tracker(tmpdir, seed): (~individual_histories["EventName"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 - From 8177340a3892b492dcb8514fc80cfe242adf79e2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:12:17 +0000 Subject: [PATCH 73/97] Adjust utily functions based on new logging --- .../scenario_track_individual_histories.py | 2 +- src/tlo/analysis/utils.py | 109 ++++-------------- src/tlo/methods/individual_history_tracker.py | 1 - 3 files changed, 24 insertions(+), 88 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 696612352f..ded9e4d5ed 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -47,7 +47,7 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - #'tlo.methods.individual_history_tracker': logging.INFO + 'tlo.methods.individual_history_tracker': logging.INFO } } diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 4f56fd9b37..73e31ee944 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -366,91 +366,35 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: return _concat -def unpack_dict_rows(df, non_dict_cols=None): - """ - Reconstruct a full DataFrame from rows where most columns are dictionaries. - Non-dict columns (e.g., 'date') are propagated to all reconstructed rows. 
- - Parameters: - df: pd.DataFrame - non_dict_cols: list of columns that are NOT dictionaries - """ - if non_dict_cols is None: - non_dict_cols = [] - - original_cols = ['E', 'date', 'EventName', 'A', 'V'] - - reconstructed_rows = [] - - for _, row in df.iterrows(): - # Determine dict columns for this row - dict_cols = [col for col in original_cols if col not in non_dict_cols] - - if not dict_cols: - # No dict columns, just append row - reconstructed_rows.append(row.to_dict()) - continue - - # Use the first dict column to get the block length - first_dict_col = dict_cols[0] - block_length = len(row[first_dict_col]) - - # Build each expanded row - for i in range(block_length): - new_row = {} - for col in original_cols: - cell = row[col] - if col in dict_cols: - # Access the dict using string or integer keys - new_row[col] = cell.get(str(i), cell.get(i)) - else: - # Propagate non-dict value - new_row[col] = cell - reconstructed_rows.append(new_row) - - # Build DataFrame in original column order - out = pd.DataFrame(reconstructed_rows)[original_cols] - - return out.reset_index(drop=True) - def reconstruct_individual_histories(df): - recon = unpack_dict_rows(df, ['date']) - - # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. - recon['V'] = recon['V'].apply(str) # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - recon.groupby(['E', 'date', 'EventName']) + df.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) - #birth_count = (df_final['EventName'] == 'Birth').sum() + + first_events = ["StartOfSimulation", "Birth"] - return df_final + # Ensure that if E and date are the same, StartOfSimulation or Birth come first + df_collapsed["EventName"] = pd.Categorical( + df_collapsed["EventName"], + categories=first_events + sorted( + x for x in df_collapsed["EventName"].unique() + if x not in first_events + ), + ordered=True, + ) - -def print_filtered_df(df): - """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. 
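The groupby above turns the long entity-attribute-value log back into one row per (person, date, event) with an Info dict holding all attribute/value pairs recorded for that event. A toy, self-contained example of that collapse step; the data values are made up:

import pandas as pd

df = pd.DataFrame({
    "E": [0, 0, 0],
    "date": pd.to_datetime(["2010-01-01"] * 3),
    "EventName": ["StartOfSimulation", "StartOfSimulation", "HSI_SomeTreatment"],
    "A": ["is_alive", "age_years", "footprint"],
    "V": [True, 30, "{'Over5OPD': 1}"],
})

collapsed = (
    df.groupby(["E", "date", "EventName"])
      .apply(lambda g: dict(zip(g["A"], g["V"])))
      .reset_index(name="Info")
)
print(collapsed)
# yields one StartOfSimulation row with Info == {'is_alive': True, 'age_years': 30}
# and one HSI_SomeTreatment row with Info == {'footprint': "{'Over5OPD': 1}"}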
- """ - pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - - dict_cols = ["Info"] - max_items = 2 - # Step 2: Truncate dictionary columns for display - if dict_cols is not None: - for col in dict_cols: - def truncate_dict(d): - if isinstance(d, dict): - items = list(d.items())[:max_items] # keep only first `max_items` - return dict(items) - return d - filtered[col] = filtered[col].apply(truncate_dict) - print(filtered) + df_final = ( + df_collapsed + .sort_values(by=['E', 'date', 'EventName']) + .reset_index(drop=True) + ) + + return df_final def extract_individual_histories(results_folder: Path, @@ -478,22 +422,18 @@ def extract_individual_histories(results_folder: Path, for run in range(info['runs_per_draw']): - df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - print(df) - try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - print(df) - df_final = reconstruct_individual_histories(df) + df_single_run= reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs - df_final['E'] = df_final['E'] + ID_offset + df_single_run['E'] = df_single_run['E'] + ID_offset # Calculate ID offset for next run - ID_offset = (max(df_final['E']) + 1) + ID_offset = (max(df_single_run['E']) + 1) # Append these chains to list - dfs_from_runs.append(df_final) + dfs_from_runs.append(df_single_run) except KeyError: # Some logs could not be found - probably because this run failed. @@ -503,9 +443,6 @@ def extract_individual_histories(results_folder: Path, # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) - # Optionally, sort by 'E' and 'EventDate' after combining - res[draw] = res[draw].sort_values(by=['E', 'date']).reset_index(drop=True) - return res diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 26a0ed8708..972223694a 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -76,7 +76,6 @@ def on_birth(self, mother, child): def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): - print({"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}) logger.info(key='individual_histories', data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, description='Links forming chains of events for simulated individuals') From 2215645f1de2611d2e800d56758886ef85dd1b7f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:23:43 +0000 Subject: [PATCH 74/97] Rename E column --- .../track_individual_histories/analysis_extract_data.py | 1 - src/tlo/analysis/utils.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index db733ab69f..e88f68bfe9 100644 --- a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -34,7 +34,6 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_colwidth', None) individual_individual_histories = extract_individual_histories(results_folder) - print_filtered_df(individual_individual_histories[0]) if __name__ == 
"__main__": rfp = Path('resources') diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 73e31ee944..bf76625bef 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -393,7 +393,7 @@ def reconstruct_individual_histories(df): .sort_values(by=['E', 'date', 'EventName']) .reset_index(drop=True) ) - + return df_final @@ -432,6 +432,9 @@ def extract_individual_histories(results_folder: Path, # Calculate ID offset for next run ID_offset = (max(df_single_run['E']) + 1) + # The E has now become an ID for the individual in the draw overall, so rename column as such + df_single_run = df_single_run.rename(columns={"E": "person ID in draw"}) + # Append these chains to list dfs_from_runs.append(df_single_run) From d869f17008f169bb24bf03e6cd8fd46e57faf35a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 16:13:55 +0000 Subject: [PATCH 75/97] Rename column name --- src/tlo/analysis/utils.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index bf76625bef..2ce404e821 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -433,8 +433,8 @@ def extract_individual_histories(results_folder: Path, ID_offset = (max(df_single_run['E']) + 1) # The E has now become an ID for the individual in the draw overall, so rename column as such - df_single_run = df_single_run.rename(columns={"E": "person ID in draw"}) - + df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) + # Append these chains to list dfs_from_runs.append(df_single_run) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 972223694a..189b8f2052 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -77,7 +77,7 @@ def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', - data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, + data = {"E": row.E, "A": row.A, "V": str(row.V), "EventName": row.EventName}, description='Links forming chains of events for simulated individuals') def on_simulation_post_initialise(self, data): From 3d1154539b513807d3c226fce9243b6db32f7564 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 16:58:41 +0000 Subject: [PATCH 76/97] Check changes in df --- src/tlo/analysis/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 2ce404e821..96c4337f4a 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -365,6 +365,40 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat +def check_info_value_changes(df): + # Ensure rows are sorted within each person + problems = [] # store violations + + # iterate group-by-group + for E, g in df.groupby("E"): + prev_info = {} + + for _, row in g.iterrows(): + current_info = row["Info"] + + for key, value in current_info.items(): + if key in prev_info and key != 'footprint' and key != 'level': + # compare with previous value + if prev_info[key] == value: + problems.append({ + "key": key, + "value": value, + "message": "Value 
repeated but should differ" + }) + + + # update latest value + if len(problems)>0: + print(prev_info) + print(current_info) + print(problems) + problems = [] + print() + prev_info = row["Info"] + exit(-1) + + return pd.DataFrame(problems) + def reconstruct_individual_histories(df): @@ -394,6 +428,10 @@ def reconstruct_individual_histories(df): .reset_index(drop=True) ) + problems = check_info_value_changes(df_final) + print(problems) + exit(-1) + return df_final From c1463265a13b5d47a38e2479474e9c7d6e8b8bac Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 19:16:44 +0000 Subject: [PATCH 77/97] Ensure order of events on same date is preserved --- .../scenario_track_individual_histories.py | 6 +-- src/tlo/analysis/utils.py | 43 +++++-------------- 2 files changed, 13 insertions(+), 36 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index ded9e4d5ed..e8ef3fb929 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -29,11 +29,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 1000 + self.end_date = self.start_date + pd.DateOffset(years=5) + self.pop_size = 100 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 + self.runs_per_draw = 1 self.generate_event_chains = True def log_configuration(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 96c4337f4a..9aba6111d0 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -366,8 +366,8 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: return _concat def check_info_value_changes(df): - # Ensure rows are sorted within each person - problems = [] # store violations + + problems = {} # store issues # iterate group-by-group for E, g in df.groupby("E"): @@ -379,25 +379,13 @@ def check_info_value_changes(df): for key, value in current_info.items(): if key in prev_info and key != 'footprint' and key != 'level': # compare with previous value - if prev_info[key] == value: - problems.append({ - "key": key, - "value": value, - "message": "Value repeated but should differ" - }) - - + if prev_info[key] == value and key not in problems.keys(): + problems[key] = value + # update latest value - if len(problems)>0: - print(prev_info) - print(current_info) - print(problems) - problems = [] - print() prev_info = row["Info"] - exit(-1) - return pd.DataFrame(problems) + return problems def reconstruct_individual_histories(df): @@ -405,32 +393,19 @@ def reconstruct_individual_histories(df): # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. 
{a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - df.groupby(['E', 'date', 'EventName']) + df.groupby(['E', 'date', 'EventName'], sort=False) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - first_events = ["StartOfSimulation", "Birth"] - - # Ensure that if E and date are the same, StartOfSimulation or Birth come first - df_collapsed["EventName"] = pd.Categorical( - df_collapsed["EventName"], - categories=first_events + sorted( - x for x in df_collapsed["EventName"].unique() - if x not in first_events - ), - ordered=True, - ) - df_final = ( df_collapsed - .sort_values(by=['E', 'date', 'EventName']) + .sort_values(by=['E', 'date']) .reset_index(drop=True) ) problems = check_info_value_changes(df_final) print(problems) - exit(-1) return df_final @@ -483,6 +458,8 @@ def extract_individual_histories(results_folder: Path, # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) + + res[0].to_csv('individual_histories.csv') return res From d48376fdc54674520fc51346c3ec9ece4a1014b4 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 10:20:36 +0000 Subject: [PATCH 78/97] Rename module to allow bundling other useful things in there --- .../scenario_track_individual_histories.py | 10 ++--- src/tlo/analysis/utils.py | 40 +++++++++---------- ...story_tracker.py => individual_history.py} | 0 tests/test_individual_history_tracker.py | 14 +++---- 4 files changed, 32 insertions(+), 32 deletions(-) rename src/tlo/methods/{individual_history_tracker.py => individual_history.py} (100%) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index e8ef3fb929..0c6a43b127 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -2,7 +2,7 @@ Run on the batch system using: ``` -tlo batch-submit +tlo batch-submit src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods import individual_history_tracker +from tlo.methods import individual_history from tlo.methods.fullmodel import fullmodel from tlo.scenario import BaseScenario @@ -47,13 +47,13 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.individual_history_tracker': logging.INFO + 'tlo.methods.individual_history': logging.INFO } } def modules(self): return ( - fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] + fullmodel() + [individual_history.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): @@ -73,7 +73,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: ), } - + def _baseline(self) -> Dict: #Return the Dict with values for the parameter changes that define the baseline scenario. 
return mix_scenarios( diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 9aba6111d0..66e4487467 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -372,7 +372,7 @@ def check_info_value_changes(df): # iterate group-by-group for E, g in df.groupby("E"): prev_info = {} - + for _, row in g.iterrows(): current_info = row["Info"] @@ -381,15 +381,15 @@ def check_info_value_changes(df): # compare with previous value if prev_info[key] == value and key not in problems.keys(): problems[key] = value - + # update latest value prev_info = row["Info"] - + return problems def reconstruct_individual_histories(df): - + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( @@ -397,28 +397,28 @@ def reconstruct_individual_histories(df): .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - + df_final = ( df_collapsed .sort_values(by=['E', 'date']) .reset_index(drop=True) ) - + problems = check_info_value_changes(df_final) print(problems) - + return df_final - - + + def extract_individual_histories(results_folder: Path, ) -> dict: """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. - Returns dictionary where keys are draws, and each draw is associated with a dataframe of - format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + Returns dictionary where keys are draws, and each draw is associated with a dataframe of + format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.methods.individual_history_tracker' + module = 'tlo.methods.individual_history' key = 'individual_histories' # get number of draws and numbers of runs @@ -426,13 +426,13 @@ def extract_individual_histories(results_folder: Path, # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df. res = dict() - + for draw in range(info['number_of_draws']): - + # All individuals in same draw will be combined across runs, so their ID will be offset. dfs_from_runs = [] ID_offset = 0 - + for run in range(info['runs_per_draw']): try: @@ -441,24 +441,24 @@ def extract_individual_histories(results_folder: Path, # Offset person ID to account for the fact that we are collecting chains across runs df_single_run['E'] = df_single_run['E'] + ID_offset - + # Calculate ID offset for next run ID_offset = (max(df_single_run['E']) + 1) - + # The E has now become an ID for the individual in the draw overall, so rename column as such df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) # Append these chains to list dfs_from_runs.append(df_single_run) - + except KeyError: # Some logs could not be found - probably because this run failed. # Simply to not append anything to the df collecting chains. 
print("Run failed") - + # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) - + res[0].to_csv('individual_histories.csv') return res diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history.py similarity index 100% rename from src/tlo/methods/individual_history_tracker.py rename to src/tlo/methods/individual_history.py diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 619c062925..e065e20c84 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -11,7 +11,7 @@ enhanced_lifestyle, healthseekingbehaviour, healthsystem, - individual_history_tracker, + individual_history, mockitis, simplified_births, symptommanager, @@ -42,7 +42,7 @@ def test_individual_history_tracker(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.individual_history_tracker": logging.INFO + "tlo.methods.individual_history": logging.INFO } }, resourcefilepath=resourcefilepath ) @@ -52,7 +52,7 @@ def test_individual_history_tracker(tmpdir, seed): simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - individual_history_tracker.IndividualHistoryTracker(), + individual_history.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), mockitis.Mockitis(), @@ -68,19 +68,19 @@ def test_individual_history_tracker(tmpdir, seed): output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) individual_histories = reconstruct_individual_histories( - output_chains['tlo.methods.individual_history_tracker']['individual_histories']) - + output_chains['tlo.methods.individual_history']['individual_histories']) + # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() - + # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() - + # Assert that all HSI events that occurred were also collected in the event chains HSIs_in_individual_histories = individual_histories["EventName"].str.contains('HSI', na=False).sum() assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) From af4ace9291ad60ef8d50cbf1999410e7379cba37 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 22:08:23 +0000 Subject: [PATCH 79/97] Clean up variable names --- .../analysis_extract_data.py | 10 ++--- src/tlo/analysis/utils.py | 18 ++++----- src/tlo/events.py | 8 ++-- src/tlo/methods/hsi_event.py | 16 ++++---- src/tlo/methods/individual_history.py | 39 ++++++++++++------- src/tlo/util.py | 26 ++++++------- tests/test_individual_history_tracker.py | 14 +++---- 7 files changed, 70 insertions(+), 61 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index e88f68bfe9..fc67df6879 100644 --- 
a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -8,11 +8,11 @@ def print_filtered_df(df): """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + Prints rows of the DataFrame excluding event_name 'Initialise' and 'Birth'. """ pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - + filtered = df # [~df['event_name'].isin(['StartOfSimulation', 'Birth'])] + dict_cols = ["Info"] max_items = 2 # Step 2: Truncate dictionary columns for display @@ -32,9 +32,9 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - + individual_individual_histories = extract_individual_histories(results_folder) - + if __name__ == "__main__": rfp = Path('resources') diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 66e4487467..360fc36416 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -370,7 +370,7 @@ def check_info_value_changes(df): problems = {} # store issues # iterate group-by-group - for E, g in df.groupby("E"): + for E, g in df.groupby("entity"): prev_info = {} for _, row in g.iterrows(): @@ -390,17 +390,17 @@ def check_info_value_changes(df): def reconstruct_individual_histories(df): - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes + # Collapse into 'entity', 'date', 'event_name', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - df.groupby(['E', 'date', 'EventName'], sort=False) - .apply(lambda g: dict(zip(g['A'], g['V']))) + df.groupby(['entity', 'date', 'event_name'], sort=False) + .apply(lambda g: dict(zip(g['attribute'], g['value']))) .reset_index(name='Info') ) df_final = ( df_collapsed - .sort_values(by=['E', 'date']) + .sort_values(by=['entity', 'date']) .reset_index(drop=True) ) @@ -415,7 +415,7 @@ def extract_individual_histories(results_folder: Path, """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. Returns dictionary where keys are draws, and each draw is associated with a dataframe of - format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + format 'entity', 'date', 'event_name', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. 
""" module = 'tlo.methods.individual_history' @@ -440,13 +440,13 @@ def extract_individual_histories(results_folder: Path, df_single_run= reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs - df_single_run['E'] = df_single_run['E'] + ID_offset + df_single_run['entity'] = df_single_run['entity'] + ID_offset # Calculate ID offset for next run - ID_offset = (max(df_single_run['E']) + 1) + ID_offset = (max(df_single_run['entity']) + 1) # The E has now become an ID for the individual in the draw overall, so rename column as such - df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) + df_single_run = df_single_run.rename(columns={'entity': 'person_ID_in_draw'}) # Append these chains to list dfs_from_runs.append(df_single_run) diff --git a/src/tlo/events.py b/src/tlo/events.py index 1ceb30a576..afed91afea 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -68,14 +68,14 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": self.__class__.__name__}) - + "event_name": self.__class__.__name__}) + self.apply(self.target) self.post_apply_hook() - + # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": self.__class__.__name__}) + "event_name": self.__class__.__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ad1f92eedd..a7b6a440e9 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -194,16 +194,16 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - + def run(self, squeeze_factor): """Make the event happen.""" - + # Dispatch notification that HSI event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": self.__class__.__name__}) + "event_name": self.__class__.__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -214,21 +214,21 @@ def run(self, squeeze_factor): footprint = updated_appt_footprint else: footprint = self.EXPECTED_APPT_FOOTPRINT - + if self.facility_info: level = self.facility_info.level else: level = "N/A" - + notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": self.__class__.__name__, + "event_name": self.__class__.__name__, "footprint": footprint, "level": level }) - + return updated_appt_footprint - + def get_consumables( self, diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 189b8f2052..0a04d211d3 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -77,7 +77,12 @@ def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', - data = {"E": row.E, "A": row.A, "V": str(row.V), "EventName": row.EventName}, + data = { + "entity": row.entity, + "attribute": row.attribute, + "value": str(row.value), + "event_name": row.event_name + }, description='Links forming chains of events for simulated individuals') def on_simulation_post_initialise(self, data): @@ -89,9 +94,9 @@ def on_simulation_post_initialise(self, data): # at the start. 
# EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_EventName = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - + eav_plus_event = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + return def on_simulation_post_do_birth(self, data): @@ -99,7 +104,7 @@ def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. - link_info = {'EventName': 'Birth'} + link_info = {'event_name': 'Birth'} link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} chain_links[data['child_id']] = link_info @@ -107,6 +112,9 @@ def on_simulation_post_do_birth(self, data): eav_plus_EventName = convert_chain_links_into_EAV(chain_links) self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + eav_plus_event = convert_chain_links_into_EAV(chain_links) + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + return def on_event_pre_run(self, data): @@ -120,7 +128,7 @@ def on_event_pre_run(self, data): # 2) the event is not in the list of events to ignore if ( (data['module'] not in self.modules_of_interest) - or (data['EventName'] in self.events_to_ignore) + or (data['event_name'] in self.events_to_ignore) ): return @@ -189,7 +197,7 @@ def on_event_post_run(self, data): mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred - link_info = {'EventName' : data['EventName']} + link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): link_info['footprint'] = data['footprint'] link_info['level'] = data['level'] @@ -244,10 +252,12 @@ def on_event_post_run(self, data): # Log chains if chain_links: # Convert chain_links into EAV-type dataframe - eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + eav_plus_event = convert_chain_links_into_EAV(chain_links) # log it self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + # Reset variables self.print_chains = False self.df_before = [] @@ -301,10 +311,13 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): """ + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): + """ This function compares the population dataframe and mni dictionary before/after a population-wide e - vent has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, - and to store the properties which have changed as a result of it. + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. 
""" # Create a mask of where values are different @@ -328,7 +341,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventName': EventName, + 'event_name': event_name, } # Store the new values from df_after for the changed columns @@ -350,7 +363,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'EventName': type(self).__name__, + 'event_name': self.__class__.__name__, } for key_prop in diff_mni[key]: diff --git a/src/tlo/util.py b/src/tlo/util.py index e34a887e42..6ab3f67b20 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -97,30 +97,26 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack(dropna=False).reset_index() - eav.columns = ['E', 'A', 'V'] - eav['EventName'] = event_name - eav = eav[["E", "EventName", "A", "V"]] - + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] return eav - - -def convert_chain_links_into_EAV(chain_links): + +def convert_chain_links_into_EAV(chain_links): df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["EventName"] + id_cols = ["event_name"] eav = df.reset_index().melt( id_vars=["index"] + id_cols, # index = person ID - var_name="A", - value_name="V" + var_name="attribute", + value_name="value" ) - eav.rename(columns={"index": "E"}, inplace=True) - - eav = eav[["E", "EventName", "A", "V"]] - + eav.rename(columns={"index": "entity"}, inplace=True) + eav = eav[["entity", "event_name", "attribute", "value"]] return eav - + def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index e065e20c84..20ce42eb44 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -71,22 +71,22 @@ def test_individual_history_tracker(tmpdir, seed): output_chains['tlo.methods.individual_history']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, - # and that this was logged at the start date - assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize - assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', + #   and that this was logged at the start date + assert (individual_histories['event_name'] == 'StartOfSimulation').sum() == popsize + assert (individual_histories.loc[individual_histories['event_name'] == 'StartOfSimulation', 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) - mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) + mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_individual_histories = 
individual_histories["EventName"].str.contains('HSI', na=False).sum() + HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too - mask = (~individual_histories["EventName"].isin(["StartOfSimulation", "Birth"])) & \ - (~individual_histories["EventName"].str.contains("HSI", na=False)) + mask = (~individual_histories["event_name"].isin(["StartOfSimulation", "Birth"])) & \ + (~individual_histories["event_name"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 From fcf803a5032330fd2810c421368febcc4ee2045b Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 22:17:35 +0000 Subject: [PATCH 80/97] Fix variable names --- src/tlo/methods/individual_history.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 0a04d211d3..48b8cbf1c7 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -108,10 +108,7 @@ def on_simulation_post_do_birth(self, data): link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} chain_links[data['child_id']] = link_info - - eav_plus_EventName = convert_chain_links_into_EAV(chain_links) - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - + eav_plus_event = convert_chain_links_into_EAV(chain_links) self.log_EAV_dataframe_to_individual_histories(eav_plus_event) @@ -247,17 +244,15 @@ def on_event_post_run(self, data): df_after, self.entire_mni_before, entire_mni_after, - data['EventName']) + data['event_name']) # Log chains if chain_links: # Convert chain_links into EAV-type dataframe eav_plus_event = convert_chain_links_into_EAV(chain_links) # log it - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) - + # Reset variables self.print_chains = False self.df_before = [] @@ -309,9 +304,6 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): - """ - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): """ This function compares the population dataframe and mni dictionary before/after a population-wide e From f615f140657fd585c9e4f2f831c28c1a9045c385 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 23:24:52 +0000 Subject: [PATCH 81/97] Move individual-history-specific utils into its module --- src/tlo/methods/individual_history.py | 166 +++++++++++++------------- src/tlo/util.py | 24 ---- 2 files changed, 84 insertions(+), 106 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 48b8cbf1c7..b97d5e8e09 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -7,7 +7,6 @@ from tlo import Module, Parameter, Types, logging from tlo.notify import notifier from tlo.population import Population -from tlo.util import convert_chain_links_into_EAV, df_to_EAV logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -19,13 +18,12 @@ def __init__( name: Optional[str] = None, modules_of_interest: Optional[List[str]] 
= None, events_to_ignore: Optional[List[str]] = None - ): super().__init__(name) - + self.modules_of_interest = modules_of_interest self.events_to_ignore = events_to_ignore - + # This is how I am passing data from fnc taking place before event to the one after # It doesn't seem very elegant but not sure how else to go about it self.print_chains = False @@ -34,7 +32,7 @@ def __init__( self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} - + PARAMETERS = { # Options within module "modules_of_interest": Parameter( @@ -43,28 +41,27 @@ def __init__( "events_to_ignore": Parameter( Types.LIST, "Events to be ignored when collecting chains" ), - } - + } + def initialise_simulation(self, sim): notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) notifier.add_listener("event.pre-run", self.on_event_pre_run) notifier.add_listener("event.post-run", self.on_event_post_run) - + def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) - + self.load_parameters_from_dataframe( + pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") + ) + def initialise_population(self, population): # Use parameter file values by default, if not overwritten - - self.modules_of_interest = self.parameters['modules_of_interest'] \ - if self.modules_of_interest is None \ - else self.modules_of_interest - - self.events_to_ignore = self.parameters['events_to_ignore'] \ - if self.events_to_ignore is None \ - else self.events_to_ignore - + if self.modules_of_interest is None: + self.modules_of_interest = self.parameters['modules_of_interest'] + + if self.events_to_ignore is None: + self.events_to_ignore = self.parameters['events_to_ignore'] + # If modules of interest is '*', set by default to all modules included in the simulation if self.modules_of_interest == ['*']: self.modules_of_interest = list(self.sim.modules.keys()) @@ -72,9 +69,8 @@ def initialise_population(self, population): def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass - - def log_EAV_dataframe_to_individual_histories(self, df): - + + def log_eav_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', data = { @@ -84,51 +80,41 @@ def log_EAV_dataframe_to_individual_histories(self, df): "event_name": row.event_name }, description='Links forming chains of events for simulated individuals') - - def on_simulation_post_initialise(self, data): + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, # only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, # we therefore want to store all of their properties # at the start. 
- + # EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_event = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_eav_dataframe_to_individual_histories(eav_plus_event) - return - def on_simulation_post_do_birth(self, data): - # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. link_info = {'event_name': 'Birth'} link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) - chain_links = {} - chain_links[data['child_id']] = link_info - - eav_plus_event = convert_chain_links_into_EAV(chain_links) - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + chain_links = {data['child_id']: link_info} + + eav_plus_event = convert_chain_links_into_eav(chain_links) + self.log_eav_dataframe_to_individual_histories(eav_plus_event) - return - def on_event_pre_run(self, data): - """Do this when notified that an event is about to run. + """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, a nd if so stored required information before the event has occurred. """ - + # Only log event if # 1) the event belongs to modules of interest and # 2) the event is not in the list of events to ignore - if ( - (data['module'] not in self.modules_of_interest) - or (data['event_name'] in self.events_to_ignore) - ): + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): return - + # Initialise these variables self.print_chains = False self.df_before = [] @@ -136,15 +122,15 @@ def on_event_pre_run(self, data): self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} - + self.print_chains = True - + # Target is single individual if not isinstance(data['target'], Population): # Save row for comparison after event has occurred self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - + # Check if individual is already in mni dictionary, if so copy her original status if 'PregnancySupervisor' in self.sim.modules: mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info @@ -153,7 +139,7 @@ def on_event_pre_run(self, data): self.mni_row_before = mni[data['target']].copy() else: self.mni_row_before = None - + else: # This will be a population-wide event. In order to find individuals for which this led to @@ -165,25 +151,22 @@ def on_event_pre_run(self, data): else: self.entire_mni_before = None - return - - def on_event_post_run(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property - changes that have occured to one or multiple individuals as a result of the event taking place. + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
""" - + if not self.print_chains: return - + chain_links = {} - + # Target is single individual if not isinstance(data["target"], Population): - + # Copy full new status for individual row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - + # Check if individual is in mni after the event mni_instances_after = False if 'PregnancySupervisor' in self.sim.modules: @@ -192,18 +175,18 @@ def on_event_post_run(self, data): mni_instances_after = True else: mni_instances_after = None - + # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): link_info['footprint'] = data['footprint'] link_info['level'] = data['level'] - + # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - + if 'PregnancySupervisor' in self.sim.modules: # Now check and store changes in the mni dictionary, accounting for following cases: # Individual is in mni dictionary before and after @@ -224,21 +207,21 @@ def on_event_post_run(self, data): if self.mni_values_differ(default[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] # Else, no need to do anything - + # Add individual to the chain links chain_links[data['target']] = link_info - + else: # Target is entire population. Identify individuals for which properties have changed # and store their changes. - + # Population frame after event df_after = self.sim.population.props if 'PregnancySupervisor' in self.sim.modules: entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: entire_mni_after = None - + # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, @@ -249,10 +232,10 @@ def on_event_post_run(self, data): # Log chains if chain_links: # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_EAV(chain_links) + eav_plus_event = convert_chain_links_into_eav(chain_links) # log it - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) - + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + # Reset variables self.print_chains = False self.df_before = [] @@ -261,8 +244,6 @@ def on_event_post_run(self, data): self.mni_row_before = {} self.entire_mni_before = {} - return - def mni_values_differ(self, v1, v2): if isinstance(v1, list) and isinstance(v2, list): @@ -271,12 +252,12 @@ def mni_values_differ(self, v1, v2): if pd.isna(v1) and pd.isna(v2): return False # treat both NaT/NaN as equal return v1 != v2 - + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs = {} all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - + for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: @@ -285,7 +266,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] - + elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: if self.mni_values_differ(entire_mni_before[person][key], @@ -303,7 +284,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): 
diffs[person][key] = entire_mni_after[person][key] return diffs - + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): """ This function compares the population dataframe and mni dictionary before/after a population-wide e @@ -311,20 +292,20 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ - + # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) if 'PregnancySupervisor' in self.sim.modules: diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) else: diff_mni = [] - + # Create an empty dict to store changes for each of the individuals chain_links = {} # Loop through each row of the mask persons_changed = [] - + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() @@ -335,7 +316,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info = { 'event_name': event_name, } - + # Store the new values from df_after for the changed columns for col in changed_cols: link_info[col] = df_after.at[idx, col] @@ -347,7 +328,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - + if 'PregnancySupervisor' in self.sim.modules: # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -357,12 +338,33 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info = { 'event_name': self.__class__.__name__, } - + for key_prop in diff_mni[key]: link_info[key_prop] = diff_mni[key][key_prop] - + chain_links[key] = link_info return chain_links - +def df_to_eav(df, date, event_name): + """Function to convert dataframe into EAV""" + eav = df.stack(dropna=False).reset_index() + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav + + +def convert_chain_links_into_eav(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") + id_cols = ["event_name"] + + eav = df.reset_index().melt( + id_vars=["index"] + id_cols, # index = person ID + var_name="attribute", + value_name="value" + ) + + eav.rename(columns={"index": "entity"}, inplace=True) + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav diff --git a/src/tlo/util.py b/src/tlo/util.py index 6ab3f67b20..efe17a9920 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -94,30 +94,6 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states -def df_to_EAV(df, date, event_name): - """Function to convert dataframe into EAV""" - eav = df.stack(dropna=False).reset_index() - eav.columns = ['entity', 'attribute', 'value'] - eav['event_name'] = event_name - eav = eav[["entity", "event_name", "attribute", "value"]] - return eav - - -def convert_chain_links_into_EAV(chain_links): - df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["event_name"] - - eav = df.reset_index().melt( - id_vars=["index"] + id_cols, # index = person ID - var_name="attribute", - value_name="value" - ) - - eav.rename(columns={"index": "entity"}, inplace=True) - eav = eav[["entity", "event_name", 
"attribute", "value"]] - return eav - - def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities that are specific to each individual. From 21482a6999bb891f31cc579d6d0196985f4f6eb3 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 3 Dec 2025 01:49:18 +0000 Subject: [PATCH 82/97] Don't send data if there isn't any - default is `None` --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 71a90b04ff..4d0c5d4cee 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.post-initialise", data={}) + notifier.dispatch("simulation.post-initialise") def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. From 87f4710228778387d8edb675da95b0323ac4f2e7 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 3 Dec 2025 01:55:42 +0000 Subject: [PATCH 83/97] Rename HSI event notifications - should only come from one place - same listener can listen for different notifications --- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index a7b6a440e9..db5d599493 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -200,7 +200,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.pre-run", + notifier.dispatch("hsi_event.pre-run", data={"target": self.target, "module" : self.module.name, "event_name": self.__class__.__name__}) @@ -220,7 +220,7 @@ def run(self, squeeze_factor): else: level = "N/A" - notifier.dispatch("event.post-run", + notifier.dispatch("hsi_event.post-run", data={"target": self.target, "event_name": self.__class__.__name__, "footprint": footprint, diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index b97d5e8e09..6265c7b928 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -48,6 +48,8 @@ def initialise_simulation(self, sim): notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) notifier.add_listener("event.pre-run", self.on_event_pre_run) notifier.add_listener("event.post-run", self.on_event_post_run) + notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) + notifier.add_listener("hsi_event.post-run", self.on_event_post_run) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe( From d44285e95bde37305d5e2f64fde1bbb3727f8f75 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:28:26 +0000 Subject: [PATCH 84/97] Simplify handling of mni differences for individual --- src/tlo/methods/individual_history.py | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 6265c7b928..420710a9d9 100644 --- 
a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -166,17 +166,10 @@ def on_event_post_run(self, data): # Target is single individual if not isinstance(data["target"], Population): - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + pop = self.sim.population.props - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None + # Copy full new status for individual + row_after = pop.loc[data['target']].fillna(-99999) # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -189,26 +182,37 @@ def on_event_post_run(self, data): if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - if 'PregnancySupervisor' in self.sim.modules: + if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + # Check if individual is in mni after the event + mni_instances_after = False + if data['target'] in mni: + mni_instances_after = True + # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after + + # 1. Individual is not in mni neither before nor after event, can pass + if not self.mni_instances_before and not self.mni_instances_after: + pass + # 2. Individual is in mni dictionary before and after if self.mni_instances_before and mni_instances_after: for key in self.mni_row_before: if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event + # 3. Individual is only in mni dictionary before event elif self.mni_instances_before and not mni_instances_after: default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in self.mni_row_before: if self.mni_values_differ(self.mni_row_before[key], default[key]): link_info[key] = default[key] - # Individual is only in mni dictionary after event + # 4. Individual is only in mni dictionary after event elif mni_instances_after and not self.mni_instances_before: default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.mni_values_differ(default[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] - # Else, no need to do anything # Add individual to the chain links chain_links[data['target']] = link_info From d01fb3e9f5bf4b6224d57197145a3190ac344fb1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 5 Dec 2025 11:34:18 +0000 Subject: [PATCH 85/97] Fix issue with nan changes being saved. 
This was down to EAV conversion, not df comparison --- src/tlo/methods/individual_history.py | 50 +++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 420710a9d9..0411ee016d 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -194,7 +194,7 @@ def on_event_post_run(self, data): # Now check and store changes in the mni dictionary, accounting for following cases: # 1. Individual is not in mni neither before nor after event, can pass - if not self.mni_instances_before and not self.mni_instances_after: + if not self.mni_instances_before and not mni_instances_after: pass # 2. Individual is in mni dictionary before and after if self.mni_instances_before and mni_instances_after: @@ -223,6 +223,7 @@ def on_event_post_run(self, data): # Population frame after event df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: @@ -298,20 +299,22 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + # Create an empty dict to store changes for each of the individuals + chain_links = {} + + # Individuals undergoing changes in the generap pop dataframe + persons_changed = [] - # Create a mask of where values are different - diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + # Collect changes in the pop dataframe before/after the event + same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) + diff_mask = ~same + + # Collect changes in the mni dictionary if 'PregnancySupervisor' in self.sim.modules: diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) else: diff_mni = [] - # Create an empty dict to store changes for each of the individuals - chain_links = {} - - # Loop through each row of the mask - persons_changed = [] - for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() @@ -334,7 +337,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - + if 'PregnancySupervisor' in self.sim.modules: # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -349,6 +352,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = link_info + return chain_links @@ -362,15 +366,23 @@ def df_to_eav(df, date, event_name): def convert_chain_links_into_eav(chain_links): - df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["event_name"] - eav = df.reset_index().melt( - id_vars=["index"] + id_cols, # index = person ID - var_name="attribute", - value_name="value" - ) + rows = [] - eav.rename(columns={"index": "entity"}, inplace=True) - eav = eav[["entity", "event_name", "attribute", "value"]] + for e, data in chain_links.items(): + event_name = data.get("event_name") + + for attr, val in data.items(): + if attr == "event_name": + continue + + rows.append({ + "entity": e, + "event_name": event_name, + "attribute": attr, + "value": val + }) + + eav = pd.DataFrame(rows) + return 
eav From 1e0c55ae4e8785c262a8f6167665180d93b572e3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 5 Dec 2025 16:39:56 +0000 Subject: [PATCH 86/97] Unify approach taken to copy pop dataframe and mni, and remove all events logged following death in postprocessing --- src/tlo/analysis/utils.py | 28 +++++++++++++- src/tlo/methods/individual_history.py | 53 +++++++++++++++++++++------ 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 360fc36416..c0cafa0f7c 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -386,7 +386,27 @@ def check_info_value_changes(df): prev_info = row["Info"] return problems + +def remove_events_for_individual_after_death(df): + rows_to_drop = [] + # Group by entity + for entity, g in df.groupby("entity"): + died = False + + for idx, row in g.iterrows(): + current_info = row["Info"] + + if not died: + # Check if this row marks death + if isinstance(current_info, dict) and current_info.get("is_alive") is False: + died = True + else: + # Already dead → mark this row for removal + rows_to_drop.append(idx) + + # Drop all marked rows + return df.drop(index=rows_to_drop) def reconstruct_individual_histories(df): @@ -404,8 +424,14 @@ def reconstruct_individual_histories(df): .reset_index(drop=True) ) + df_final = remove_events_for_individual_after_death(df_final) + problems = check_info_value_changes(df_final) - print(problems) + if len(problems)>0: + print("Values didn't change but were still detected") + print(problems) + + return df_final diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 0411ee016d..23af6d94ff 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -72,6 +72,34 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass + def copy_of_pop_dataframe(self): + df_copy = self.sim.population.props.copy() + for col in df_copy.columns: + df_copy[col] = df_copy[col].apply( + lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x + ) + return df_copy + + def copy_of_pop_dataframe_row(self, person_ID): + copy_of_row = self.sim.population.props.loc[person_ID].copy() + for col,val in copy_of_row.items(): + if isinstance(val, (list, dict, pd.Series)): + copy_of_row[col] = copy.deepcopy(val) + copy_of_row = copy_of_row.fillna(-99999) + return copy_of_row + + def copy_of_mni(self): + """Function to safely copy entire mni dictionary, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + def copy_of_mni_row(self, person_ID): + """Function to safely copy mni entry for single individual, ensuring that series attributes + are safely copied too. 
+ """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) + def log_eav_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', @@ -131,14 +159,14 @@ def on_event_pre_run(self, data): if not isinstance(data['target'], Population): # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + self.row_before = self.copy_of_pop_dataframe_row(data['target']) # Check if individual is already in mni dictionary, if so copy her original status if 'PregnancySupervisor' in self.sim.modules: mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info if data['target'] in mni: self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() + self.mni_row_before = self.copy_of_mni_row(data['target']) else: self.mni_row_before = None @@ -146,10 +174,9 @@ def on_event_pre_run(self, data): # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() + self.df_before = self.copy_of_pop_dataframe() if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy( - self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + self.entire_mni_before = self.copy_of_mni() else: self.entire_mni_before = None @@ -164,12 +191,12 @@ def on_event_post_run(self, data): chain_links = {} # Target is single individual - if not isinstance(data["target"], Population): + if not isinstance(data['target'], Population): pop = self.sim.population.props # Copy full new status for individual - row_after = pop.loc[data['target']].fillna(-99999) + row_after = self.copy_of_pop_dataframe_row(data['target']) # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -221,11 +248,11 @@ def on_event_post_run(self, data): # Target is entire population. Identify individuals for which properties have changed # and store their changes. 
- # Population frame after event - df_after = self.sim.population.props + # Population dataframe after event + df_after = self.copy_of_pop_dataframe() if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + entire_mni_after = self.copy_of_mni() else: entire_mni_after = None @@ -357,7 +384,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be def df_to_eav(df, date, event_name): - """Function to convert dataframe into EAV""" + """Function to convert entire population dataframe into custom EAV""" eav = df.stack(dropna=False).reset_index() eav.columns = ['entity', 'attribute', 'value'] eav['event_name'] = event_name @@ -366,7 +393,7 @@ def df_to_eav(df, date, event_name): def convert_chain_links_into_eav(chain_links): - + """Function to convert chain links into custom EAV""" rows = [] for e, data in chain_links.items(): @@ -386,3 +413,5 @@ def convert_chain_links_into_eav(chain_links): eav = pd.DataFrame(rows) return eav + + From 1269bda421b6a7d774c8b1692b3d3e07bde822a4 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 18 Dec 2025 12:50:05 +0000 Subject: [PATCH 87/97] Add tracking property for individual --- src/tlo/methods/individual_history.py | 417 ----------------------- tests/test_individual_history_tracker.py | 36 +- 2 files changed, 25 insertions(+), 428 deletions(-) delete mode 100644 src/tlo/methods/individual_history.py diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py deleted file mode 100644 index 23af6d94ff..0000000000 --- a/src/tlo/methods/individual_history.py +++ /dev/null @@ -1,417 +0,0 @@ -import copy -from pathlib import Path -from typing import List, Optional - -import pandas as pd - -from tlo import Module, Parameter, Types, logging -from tlo.notify import notifier -from tlo.population import Population - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -class IndividualHistoryTracker(Module): - - def __init__( - self, - name: Optional[str] = None, - modules_of_interest: Optional[List[str]] = None, - events_to_ignore: Optional[List[str]] = None - ): - super().__init__(name) - - self.modules_of_interest = modules_of_interest - self.events_to_ignore = events_to_ignore - - # This is how I am passing data from fnc taking place before event to the one after - # It doesn't seem very elegant but not sure how else to go about it - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - PARAMETERS = { - # Options within module - "modules_of_interest": Parameter( - Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" - ), - "events_to_ignore": Parameter( - Types.LIST, "Events to be ignored when collecting chains" - ), - } - - def initialise_simulation(self, sim): - notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) - notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) - notifier.add_listener("event.pre-run", self.on_event_pre_run) - notifier.add_listener("event.post-run", self.on_event_post_run) - notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) - notifier.add_listener("hsi_event.post-run", self.on_event_post_run) - - def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe( - pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") - ) - - def initialise_population(self, population): - # Use parameter file values by default, if not overwritten - if self.modules_of_interest is None: - self.modules_of_interest = self.parameters['modules_of_interest'] - - if self.events_to_ignore is None: - self.events_to_ignore = self.parameters['events_to_ignore'] - - # If modules of interest is '*', set by default to all modules included in the simulation - if self.modules_of_interest == ['*']: - self.modules_of_interest = list(self.sim.modules.keys()) - - def on_birth(self, mother, child): - # Could the notification of birth simply take place here? - pass - - def copy_of_pop_dataframe(self): - df_copy = self.sim.population.props.copy() - for col in df_copy.columns: - df_copy[col] = df_copy[col].apply( - lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x - ) - return df_copy - - def copy_of_pop_dataframe_row(self, person_ID): - copy_of_row = self.sim.population.props.loc[person_ID].copy() - for col,val in copy_of_row.items(): - if isinstance(val, (list, dict, pd.Series)): - copy_of_row[col] = copy.deepcopy(val) - copy_of_row = copy_of_row.fillna(-99999) - return copy_of_row - - def copy_of_mni(self): - """Function to safely copy entire mni dictionary, ensuring that series attributes - are safely copied too. - """ - return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - - def copy_of_mni_row(self, person_ID): - """Function to safely copy mni entry for single individual, ensuring that series attributes - are safely copied too. - """ - return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) - - def log_eav_dataframe_to_individual_histories(self, df): - for idx, row in df.iterrows(): - logger.info(key='individual_histories', - data = { - "entity": row.entity, - "attribute": row.attribute, - "value": str(row.value), - "event_name": row.event_name - }, - description='Links forming chains of events for simulated individuals') - - def on_simulation_post_initialise(self, data): - # When logging events for each individual to reconstruct chains, - # only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, - # we therefore want to store all of their properties - # at the start. 
- - # EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - def on_simulation_post_do_birth(self, data): - # When individual is born, store their initial properties to provide a starting point to the - # chain of property changes that this individual will undergo - # as a result of events taking place. - link_info = {'event_name': 'Birth'} - link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) - chain_links = {data['child_id']: link_info} - - eav_plus_event = convert_chain_links_into_eav(chain_links) - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - def on_event_pre_run(self, data): - """Do this when notified that an event is about to run. - This function checks whether this event should be logged as part of the event chains, a - nd if so stored required information before the event has occurred. - """ - - # Only log event if - # 1) the event belongs to modules of interest and - # 2) the event is not in the list of events to ignore - if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): - return - - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - self.print_chains = True - - # Target is single individual - if not isinstance(data['target'], Population): - - # Save row for comparison after event has occurred - self.row_before = self.copy_of_pop_dataframe_row(data['target']) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = self.copy_of_mni_row(data['target']) - else: - self.mni_row_before = None - - else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.copy_of_pop_dataframe() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = self.copy_of_mni() - else: - self.entire_mni_before = None - - def on_event_post_run(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property - changes that have occured to one or multiple individuals as a result of the event taking place. 
- """ - - if not self.print_chains: - return - - chain_links = {} - - # Target is single individual - if not isinstance(data['target'], Population): - - pop = self.sim.population.props - - # Copy full new status for individual - row_after = self.copy_of_pop_dataframe_row(data['target']) - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = {'event_name' : data['event_name']} - if 'footprint' in data.keys(): - link_info['footprint'] = data['footprint'] - link_info['level'] = data['level'] - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - # Check if individual is in mni after the event - mni_instances_after = False - if data['target'] in mni: - mni_instances_after = True - - # Now check and store changes in the mni dictionary, accounting for following cases: - - # 1. Individual is not in mni neither before nor after event, can pass - if not self.mni_instances_before and not mni_instances_after: - pass - # 2. Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # 3. Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], default[key]): - link_info[key] = default[key] - # 4. Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - - # Add individual to the chain links - chain_links[data['target']] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population dataframe after event - df_after = self.copy_of_pop_dataframe() - - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = self.copy_of_mni() - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, - df_after, - self.entire_mni_before, - entire_mni_after, - data['event_name']) - - # Log chains - if chain_links: - # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_eav(chain_links) - # log it - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - def mni_values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): - diffs = {} - - all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - - for person in all_individuals: - if person not in entire_mni_before: # but is afterward - for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key], - self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - elif person not in entire_mni_after: # but is beforehand - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key], - self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] - - else: # person is in both - # Compare properties - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - return diffs - - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): - """ - This function compares the population dataframe and mni dictionary before/after a population-wide e - vent has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, - and to store the properties which have changed as a result of it. 
- """ - # Create an empty dict to store changes for each of the individuals - chain_links = {} - - # Individuals undergoing changes in the generap pop dataframe - persons_changed = [] - - # Collect changes in the pop dataframe before/after the event - same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) - diff_mask = ~same - - # Collect changes in the mni dictionary - if 'PregnancySupervisor' in self.sim.modules: - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) - else: - diff_mni = [] - - for idx, row in diff_mask.iterrows(): - changed_cols = row.index[row].tolist() - - if changed_cols: # Proceed only if there are changes in the row - persons_changed.append(idx) - # Create a dictionary for this person - # First add event info - link_info = { - 'event_name': event_name, - } - - # Store the new values from df_after for the changed columns - for col in changed_cols: - link_info[col] = df_after.at[idx, col] - - if idx in diff_mni: - # This person has also undergone changes in the mni dictionary, so add these here - for key in diff_mni[idx]: - link_info[col] = diff_mni[idx][key] - - # Append the event and changes to the individual key - chain_links[idx] = link_info - - if 'PregnancySupervisor' in self.sim.modules: - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'event_name': self.__class__.__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] - - chain_links[key] = link_info - - return chain_links - - -def df_to_eav(df, date, event_name): - """Function to convert entire population dataframe into custom EAV""" - eav = df.stack(dropna=False).reset_index() - eav.columns = ['entity', 'attribute', 'value'] - eav['event_name'] = event_name - eav = eav[["entity", "event_name", "attribute", "value"]] - return eav - - -def convert_chain_links_into_eav(chain_links): - """Function to convert chain links into custom EAV""" - rows = [] - - for e, data in chain_links.items(): - event_name = data.get("event_name") - - for attr, val in data.items(): - if attr == "event_name": - continue - - rows.append({ - "entity": e, - "event_name": event_name, - "attribute": attr, - "value": val - }) - - eav = pd.DataFrame(rows) - - return eav - - diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 20ce42eb44..7d4a34a591 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -11,10 +11,16 @@ enhanced_lifestyle, healthseekingbehaviour, healthsystem, - individual_history, + contraception, + individual_history_tracker, mockitis, - simplified_births, + newborn_outcomes, + pregnancy_supervisor, + care_of_women_during_pregnancy, + labour, + postnatal_supervisor, symptommanager, + hiv, ) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' @@ -42,21 +48,26 @@ def test_individual_history_tracker(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.individual_history": logging.INFO + "tlo.methods.individual_history_tracker": logging.INFO } }, resourcefilepath=resourcefilepath ) # Register the core modules sim.register(demography.Demography(), - simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - 
individual_history.IndividualHistoryTracker(), + individual_history_tracker.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), - mockitis.Mockitis(), - chronicsyndrome.ChronicSyndrome() + chronicsyndrome.ChronicSyndrome(), + contraception.Contraception(), + newborn_outcomes.NewbornOutcomes(), + pregnancy_supervisor.PregnancySupervisor(), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(), + labour.Labour(), + postnatal_supervisor.PostnatalSupervisor(), + hiv.DummyHivModule(), ) # Run the simulation @@ -68,7 +79,7 @@ def test_individual_history_tracker(tmpdir, seed): output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) individual_histories = reconstruct_individual_histories( - output_chains['tlo.methods.individual_history']['individual_histories']) + output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, #   and that this was logged at the start date @@ -81,9 +92,12 @@ def test_individual_history_tracker(tmpdir, seed): mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() - # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() - assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) + # Assert that all HSI events that occurred were also collected in the event chains. Do not include Inpatient_Care HSIs, as these + # are not currently treated as being individual-specific + Num_of_HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() + Num_of_HSIs_in_hs_log = len(output['tlo.methods.healthsystem']['HSI_Event'].loc[ + output['tlo.methods.healthsystem']['HSI_Event']['Event_Name'] != 'Inpatient_Care']) + assert Num_of_HSIs_in_individual_histories == Num_of_HSIs_in_hs_log # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too mask = (~individual_histories["event_name"].isin(["StartOfSimulation", "Birth"])) & \ From b8b858ecc2c24f7306358e50c342a3d6474ea9b7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 12:10:16 +0000 Subject: [PATCH 88/97] Track consumable access --- .../scenario_track_individual_histories.py | 4 +- src/tlo/methods/consumables.py | 41 +- src/tlo/methods/hsi_event.py | 8 +- src/tlo/methods/individual_history_tracker.py | 468 ++++++++++++++++++ 4 files changed, 502 insertions(+), 19 deletions(-) create mode 100644 src/tlo/methods/individual_history_tracker.py diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 0c6a43b127..0b9eaf9263 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods import individual_history +from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel 
from tlo.scenario import BaseScenario @@ -53,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel() + [individual_history.IndividualHistoryTracker()] + fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index c16b22e6cd..46a2dd94ae 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -8,6 +8,8 @@ import pandas as pd from tlo import logging +from tlo.notify import notifier + logger = logging.getLogger('tlo.methods.healthsystem') logger_summary = logging.getLogger('tlo.methods.healthsystem.summary') @@ -249,7 +251,11 @@ def _request_consumables(self, essential_item_codes: dict, optional_item_codes: Optional[dict] = None, to_log: bool = True, + to_broadcast: bool = True, treatment_id: Optional[str] = None, + target: Optional[int] = None, + event_name: Optional[str] = None, + module: Optional[str] = None ) -> dict: """This is a private function called by 'get_consumables` in the `HSI_Event` base class. It queries whether item_codes are currently available at a particular Facility_ID and logs the request. @@ -282,28 +288,31 @@ def _request_consumables(self, override_probability=override_probability) # Log the request and the outcome: - if to_log: + if to_log or to_broadcast: items_available = {k: v for k, v in _all_item_codes.items() if available[k]} items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} # Log items used if all essential items are available items_used = items_available if all(available.get(k, False) for k in essential_item_codes) else {} - logger.info( - key='Consumables', - data={ - 'TREATMENT_ID': treatment_id or "", - 'Item_Available': str(items_available), - 'Item_NotAvailable': str(items_not_available), - 'Item_Used': str(items_used), - }, - description="Record of requested and used consumable items." - ) - self._summary_counter.record_availability( - items_available=items_available, - items_not_available=items_not_available, - items_used=items_used, - ) + if to_log: + logger.info( + key='Consumables', + data={ + 'TREATMENT_ID': treatment_id or "", + 'Item_Available': str(items_available), + 'Item_NotAvailable': str(items_not_available), + 'Item_Used': str(items_used), + }, + description="Record of requested and used consumable items." + ) + self._summary_counter.record_availability( + items_available=items_available, + items_not_available=items_not_available, + items_used=items_used, + ) + + notifier.dispatch("consumables._request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) # Return the result of the check on availability return available diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index e0cc1adda9..c48d1a4ca0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -227,7 +227,8 @@ def run(self, squeeze_factor): data={"target": self.target, "event_name": self.__class__.__name__, "footprint": footprint, - "level": level + "level": level, + "treatment_ID": self.TREATMENT_ID }) return updated_appt_footprint @@ -263,14 +264,19 @@ def get_consumables( # Determine if the request should be logged (over-ride argument provided if HealthSystem is disabled). 
_to_log = to_log if not self.healthcare_system.disable else False + _to_broadcast = True if 'IndividualHistoryTracker' in self.module.sim.modules else False # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( essential_item_codes=_item_codes, optional_item_codes=_optional_item_codes, to_log=_to_log, + to_broadcast = _to_broadcast, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, + target=self.target, + event_name=self.__class__.__name__, + module = self.module ) # Return result in expected format: diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py new file mode 100644 index 0000000000..175198948c --- /dev/null +++ b/src/tlo/methods/individual_history_tracker.py @@ -0,0 +1,468 @@ +import copy +from pathlib import Path +from typing import List, Optional + +import pandas as pd + +from tlo import Module, Parameter, Property, Types, logging +from tlo.notify import notifier +from tlo.population import Population + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class IndividualHistoryTracker(Module): + + def __init__( + self, + name: Optional[str] = None, + modules_of_interest: Optional[List[str]] = None, + events_to_ignore: Optional[List[str]] = None + ): + super().__init__(name) + + self.modules_of_interest = modules_of_interest + self.events_to_ignore = events_to_ignore + + # This is how I am passing data from fnc taking place before event to the one after + # It doesn't seem very elegant but not sure how else to go about it + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + PARAMETERS = { + # Options within module + "modules_of_interest": Parameter( + Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" + ), + "events_to_ignore": Parameter( + Types.LIST, "Events to be ignored when collecting chains" + ), + } + + PROPERTIES = { + "track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") + } + + def initialise_simulation(self, sim): + notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) + notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) + notifier.add_listener("event.pre-run", self.on_event_pre_run) + notifier.add_listener("event.post-run", self.on_event_post_run) + notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) + notifier.add_listener("hsi_event.post-run", self.on_event_post_run) + notifier.add_listener("consumables._request-consumables", self.on_consumable_request) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + self.load_parameters_from_dataframe( + pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") + ) + + def initialise_population(self, population): + # Use parameter file values by default, if not overwritten + if self.modules_of_interest is None: + self.modules_of_interest = self.parameters['modules_of_interest'] + + if self.events_to_ignore is None: + self.events_to_ignore = self.parameters['events_to_ignore'] + + # If modules of interest is '*', set by default to all modules included in the simulation + if self.modules_of_interest == ['*']: + self.modules_of_interest = list(self.sim.modules.keys()) + + # Initialise all individuals as being tracked by default + pop = self.sim.population.props + pop.loc[pop.is_alive, "track_history"] = True + + def on_birth(self, mother, child): + self.sim.population.props.at[child, "track_history"] = True + return + + def copy_of_pop_dataframe(self): + df_copy = self.sim.population.props.copy() + for col in df_copy.columns: + df_copy[col] = df_copy[col].apply( + lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x + ) + return df_copy + + def copy_of_pop_dataframe_row(self, person_ID): + copy_of_row = self.sim.population.props.loc[person_ID].copy() + for col,val in copy_of_row.items(): + if isinstance(val, (list, dict, pd.Series)): + copy_of_row[col] = copy.deepcopy(val) + copy_of_row = copy_of_row.fillna(-99999) + return copy_of_row + + def copy_of_mni(self): + """Function to safely copy entire mni dictionary, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + def copy_of_mni_row(self, person_ID): + """Function to safely copy mni entry for single individual, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) + + def log_eav_dataframe_to_individual_histories(self, df): + for idx, row in df.iterrows(): + logger.info(key='individual_histories', + data = { + "entity": row.entity, + "attribute": row.attribute, + "value": str(row.value), + "event_name": row.event_name + }, + description='Links forming chains of events for simulated individuals') + + def on_simulation_post_initialise(self, data): + # When logging events for each individual to reconstruct chains, + # only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, + # we therefore want to store all of their properties + # at the start. 
+ + # EDNAV structure to capture status of individuals at the start of the simulation + eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + def on_simulation_post_do_birth(self, data): + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. + link_info = {'event_name': 'Birth'} + link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) + chain_links = {data['child_id']: link_info} + + eav_plus_event = convert_chain_links_into_eav(chain_links) + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + def on_consumable_request(self,data): + """Do this when notified that an individual has accessed consumables""" + # Only log event if + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): + return + + # Copy this info for individual + chain_links = {} + chain_links[data['target']] = {k: v for k, v in data.items() if k != 'target'} + + # Convert chain_links into EAV-type dataframe + eav_plus_event = convert_chain_links_into_eav(chain_links) + # log it + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + return + + + def on_event_pre_run(self, data): + """Do this when notified that an event is about to run. + This function checks whether this event should be logged as part of the event chains, a + nd if so stored required information before the event has occurred. + """ + + # Only log event if + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): + self.print_chains = False + return + + # Initialise these variables + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + self.print_chains = True + + # Target is single individual + if not isinstance(data['target'], Population): + + # Save pop dataframe row for comparison after event has occurred + self.row_before = self.copy_of_pop_dataframe_row(data['target']) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules and (self.sim.population.props.loc[data['target'],'sex'] == 'F'): + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = self.copy_of_mni_row(data['target']) + else: + self.mni_row_before = None + + else: + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.copy_of_pop_dataframe() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = self.copy_of_mni() + else: + self.entire_mni_before = None + + def on_event_post_run(self, data): + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
+ """ + + if not self.print_chains: + return + + chain_links = {} + + # Target is single individual + if not isinstance(data['target'], Population): + + pop = self.sim.population.props + + # Copy full new status for individual + row_after = self.copy_of_pop_dataframe_row(data['target']) + + # If individual qualified for the 'tracked' category either before OR after the event occurred, the event will be logged: + if self.row_before['track_history'] or row_after['track_history']: + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = {'event_name' : data['event_name']} + if 'footprint' in data.keys(): + link_info['footprint'] = data['footprint'] + link_info['level'] = data['level'] + link_info['treatment_ID'] = data['treatment_ID'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + # Check for any changes in mni dictionary + if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + # Check if individual is in mni after the event + mni_instances_after = False + if data['target'] in mni: + mni_instances_after = True + + # Now check and store changes in the mni dictionary, accounting for following cases: + + # 1. Individual is not in mni neither before nor after event, can pass + if not self.mni_instances_before and not mni_instances_after: + pass + # 2. Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # 3. Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # 4. Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + + # Add individual to the chain links + chain_links[data['target']] = link_info + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population dataframe after event + df_after = self.copy_of_pop_dataframe() + + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = self.copy_of_mni() + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, + df_after, + self.entire_mni_before, + entire_mni_after, + data['event_name']) + + # Log chains + if chain_links: + # Convert chain_links into EAV-type dataframe + eav_plus_event = convert_chain_links_into_eav(chain_links) + # log it + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + def mni_values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after, set_of_tracked_individuals): + diffs = {} + + all_individuals_in_mni = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + in_mni_and_tracked = all_individuals_in_mni.intersection(set_of_tracked_individuals) + + for person in in_mni_and_tracked: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.mni_values_differ(entire_mni_after[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + return diffs + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): + """ + This function compares the population dataframe and mni dictionary before/after a population-wide e + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. + """ + # Create an empty dict to store changes for each of the individuals + chain_links = {} + + # Individuals undergoing changes in the generap pop dataframe + persons_changed = [] + + # Find individuals which qualify as being tracked because they satisfied requirements either before OR after + # the event occurred. + assert df_before.index.equals(df_after.index), "Indices are not identical!" + assert df_before.columns.equals(df_after.columns), "Columns of df_before and df_after do not match!" 
+ + mask_of_tracked_individuals = df_before['track_history'] | df_after['track_history'] + set_of_tracked_individuals = set(mask_of_tracked_individuals.index[mask_of_tracked_individuals]) + + # Only keep those individuals in dataframes + df_before = df_before[mask_of_tracked_individuals] + df_after = df_after[mask_of_tracked_individuals] + + # For those individuals, collect changes in the pop dataframe before/after the event + same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) + diff_mask = ~same + + # Collect changes in the mni dictionary + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after, set_of_tracked_individuals) + else: + diff_mni = [] + + # Iterate over tracked individuals who experienced changes to properties as a result of the event + for idx, row in diff_mask.iterrows(): + + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + persons_changed.append(idx) + # Create a dictionary for this person + # First add event info + link_info = { + 'event_name': event_name, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + # This person has also undergone changes in the mni dictionary, so add these here + if idx in diff_mni: + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + + # Append the event and changes to the individual key + chain_links[idx] = link_info + + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + # If individual didn't also undergo changes in pop dataframe AND is tracked, add + if key not in persons_changed and key in set_of_tracked_individuals: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'event_name': self.__class__.__name__, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info + + return chain_links + + +def df_to_eav(df, date, event_name): + """Function to convert entire population dataframe into custom EAV""" + eav = df.stack(dropna=False).reset_index() + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav + + +def convert_chain_links_into_eav(chain_links): + """Function to convert chain links into custom EAV""" + rows = [] + + for e, data in chain_links.items(): + event_name = data.get("event_name") + + for attr, val in data.items(): + if attr == "event_name": + continue + + rows.append({ + "entity": e, + "event_name": event_name, + "attribute": attr, + "value": val + }) + + eav = pd.DataFrame(rows) + + return eav + + From 71a7776394b6ac0617fd836eb3a67fa335dac357 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:25:42 +0000 Subject: [PATCH 89/97] Additionally log equipment and beddays --- src/tlo/methods/hsi_event.py | 4 +++- src/tlo/methods/individual_history_tracker.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index c48d1a4ca0..357e0b7893 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -228,7 +228,9 @@ def run(self, squeeze_factor): "event_name": self.__class__.__name__, 
"footprint": footprint, "level": level, - "treatment_ID": self.TREATMENT_ID + "treatment_ID": self.TREATMENT_ID, + "equipment", self._EQUIPMENT, + "bed_days", self.bed_days_allocated_to_this_event, }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 175198948c..856228a393 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -232,9 +232,9 @@ def on_event_post_run(self, data): # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): - link_info['footprint'] = data['footprint'] - link_info['level'] = data['level'] - link_info['treatment_ID'] = data['treatment_ID'] + HSI_specific_fields = {'footprint','level','treatment_ID','equipment','bed_days'} + for field in HSI_specific_fields: + link_info[field] = data[field] # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: From 2feff779ffb64f799f1555cb995394e212cd2626 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 15:05:51 +0000 Subject: [PATCH 90/97] Log consumable access as part of HSI --- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 357e0b7893..e4199dd790 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -229,8 +229,8 @@ def run(self, squeeze_factor): "footprint": footprint, "level": level, "treatment_ID": self.TREATMENT_ID, - "equipment", self._EQUIPMENT, - "bed_days", self.bed_days_allocated_to_this_event, + "equipment": self._EQUIPMENT, + "bed_days": self.bed_days_allocated_to_this_event, }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 856228a393..c0aeca1e96 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -32,6 +32,7 @@ def __init__( self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} + self.consumable_access = {} PARAMETERS = { # Options within module @@ -154,14 +155,10 @@ def on_consumable_request(self,data): chain_links = {} chain_links[data['target']] = {k: v for k, v in data.items() if k != 'target'} - # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_eav(chain_links) - # log it - self.log_eav_dataframe_to_individual_histories(eav_plus_event) + self.consumable_access = chain_links return - def on_event_pre_run(self, data): """Do this when notified that an event is about to run. 
This function checks whether this event should be logged as part of the event chains, a @@ -177,6 +174,7 @@ def on_event_pre_run(self, data): # Initialise these variables self.df_before = [] + self.consumable_access = {} self.row_before = pd.Series() self.mni_instances_before = False self.mni_row_before = {} @@ -276,6 +274,15 @@ def on_event_post_run(self, data): # Add individual to the chain links chain_links[data['target']] = link_info + + # Update with consumable access info + # Consumable access is only at individual level, so this should either be size 0 or 1 + assert len(self.consumable_access) == 0 or len(self.consumable_access) == 1 + if len(self.consumable_access) == 1: + chain_links[data['target']].update({k: v for k, v in + self.consumable_access[data['target']].items() if k not in chain_links[data['target']]}) + self.consumable_access = {} + else: # Target is entire population. Identify individuals for which properties have changed @@ -310,6 +317,7 @@ def on_event_post_run(self, data): self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} + self.consumable_access = {} def mni_values_differ(self, v1, v2): From 3b9cf72c649293dd53a36bc9e0db732c8b1617eb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:34:35 +0000 Subject: [PATCH 91/97] Add prefix on property name --- src/tlo/methods/individual_history_tracker.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index c0aeca1e96..fe4b77d80d 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -45,7 +45,7 @@ def __init__( } PROPERTIES = { - "track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") + "iht_track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") } def initialise_simulation(self, sim): @@ -76,10 +76,10 @@ def initialise_population(self, population): # Initialise all individuals as being tracked by default pop = self.sim.population.props - pop.loc[pop.is_alive, "track_history"] = True + pop.loc[pop.is_alive, "iht_track_history"] = True def on_birth(self, mother, child): - self.sim.population.props.at[child, "track_history"] = True + self.sim.population.props.at[child, "iht_track_history"] = True return def copy_of_pop_dataframe(self): @@ -225,7 +225,7 @@ def on_event_post_run(self, data): row_after = self.copy_of_pop_dataframe_row(data['target']) # If individual qualified for the 'tracked' category either before OR after the event occurred, the event will be logged: - if self.row_before['track_history'] or row_after['track_history']: + if self.row_before['iht_track_history'] or row_after['iht_track_history']: # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -380,7 +380,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be assert df_before.index.equals(df_after.index), "Indices are not identical!" assert df_before.columns.equals(df_after.columns), "Columns of df_before and df_after do not match!"
- mask_of_tracked_individuals = df_before['track_history'] | df_after['track_history'] + mask_of_tracked_individuals = df_before['iht_track_history'] | df_after['iht_track_history'] set_of_tracked_individuals = set(mask_of_tracked_individuals.index[mask_of_tracked_individuals]) # Only keep those individuals in dataframes From 5eff1c2e9a4df0abbf48ea0e46720ec3ce86e754 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:11:30 +0000 Subject: [PATCH 92/97] Remove to_broadcast input to function --- src/tlo/methods/consumables.py | 3 +-- src/tlo/methods/hsi_event.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 46a2dd94ae..2ab2e1891d 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -251,7 +251,6 @@ def _request_consumables(self, essential_item_codes: dict, optional_item_codes: Optional[dict] = None, to_log: bool = True, - to_broadcast: bool = True, treatment_id: Optional[str] = None, target: Optional[int] = None, event_name: Optional[str] = None, @@ -288,7 +287,7 @@ def _request_consumables(self, override_probability=override_probability) # Log the request and the outcome: - if to_log or to_broadcast: + if to_log or 'IndividualHistoryTracker' in self.sim.modules: items_available = {k: v for k, v in _all_item_codes.items() if available[k]} items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index e4199dd790..56febfd12b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -266,14 +266,12 @@ def get_consumables( # Determine if the request should be logged (over-ride argument provided if HealthSystem is disabled). 
_to_log = to_log if not self.healthcare_system.disable else False - _to_broadcast = True if 'IndividualHistoryTracker' in self.module.sim.modules else False # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( essential_item_codes=_item_codes, optional_item_codes=_optional_item_codes, to_log=_to_log, - to_broadcast = _to_broadcast, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, target=self.target, From 98b2d9787668e5f035e2546b0d8175eee2a6c508 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:15:51 +0000 Subject: [PATCH 93/97] Change name of broadcasting for consumable request --- src/tlo/methods/consumables.py | 2 +- src/tlo/methods/individual_history_tracker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 2ab2e1891d..d11acccce3 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -311,7 +311,7 @@ def _request_consumables(self, items_used=items_used, ) - notifier.dispatch("consumables._request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) + notifier.dispatch("consumables.on_request-consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) # Return the result of the check on availability return available diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index fe4b77d80d..1185699add 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -55,7 +55,7 @@ def initialise_simulation(self, sim): notifier.add_listener("event.post-run", self.on_event_post_run) notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) notifier.add_listener("hsi_event.post-run", self.on_event_post_run) - notifier.add_listener("consumables._request-consumables", self.on_consumable_request) + notifier.add_listener("consumables.on_request-consumables", self.on_consumable_request) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe( From 17f9f13d6e6d9fdf6e3aa4f9351d70280e35ee85 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:39:52 +0000 Subject: [PATCH 94/97] Declare demography as dependency to ensure iht_track_individual can work --- src/tlo/methods/individual_history_tracker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 1185699add..03400f86fa 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -34,6 +34,8 @@ def __init__( self.entire_mni_before = {} self.consumable_access = {} + INIT_DEPENDENCIES = {"Demography"} + PARAMETERS = { # Options within module "modules_of_interest": Parameter( From 4bf655f75c108d28a9c0a51e4742cc20707df324 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:42:26 +0000 Subject: [PATCH 95/97] Prepare consumable data if notifier has listeners --- src/tlo/methods/consumables.py | 4 ++-- 
From 17f9f13d6e6d9fdf6e3aa4f9351d70280e35ee85 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 10:39:52 +0000
Subject: [PATCH 94/97] Declare demography as dependency to ensure iht_track_individual can work

---
 src/tlo/methods/individual_history_tracker.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 1185699add..03400f86fa 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -34,6 +34,8 @@ def __init__(
         self.entire_mni_before = {}
         self.consumable_access = {}

+    INIT_DEPENDENCIES = {"Demography"}
+
     PARAMETERS = {
         # Options within module
         "modules_of_interest": Parameter(

From 4bf655f75c108d28a9c0a51e4742cc20707df324 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 10:42:26 +0000
Subject: [PATCH 95/97] Prepare consumable data if notifier has listeners

---
 src/tlo/methods/consumables.py | 4 ++--
 src/tlo/methods/individual_history_tracker.py | 2 +-
 src/tlo/notify.py | 9 +++++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index d11acccce3..b9a597ca14 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -287,7 +287,7 @@ def _request_consumables(self,
                                              override_probability=override_probability)

         # Log the request and the outcome:
-        if to_log or 'IndividualHistoryTracker' in self.sim.modules:
+        if to_log or notifier.has_listeners('consumables.on-request_consumables'):
             items_available = {k: v for k, v in _all_item_codes.items() if available[k]}
             items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]}
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on_request-consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 03400f86fa..3bf40f98ff 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -57,7 +57,7 @@ def initialise_simulation(self, sim):
         notifier.add_listener("event.post-run", self.on_event_post_run)
         notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run)
         notifier.add_listener("hsi_event.post-run", self.on_event_post_run)
-        notifier.add_listener("consumables.on_request-consumables", self.on_consumable_request)
+        notifier.add_listener("consumables.on-request_consumables", self.on_consumable_request)

     def read_parameters(self, resourcefilepath: Optional[Path] = None):
         self.load_parameters_from_dataframe(

diff --git a/src/tlo/notify.py b/src/tlo/notify.py
index b1b4434ba9..e90166d472 100644
--- a/src/tlo/notify.py
+++ b/src/tlo/notify.py
@@ -66,6 +66,15 @@ def clear_listeners(self):
         e.g. if you are running multiple tests or simulations in the same process.
         """
         self.listeners.clear()
+
+    def has_listeners(self, notification_key):
+        """
+        Check if there are any listeners registered for a specific notification.
+
+        :param notification_key: The identifier to check.
+        :return: True if there are listeners, False otherwise.
+        """
+        return notification_key in self.listeners and len(self.listeners[notification_key]) > 0


 # Create a global notifier instance
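
The point of `has_listeners` is that the producer side can skip assembling the broadcast payload entirely when nothing is subscribed, rather than keying the decision to a specific module being loaded. A self-contained sketch of the same guard, with invented names (again, not the project's notify.py):

    listeners = {}

    def add_listener(key, callback):
        listeners.setdefault(key, []).append(callback)

    def has_listeners(key):
        # Mirrors the method added above: a key counts only if it has at least one callback.
        return key in listeners and len(listeners[key]) > 0

    def dispatch(key, data):
        for callback in listeners.get(key, []):
            callback(data)

    def request_consumables(to_log, items):
        key = "consumables.on-request_consumables"
        if to_log or has_listeners(key):
            # Payload construction only happens when someone will actually receive it.
            dispatch(key, {"Item_Available": str(items)})

    add_listener("consumables.on-request_consumables", print)
    request_consumables(to_log=False, items={2671: 1})   # prints {'Item_Available': '{2671: 1}'}
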
From 3935113306d19b22f78e6e2298805583640a9f2d Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 13:02:36 +0000
Subject: [PATCH 96/97] Fix lack of module entry for broadcasted data

---
 src/tlo/methods/consumables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index b9a597ca14..41c1bd322c 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available
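
With `module` included in the broadcast payload, a listener can attribute each consumable request to the module that made it. A hypothetical listener-side sketch (the field values below are invented for illustration):

    def on_consumable_request(data):
        # Unpack the broadcast payload, including the newly added 'module' entry.
        print(f"person {data['target']}: {data['event_name']} from {data['module']} "
              f"used {data['Item_Used']}")

    on_consumable_request({
        'target': 42,
        'module': 'Hiv',                       # invented example value
        'event_name': 'HSI_ExampleTreatment',  # invented example value
        'Item_Available': '{}',
        'Item_NotAvailable': '{}',
        'Item_Used': '{2671: 1}',
    })
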
From c3834cd052f13e7230e735e728db3366b300131f Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 13:11:43 +0000
Subject: [PATCH 97/97] Change name of dispatcher key to post-request_consumables

---
 src/tlo/methods/consumables.py | 2 +-
 src/tlo/methods/individual_history_tracker.py | 2 +-
 tests/test_individual_history_tracker.py | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index 41c1bd322c..e6de9c27e9 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.post-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 3bf40f98ff..5edb6e9f0a 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -57,7 +57,7 @@ def initialise_simulation(self, sim):
         notifier.add_listener("event.post-run", self.on_event_post_run)
         notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run)
         notifier.add_listener("hsi_event.post-run", self.on_event_post_run)
-        notifier.add_listener("consumables.on-request_consumables", self.on_consumable_request)
+        notifier.add_listener("consumables.post-request_consumables", self.on_consumable_request)

     def read_parameters(self, resourcefilepath: Optional[Path] = None):
         self.load_parameters_from_dataframe(

diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py
index 7d4a34a591..1ac9eac01b 100644
--- a/tests/test_individual_history_tracker.py
+++ b/tests/test_individual_history_tracker.py
@@ -92,7 +92,8 @@ def test_individual_history_tracker(tmpdir, seed):
     mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"])
     assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all()

-    # Assert that all HSI events that occurred were also collected in the event chains. Do not include Inpatient_Care HSIs, as these
+    # Assert that all HSI events that occurred were also collected in the event chains.
+    # Do not include Inpatient_Care HSIs, as these
     # are not currently treated as being individual-specific
     Num_of_HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum()
     Num_of_HSIs_in_hs_log = len(output['tlo.methods.healthsystem']['HSI_Event'].loc[