From dbff470b51cde44beeefdae3575d52e0c19964bc Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:00:09 +0100 Subject: [PATCH 01/97] Investigate analysis of events at sim level --- src/tlo/simulation.py | 9 +++++++++ tests/test_rti.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 219b1b8a6f..a641909ed1 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -231,6 +231,15 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date break + + #if event.target != self.population: + # print("Event: ", event) + + if event.module == self.modules['RTI']: + print("RTI event ", event) + print(" target ", event.target) + if event.target != self.population: + self.population.props.at[event.tar] self.fire_single_event(event, date) # The simulation has ended. diff --git a/tests/test_rti.py b/tests/test_rti.py index 0e231fb4af..99243b988e 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,6 +25,17 @@ end_date = Date(2012, 1, 1) popsize = 1000 +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation with a functioning health system with full service availability and no set + constraints + """ + # create sim object + sim = create_basic_rti_sim(popsize, seed) + # run simulation + sim.simulate(end_date=end_date) + exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't @@ -65,6 +76,7 @@ def test_run(seed): check_dtypes(sim) + @pytest.mark.slow def test_all_injuries_run(seed): """ From 05098f78668a5317667d58cbda882a364a031277 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:26:39 +0200 Subject: [PATCH 02/97] Final data-printing set-up --- src/tlo/methods/demography.py | 7 ++- src/tlo/methods/healthsystem.py | 18 ++++++ src/tlo/methods/hiv.py | 67 ++++++++++++++++++---- src/tlo/methods/tb.py | 99 +++++++++++++++++++++++++-------- src/tlo/simulation.py | 82 ++++++++++++++++++++++++--- 5 files changed, 226 insertions(+), 47 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..6b2578fd44 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,9 +315,10 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + if sim.generate_data is False: + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 181c08f5aa..6e251e636c 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,8 +2033,26 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being 
defined." + go_ahead = False + if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): + go_ahead = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if go_ahead: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated appt_footprint if actual_appt_footprint is not None: diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..8e0d337fc1 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,11 +631,12 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + if self.sim.generate_data is False: + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -905,10 +906,16 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + if self.sim.generate_data: + print("Should be generating data") + sim.schedule_event( + HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) + ) + else: + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1662,6 +1669,37 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- +class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): + """ The HIV Polling Events for Data Generation + * Ensures that + """ + + def __init__(self, module): + super().__init__( + module, frequency=DateOffset(years=120) + ) # repeats every 12 months, but this can be changed + + def apply(self, population): + + df = population.props + + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & ~df.hv_inf + ].index + + n_susceptible = len(susc_idx) + print("Number of individuals susceptible", n_susceptible) + # Schedule the date of infection for each new infection: + for i in susc_idx: + date_of_infection = self.sim.date + pd.DateOffset( + # Ensure that individual will be infected 
before end of sim + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + self.sim.schedule_event( + HivInfectionEvent(self.module, i), date_of_infection + ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1683,6 +1721,7 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls + # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) + # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1861,11 +1901,12 @@ def vmmc_for_child(): priority=0, ) - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + if self.sim.generate_data is False: + # Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 @@ -1882,6 +1923,8 @@ def vmmc_for_child(): vmmc_for_child() + + # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..cd79ae22a5 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,28 +833,29 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + if self.sim.generate_data is False: + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -867,7 +868,11 @@ def 
initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + + if sim.generate_data is False: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + else: + sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) # 2) log at the end of the year # Optional: Schedule the scale-up of programs @@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- +class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): + """The Tb Regular Poll Event for Data Generation for assigning active infections + * selects everyone to develop an active infection and schedules onset of active tb + sometime during the simulation + """ + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=120)) + + def apply(self, population): + + df = population.props + now = self.sim.date + rng = self.module.rng + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & (df.tb_inf != "active") + ].index + + n_susceptible = len(susc_idx) + + middle_index = len(susc_idx) // 2 + + # Will equally split two strains among the population + list_ds = susc_idx[:middle_index] + list_mdr = susc_idx[middle_index:] + + # schedule onset of active tb. This will be equivalent to the "Onset", so it + # doesn't matter how long after we have decided which infection this is. + for person_id in list_ds: + date_progression = now + pd.DateOffset( + # At some point during their lifetime, this person will develop TB + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "ds" + + for person_id in list_mdr: + date_progression = now + pd.DateOffset( + days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "mdr" + class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1439,7 +1491,6 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() - class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 5b4e2fff4c..f0c8d6f09f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -7,7 +7,7 @@ from collections import OrderedDict from pathlib import Path from typing import Dict, Optional, Union - +import pandas as pd import numpy as np from tlo import Date, Population, logging @@ -63,9 +63,11 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_data = None self.end_date = None self.output_file = None self.population: Optional[Population] = None + self.event_chains: 
Optinoal[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -209,6 +211,8 @@ def make_initial_population(self, *, n): module.initialise_population(self.population) logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') @@ -221,7 +225,14 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it + self.generate_data = True # for now ensure we're always aiming to print data + + f = open('output.txt', mode='a') + #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + # Reorder columns to place the new columns at the front + pd.set_option('display.max_columns', None) + print(self.event_chains.columns) for module in self.modules.values(): module.initialise_simulation(self) @@ -250,17 +261,72 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date + self.event_chains.to_csv('output.csv', index=False) break - + #if event.target != self.population: # print("Event: ", event) - - if event.module == self.modules['RTI']: - print("RTI event ", event) - print(" target ", event.target) - if event.target != self.population: - self.population.props.at[event.tar] + go_ahead = False + df_before = [] + + # Only print events relevant to modules of interest + # Do not want to compare before/after in births because it may expand the pop dataframe + print_output = True + if print_output: + if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): + #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + go_ahead = True + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'Before' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_before = self.population.props.copy() + self.fire_single_event(event, date) + + if print_output: + if go_ahead == True: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + # if not df_before.columns.equals(df_after.columns): + # print("Number of columns in pop dataframe", len(self.population.props.columns)) + # print("Before", df_before.columns) + # print("After", df_after.columns#) + # exit(-1) + # if not df_before.index.equals(df_after.index): + # print("Number of indices in pop dataframe", len(self.population.props.index)) + # print("----> ", event) + # print("Before", df_before.index#) + # print("After", df_after.index) + # exit(-1) + + change = 
df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From 16c071c6220edcc20b539f346625f628e5e8c4c5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:37:38 +0200 Subject: [PATCH 03/97] Print event chains --- src/tlo/methods/demography.py | 2 +- src/tlo/methods/healthsystem.py | 8 ++-- src/tlo/methods/hiv.py | 6 +-- src/tlo/methods/tb.py | 4 +- src/tlo/simulation.py | 47 +++++++++--------- tests/test_data_generation.py | 85 +++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 35 deletions(-) create mode 100644 tests/test_data_generation.py diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 6b2578fd44..4f19af6d55 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,7 +315,7 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately self.other_death_poll = OtherDeathPoll(self) sim.schedule_event(self.other_death_poll, sim.date) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 6e251e636c..203ca10985 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,9 +2033,9 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." 
- go_ahead = False - if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): - go_ahead = True + print_chains = False + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event @@ -2046,7 +2046,7 @@ def run_individual_level_events_in_mode_0_or_1(self, # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) - if go_ahead: + if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8e0d337fc1..36b1a4bd6e 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_data: + if self.sim.generate_event_chains: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index cd79ae22a5..57ccd97368 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,7 +833,7 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,7 +869,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) else: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index f0c8d6f09f..d055d6e367 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -63,7 +63,9 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_data = None + self.generate_event_chains = None + self.generate_event_chains_modules_of_interest = [] + self.generate_event_chains_ignore_events = [] self.end_date = None self.output_file = None self.population: Optional[Population] = None @@ -216,7 +218,7 @@ def make_initial_population(self, *, n): end = 
time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') - def simulate(self, *, end_date): + def simulate(self, *, end_date, generate_event_chains = False): """Simulation until the given end date :param end_date: when to stop simulating. Only events strictly before this @@ -225,7 +227,11 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it - self.generate_data = True # for now ensure we're always aiming to print data + self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + if self.generate_event_chains: + # For now keep these fixed, eventually they will be input from user + self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] + self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] f = open('output.txt', mode='a') #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) @@ -264,17 +270,13 @@ def simulate(self, *, end_date): self.event_chains.to_csv('output.csv', index=False) break - #if event.target != self.population: - # print("Event: ", event) - go_ahead = False + + print_chains = False df_before = [] - # Only print events relevant to modules of interest - # Do not want to compare before/after in births because it may expand the pop dataframe - print_output = True - if print_output: - if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): - #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + if self.generate_event_chains: + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): go_ahead = True if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -288,7 +290,7 @@ def simulate(self, *, end_date): self.fire_single_event(event, date) - if print_output: + if go_ahead: if go_ahead == True: if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -299,18 +301,6 @@ def simulate(self, *, end_date): self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) else: df_after = self.population.props.copy() - # if not df_before.columns.equals(df_after.columns): - # print("Number of columns in pop dataframe", len(self.population.props.columns)) - # print("Before", df_before.columns) - # print("After", df_after.columns#) - # exit(-1) - # if not df_before.index.equals(df_after.index): - # print("Number of indices in pop dataframe", len(self.population.props.index)) - # print("----> ", event) - # print("Before", df_before.index#) - # print("After", df_after.index) - # exit(-1) - change = df_before.compare(df_after) if ~change.empty: indices = change.index @@ -385,6 +375,13 @@ def 
do_birth(self, mother_id): child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id def find_events_for_person(self, person_id: int): diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py new file mode 100644 index 0000000000..1f6333bbfe --- /dev/null +++ b/tests/test_data_generation.py @@ -0,0 +1,85 @@ +import os +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation +from tlo.methods import ( + care_of_women_during_pregnancy, + demography, + depression, + enhanced_lifestyle, + epi, + epilepsy, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + cardio_metabolic_disorders, + labour, + newborn_outcomes, + postnatal_supervisor, + pregnancy_helper_functions, + pregnancy_supervisor, + depression, + tb, + contraception, +# simplified_births, + symptommanager, +) +from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt + +# create simulation parameters +start_date = Date(2010, 1, 1) +end_date = Date(2015, 1, 1) +popsize = 100 + +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation to print all individual events of specific individuals + """ + + module_of_interest = 'Hiv' + # create sim object + sim = create_basic_sim(popsize, seed) + + dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) + + # Check that all dependencies are included + for dep in dependencies_list: + if dep not in sim.modules: + print("WARNING: dependency ", dep, "not included") + exit(-1) + + # run simulation + sim.simulate(end_date=end_date, generate_event_chains = True) + + +def create_basic_sim(population_size, seed): + # create the basic outline of an rti simulation object + sim = Simulation(start_date=start_date, seed=seed) + resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + sim.register(demography.Demography(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), + cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), + depression.Depression(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + ) + + sim.make_initial_population(n=population_size) + return sim + From ba81487a3fa003e2f10206e435a1d64f170f14e3 Mon Sep 17 
00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:08:50 +0200 Subject: [PATCH 04/97] Add chains in mode 2 too and clean up in simuation --- src/tlo/methods/healthsystem.py | 40 ++++++++++++++++++------ src/tlo/simulation.py | 55 ++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 203ca10985..54cb976b26 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2034,18 +2034,20 @@ def run_individual_level_events_in_mode_0_or_1(self, f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." print_chains = False - if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): - print_chains = True - row = self.sim.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = self.sim.date - row['when'] = 'Before' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + # Print individual info after event if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -2445,8 +2447,28 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: # Expected appt footprint before running event _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT - # Run event & get actual footprint + + print_chains = False + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if print_chains: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call if actual_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d055d6e367..616e159453 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -277,7 +277,7 @@ def simulate(self, *, end_date, generate_event_chains = False): if self.generate_event_chains: # Only print event if it belongs to modules of interest and 
if it is not in the list of events to ignore if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): - go_ahead = True + print_chains = True if event.target != self.population: row = self.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -290,33 +290,32 @@ def simulate(self, *, end_date, generate_event_chains = False): self.fire_single_event(event, date) - if go_ahead: - if go_ahead == True: - if event.target != self.population: - row = self.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - else: - df_after = self.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = event - new_rows_before['event_date'] = date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = event - new_rows_after['event_date'] = date - new_rows_after['when'] = 'After' - - self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) + if print_chains: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. 
if self.show_progress_bar: From b1c907c12bfa54621983415b560381d1737afc9a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:36:06 +0200 Subject: [PATCH 05/97] Fix issue with tests by ensuring standard Polling and infection is maintained is generate_event_chains is None --- src/tlo/methods/hiv.py | 6 +++--- src/tlo/methods/hsi_event.py | 14 ++++++++------ src/tlo/methods/tb.py | 10 ++++++---- src/tlo/simulation.py | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 36b1a4bd6e..391cf587a8 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains: + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 470794bcdd..785f27b7a6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -193,10 +193,12 @@ def run(self, squeeze_factor): print_chains = False df_before = [] - + if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: row = self.sim.population.props.iloc[[self.target]] @@ -204,7 +206,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'Before' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -219,7 +221,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() change = df_before.compare(df_after) @@ -236,8 +238,8 @@ def run(self, squeeze_factor): 
new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 57ccd97368..4c170944d2 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -832,8 +832,9 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT + # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,10 +870,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is False: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - else: + if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) + else: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + # 2) log at the end of the year # Optional: Schedule the scale-up of programs diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 794bfef98e..4aff23c9d7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -105,6 +105,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = None + self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.end_date = None @@ -298,10 +299,11 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.end_date = end_date # store the end_date so that others can reference it self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + self.generate_event_chains_overwrite_epi = False if self.generate_event_chains: # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) From cfb4264f0133fccbc0a82a6c9d3f51479d19038f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:51:37 +0200 Subject: [PATCH 
06/97] Switch iloc for loc --- src/tlo/events.py | 5 ++--- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/simulation.py | 9 ++++++--- tests/test_data_generation.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 78b828091d..a50832a58d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -74,7 +74,7 @@ def run(self): if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -83,13 +83,12 @@ def run(self): else: df_before = self.sim.population.props.copy() - self.apply(self.target) self.post_apply_hook() if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 785f27b7a6..cffeb32992 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4aff23c9d7..42a2a288d3 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -298,14 +298,17 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains = generate_event_chains if self.generate_event_chains: + # Eventually this can be made an option + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + else: + # If not using to print chains, cannot ignore epi + self.generate_event_chains_overwrite_epi = False - #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 1f6333bbfe..8dd92513f9 100644 --- 
a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -32,7 +32,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2015, 1, 1) +end_date = Date(2014, 1, 1) popsize = 100 @pytest.mark.slow From e0327de6b6f850ac871a2308271f6863333f173e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:55:57 +0200 Subject: [PATCH 07/97] Change syntax of if statement --- src/tlo/events.py | 2 +- src/tlo/methods/hsi_event.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index a50832a58d..2eef87ba3f 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,7 +71,7 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index cffeb32992..805c9584fb 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,9 +196,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] From fceee02e68722e29314c3d9efe35983709a78deb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 9 Oct 2024 09:27:54 +0100 Subject: [PATCH 08/97] Change syntax of if statement and print string of event --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2eef87ba3f..2a7871c2c8 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,12 +71,12 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -90,7 +90,7 @@ def run(self): if self.target != self.sim.population: row = 
self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 805c9584fb..ea9066bc8b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,12 +196,12 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) From eaeae626a4b37c024db38abf82bdb7c2e723ffe2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:45:41 +0100 Subject: [PATCH 09/97] Focus on rti and print footprint --- src/tlo/events.py | 16 +++++++++++++--- src/tlo/methods/hsi_event.py | 36 ++++++++++++++++------------------- src/tlo/methods/rti.py | 8 ++++++-- src/tlo/simulation.py | 6 +++--- tests/test_data_generation.py | 31 ++++++++++++++++-------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2a7871c2c8..76e1b9a117 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,14 +71,19 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -88,11 +93,12 @@ def run(self): if print_chains: 
if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() @@ -104,11 +110,15 @@ def run(self): new_rows_before['event'] = self new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' + new_rows_before['appt_footprint'] = 'N/A' + new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index new_rows_after['event'] = self new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' + new_rows_after['appt_footprint'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ea9066bc8b..f8e8738543 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,14 +196,19 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): +# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -212,32 +217,23 @@ def run(self, squeeze_factor): self.post_apply_hook() self._run_after_hsi_event() + footprint = self.EXPECTED_APPT_FOOTPRINT + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = str(footprint) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - 
new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + print("Error, I shouldn't be here") + exit(-1) + return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..1c12e7162b 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1)) + super().__init__(module, frequency=DateOffset(months=1000)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,9 +2864,13 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - pred = eq.predict(df.loc[rt_current_non_ind]) + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1 + else: + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 42a2a288d3..a8ecf14cc6 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -303,8 +303,8 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 8dd92513f9..af3c4f0ae9 100644 --- a/tests/test_data_generation.py +++ 
b/tests/test_data_generation.py @@ -25,15 +25,16 @@ depression, tb, contraception, -# simplified_births, + simplified_births, + rti, symptommanager, ) from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 100 +end_date = Date(2012, 1, 1) +popsize = 200 @pytest.mark.slow def test_data_harvesting(seed): @@ -41,7 +42,7 @@ def test_data_harvesting(seed): This test runs a simulation to print all individual events of specific individuals """ - module_of_interest = 'Hiv' + module_of_interest = 'RTI' # create sim object sim = create_basic_sim(popsize, seed) @@ -55,29 +56,31 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - + exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), + # contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + # epi.Epi(resourcefilepath=resourcefilepath), + # hiv.Hiv(resourcefilepath=resourcefilepath), + # tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + # labour.Labour(resourcefilepath=resourcefilepath), + #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From c7bd9d058cea79fad0f8471830766f5c335a7df1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:57:21 +0100 Subject: [PATCH 10/97] Only store change in individual properties, not entire property row. Log changes to logger. 
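
Rough sketch of the per-person comparison this patch introduces (the function name
here is illustrative; the patch implements the same idea inline in
compare_population_dataframe and store_chains_to_do_after_event):

    import pandas as pd

    def changed_properties(row_before: pd.Series, row_after: pd.Series) -> dict:
        """Return only the properties whose value changed between the two snapshots."""
        # Fill NaNs with a sentinel first so that NaN -> NaN is not reported as a change,
        # mirroring the fillna(-99999) used in the patch.
        before = row_before.fillna(-99999)
        after = row_after.fillna(-99999)
        return {col: row_after[col] for col in before.index if before[col] != after[col]}

Each logged "link" is then just the event name, the event date and this dictionary of
changed properties, which keeps the record much smaller than writing out the full
population-properties row before and after every event.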
--- src/tlo/events.py | 204 ++++++++++++++++++++++++++-------- src/tlo/methods/hsi_event.py | 134 ++++++++++++++++------ src/tlo/simulation.py | 2 +- tests/test_data_generation.py | 22 ++-- 4 files changed, 268 insertions(+), 94 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 76e1b9a117..436a01a97c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,13 +4,20 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset +from tlo import DateOffset, logging if TYPE_CHECKING: from tlo import Simulation import pandas as pd +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +logger_summary = logging.getLogger(f"{__name__}.summary") +logger_summary.setLevel(logging.INFO) + +debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -62,66 +69,167 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - - def run(self): - """Make the event happen.""" + def compare_population_dataframe(self,df_before, df_after): + """ This function compares the population dataframe before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + # Create an empty list to store changes for each of the individuals + chain_links = {} + + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + # Create a dictionary for this person + # First add event info + link_info = { + #'person_ID': idx, + 'event': str(self), + 'event_date': self.sim.date, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + + # Append the event and changes to the individual key + chain_links = {idx : link_info} + + return chain_links + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + + # Initialise these variables print_chains = False df_before = [] + row_before = pd.Series() - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. 
+ if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual + if self.target != self.sim.population: + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - self.apply(self.target) - self.post_apply_hook() + else: + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the pop dataframe before the event has occurred. + df_before = self.sim.population.props.copy() + + return print_chains, row_before, df_before + + def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + + chain_links = {} + if print_chains: + + # Target is single individual if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store event for this individual + link_info = { + #'person_ID' : self.target, + 'event' : str(self), + 'event_date' : self.sim.date, + } + # Store property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_before['appt_footprint'] = 'N/A' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - new_rows_after['appt_footprint'] = 'N/A' - - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = 
pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe(df_before, df_after) + + if debug_chains: + # Or print entire rows + change = df_before.compare(df_after) + if not change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = self + new_rows_before['event_date'] = self.sim.date + new_rows_before['when'] = 'Before' + + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = self + new_rows_after['event_date'] = self.sim.date + new_rows_after['when'] = 'After' + + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + + return chain_links + + def run(self): + """Make the event happen.""" + + # Collect relevant information before event takes place + if self.sim.generate_event_chains: + print_chains, row_before, df_before = self.store_chains_to_do_before_event() + + self.apply(self.target) + self.post_apply_hook() + + # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' + # in the individual's event chain. + if self.sim.generate_event_chains: + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + + # Log chain_links here + if len(chain_links)>0: + logger.info(key='event_chains', + data= chain_links, + description='Links forming chains of events for simulated individuals') + + #print("Chain events ", chain_links) + class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f8e8738543..1c727f014b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -16,12 +16,19 @@ from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem +# Pointing to the logger in events +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) +debug_chains = True + + # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -187,54 +194,113 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def run(self, squeeze_factor): - """Make the event happen.""" + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" + # Initialise these variables print_chains = False - df_before = [] - - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): -# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - updated_appt_footprint = self.apply(self.target, squeeze_factor) - self.post_apply_hook() - self._run_after_hsi_event() + row_before = pd.Series() - footprint = self.EXPECTED_APPT_FOOTPRINT - if updated_appt_footprint is not None: - footprint = updated_appt_footprint + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if print_chains: + # Will eventually use this once I can actually GET THE NAME OF THE SELF + # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + + # In the case of HSI events, only individual events should exist and therefore be logged + print_chains = True + + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = str(footprint) + row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: + # Many of our HealthSystem implementations rely on the assumption that print("Error, I shouldn't be here") exit(-1) + + return print_chains, row_before + + def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + if print_chains: + # For HSI event, this will only ever occur for individual events + + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes. 
+ + # Add event details + link_info = { + 'event' : str(self), + 'event_date' : self.sim.date, + 'appt_footprint' : str(footprint), + 'level' : self.facility_info.level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = footprint + row['level'] = self.facility_info.level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + return chain_links + + + def run(self, squeeze_factor): + """Make the event happen.""" + + + if self.sim.generate_event_chains: + print_chains, row_before = self.store_chains_to_do_before_event() + + footprint = self.EXPECTED_APPT_FOOTPRINT + updated_appt_footprint = self.apply(self.target, squeeze_factor) + self.post_apply_hook() + self._run_after_hsi_event() + + + if self.sim.generate_event_chains: + + # If the footprint has been updated when the event ran, change it here + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + + if len(chain_links)>0: + logger_chains.info(key='event_chains', + data = chain_links, + description='Links forming chains of events for simulated individuals') + #print(chain_links) + return updated_appt_footprint + def get_consumables( self, diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index a8ecf14cc6..20b3a4898f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index af3c4f0ae9..39f2b022aa 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -33,7 +33,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) +end_date = Date(2011, 1, 1) popsize = 200 @pytest.mark.slow @@ -63,24 +63,24 @@ def create_basic_sim(population_size, seed): sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - # contraception.Contraception(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - 
simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - # epi.Epi(resourcefilepath=resourcefilepath), - # hiv.Hiv(resourcefilepath=resourcefilepath), - # tb.Tb(resourcefilepath=resourcefilepath), + # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - # labour.Labour(resourcefilepath=resourcefilepath), - #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From 769aaeca44aaedc324bd3da2f5f338bb47e02106 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:03:22 +0100 Subject: [PATCH 11/97] Style fixes --- src/tlo/methods/tb.py | 2 +- src/tlo/simulation.py | 4 ++-- tests/test_data_generation.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 4c170944d2..9dc05ff301 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -1393,7 +1393,7 @@ def apply(self, population): & (df.tb_inf != "active") ].index - n_susceptible = len(susc_idx) + len(susc_idx) middle_index = len(susc_idx) // 2 diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 20b3a4898f..75dfa76429 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,7 +8,7 @@ import time from collections import OrderedDict from pathlib import Path -from typing import Dict, Optional, Union +from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd @@ -374,7 +374,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. 
""" - f = open('output.txt', mode='a') + open('output.txt', mode='a') if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 39f2b022aa..c94618a77d 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import pandas as pd import pytest from tlo import Date, Simulation @@ -11,7 +10,6 @@ depression, enhanced_lifestyle, epi, - epilepsy, healthburden, healthseekingbehaviour, healthsystem, @@ -20,16 +18,13 @@ labour, newborn_outcomes, postnatal_supervisor, - pregnancy_helper_functions, pregnancy_supervisor, depression, tb, contraception, - simplified_births, rti, symptommanager, ) -from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) From 757cee36b0ae611f1f7ae31d25799fc0d6e7daa1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sun, 13 Oct 2024 11:15:17 +0100 Subject: [PATCH 12/97] Include printing of individual properties at the beginning and at birth, label what is only used for ddebugging and will be later removed --- src/tlo/events.py | 5 +++-- src/tlo/methods/hsi_event.py | 7 ++++--- src/tlo/methods/rti.py | 2 +- src/tlo/simulation.py | 28 ++++++++++++++++++++++++++++ tests/test_data_generation.py | 5 ++--- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 436a01a97c..03bf7c72fa 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -98,7 +98,6 @@ def compare_population_dataframe(self,df_before, df_after): for col in changed_cols: link_info[col] = df_after.at[idx, col] - # Append the event and changes to the individual key chain_links = {idx : link_info} @@ -127,7 +126,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) if debug_chains: - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -166,6 +165,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = {self.target : link_info} + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births @@ -185,6 +185,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe(df_before, df_after) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
if debug_chains: # Or print entire rows change = df_before.compare(df_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 1c727f014b..0c3bc16072 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -217,6 +217,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -228,8 +229,8 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: else: # Many of our HealthSystem implementations rely on the assumption that - print("Error, I shouldn't be here") - exit(-1) + raise RuntimeError("Cannot have population-wide HSI events") + return print_chains, row_before @@ -258,7 +259,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : link_info} - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 1c12e7162b..3642365976 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,7 +2865,7 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1 + pred = 1.0 else: pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 75dfa76429..582fb4ba1c 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -37,6 +37,9 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -111,6 +114,8 @@ def __init__( self.end_date = None self.output_file = None self.population: Optional[Population] = None + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar @@ -281,7 +286,16 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
+ if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -392,6 +406,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date + + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: @@ -449,13 +465,25 @@ def do_birth(self, mother_id: int) -> int: child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + prop_dict = self.population.props.loc[child_id].to_dict() + + child_dict = {child_id : prop_dict} + logger_chains.info(key='event_chains', + data = child_dict, + description='Links forming chains of events for simulated individuals') + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.population.props.iloc[[child_id]] row['person_ID'] = child_id row['event'] = 'Birth' row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index c94618a77d..d9885c1fab 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -28,8 +28,8 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2011, 1, 1) -popsize = 200 +end_date = Date(2012, 1, 1) +popsize = 100 @pytest.mark.slow def test_data_harvesting(seed): @@ -51,7 +51,6 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object From 22a5e44312ad4d2f1d955b70399ae9569efb13c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:00:22 +0100 Subject: [PATCH 13/97] Log everything to simulation, as events logger doesn't seem to be visible to all modules. For now add person_ID to the dict of info printed as the outer dictionary key logging seems to have a problem. 
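
Sketch of what a single logged link is intended to look like after this change, assuming
one person whose property changed during an event (the person id, event name, date and
property values below are made up for illustration):

    from tlo import logging

    logger_chain = logging.getLogger('tlo.simulation')
    logger_chain.setLevel(logging.INFO)

    link_info = {
        'person_ID': 42,               # now carried inside the record itself
        'event': 'RTIPollingEvent',    # illustrative event name
        'event_date': '2010-03-01',    # illustrative date
        'rt_road_traffic_inc': True,   # only the properties that changed are included
    }
    logger_chain.info(key='event_chains',
                      data={42: link_info},
                      description='Links forming chains of events for simulated individuals')

Logging through the 'tlo.simulation' logger means every module's chain records end up
under the same key, rather than being split across per-module loggers.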
--- src/tlo/events.py | 13 +++++++++---- src/tlo/methods/hsi_event.py | 3 ++- src/tlo/simulation.py | 25 +++++++++++++++++-------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 03bf7c72fa..98832faecb 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,9 +11,13 @@ import pandas as pd + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chain = logging.getLogger('tlo.simulation') +logger_chain.setLevel(logging.INFO) + logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) @@ -89,7 +93,7 @@ def compare_population_dataframe(self,df_before, df_after): # Create a dictionary for this person # First add event info link_info = { - #'person_ID': idx, + 'person_ID': idx, 'event': str(self), 'event_date': self.sim.date, } @@ -152,13 +156,14 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if self.target != self.sim.population: row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - # Create and store event for this individual + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, + 'person_ID' : self.target, 'event' : str(self), 'event_date' : self.sim.date, } - # Store property changes as a result of the event for this individual + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] @@ -225,7 +230,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: - logger.info(key='event_chains', + logger_chain.info(key='event_chains', data= chain_links, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 0c3bc16072..6651a8704a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -17,7 +17,7 @@ from tlo.methods.healthsystem import HealthSystem # Pointing to the logger in events -logger_chains = logging.getLogger("tlo.methods.event") +logger_chains = logging.getLogger("tlo.simulation") logger_chains.setLevel(logging.INFO) logger = logging.getLogger(__name__) @@ -246,6 +246,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # Add event details link_info = { + 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, 'appt_footprint' : str(footprint), diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 582fb4ba1c..fd9fade215 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -107,7 +107,7 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_event_chains = None + self.generate_event_chains = True self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] @@ -292,15 +292,23 @@ def make_initial_population(self, *, n: int) -> None: # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') - logger_chains.info(key='event_chains', + + print(pop_dict) + print(pop_dict.keys()) + for key in pop_dict.keys(): + pop_dict[key]['person_ID'] = key + print("Length of properties", len(pop_dict[0].keys())) + #exit(-1) + logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - def initialise(self, *, end_date: Date, generate_event_chains) -> None: + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow initialising data structures which may depend (in size for example) on the @@ -312,7 +320,7 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains + #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True @@ -413,7 +421,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: if self.show_progress_bar: progress_bar.stop() - def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: + def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date :param end_date: When to stop simulating. Only events strictly before this @@ -421,7 +429,7 @@ def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: clarity. """ start = time.time() - self.initialise(end_date=end_date, generate_event_chains=generate_event_chains) + self.initialise(end_date=end_date) self.run_simulation_to(to_date=end_date) self.finalise(time.time() - start) @@ -470,9 +478,10 @@ def do_birth(self, mother_id: int) -> int: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. 
prop_dict = self.population.props.loc[child_id].to_dict() - + prop_dict['event'] = 'Birth' + prop_dict['event_date'] = self.date child_dict = {child_id : prop_dict} - logger_chains.info(key='event_chains', + logger.info(key='event_chains', data = child_dict, description='Links forming chains of events for simulated individuals') From 7faa81783dc43e434e26ef8c95717480cebd3816 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:07:46 +0200 Subject: [PATCH 14/97] Consider all modules included as of interest --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index fd9fade215..15be1622e8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -325,7 +325,7 @@ def initialise(self, *, end_date: Date) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi From 7232f976831054ed541d59d8da20c91289fa79e6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:29:43 +0200 Subject: [PATCH 15/97] Remove pop-wide HSI warning and make epi default even when printing chains --- src/tlo/methods/hsi_event.py | 38 ++++++++++++++++++++++++++---------- src/tlo/simulation.py | 2 +- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 6651a8704a..d0cdb5bbdd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -223,13 +223,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Many of our HealthSystem implementations rely on the assumption that - raise RuntimeError("Cannot have population-wide HSI events") + # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error + # raise RuntimeError("Cannot have population-wide HSI events") + logger.debug( + key="message", + data=( + f"Cannot have population-wide HSI events" + ), + ) return print_chains, row_before @@ -245,12 +255,20 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # will be stored regardless of whether individual experienced property changes. 
# Add event details + + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' + link_info = { 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, - 'appt_footprint' : str(footprint), - 'level' : self.facility_info.level, + 'appt_footprint' : record_footprint, + 'level' : record_level, } # Add changes to properties @@ -266,8 +284,8 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' - row['appt_footprint'] = footprint - row['level'] = self.facility_info.level + row['appt_footprint'] = record_footprint + row['level'] = record_level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links @@ -277,7 +295,7 @@ def run(self, squeeze_factor): """Make the event happen.""" - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: print_chains, row_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -287,7 +305,7 @@ def run(self, squeeze_factor): self._run_after_hsi_event() - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: # If the footprint has been updated when the event ran, change it here if updated_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 15be1622e8..0c70b164d9 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -323,7 +323,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] From a6def2d22c0d291ce775afef561b580847ad36cf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:39:24 +0200 Subject: [PATCH 16/97] Style fix --- src/tlo/methods/hsi_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d0cdb5bbdd..041ab9cf08 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -237,7 +237,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: logger.debug( key="message", data=( - f"Cannot have population-wide HSI events" + "Cannot have population-wide HSI events" ), ) From ecea532a2843d312580accf97383cd62c457fd04 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:51:39 +0200 Subject: [PATCH 17/97] Remove data generation test, which wasn't really a test --- tests/test_data_generation.py | 82 ----------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 tests/test_data_generation.py diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py deleted file mode 100644 index d9885c1fab..0000000000 --- 
a/tests/test_data_generation.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from pathlib import Path - -import pytest - -from tlo import Date, Simulation -from tlo.methods import ( - care_of_women_during_pregnancy, - demography, - depression, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - cardio_metabolic_disorders, - labour, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - depression, - tb, - contraception, - rti, - symptommanager, -) - -# create simulation parameters -start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) -popsize = 100 - -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation to print all individual events of specific individuals - """ - - module_of_interest = 'RTI' - # create sim object - sim = create_basic_sim(popsize, seed) - - dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) - - # Check that all dependencies are included - for dep in dependencies_list: - if dep not in sim.modules: - print("WARNING: dependency ", dep, "not included") - exit(-1) - - # run simulation - sim.simulate(end_date=end_date, generate_event_chains = True) - -def create_basic_sim(population_size, seed): - # create the basic outline of an rti simulation object - sim = Simulation(start_date=start_date, seed=seed) - resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' - sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), - enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), - healthburden.HealthBurden(resourcefilepath=resourcefilepath), - symptommanager.SymptomManager(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), - rti.RTI(resourcefilepath=resourcefilepath), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), - cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), - depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), - ) - - sim.make_initial_population(n=population_size) - return sim - From ae7a44cb5f72063c48555e3b21d5d6dd4400ee97 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:29:03 +0200 Subject: [PATCH 18/97] Change dict of properties to string in logging, and add analysis files --- .../analysis_extract_data.py | 370 ++++++++++++++++++ .../postprocess_events_chain.py | 156 ++++++++ .../scenario_generate_chains.py | 115 ++++++ src/tlo/events.py | 23 +- src/tlo/methods/hsi_event.py | 13 +- src/tlo/simulation.py | 29 +- 6 files changed, 684 insertions(+), 22 deletions(-) create mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py create mode 100644 
src/scripts/analysis_data_generation/postprocess_events_chain.py create mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py new file mode 100644 index 0000000000..2cfba5315b --- /dev/null +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -0,0 +1,370 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd + +from tlo import Date +from tlo.analysis.utils import extract_results +from datetime import datetime + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def all_columns(_df): + return pd.Series(_df.all()) + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column='0', + #column = str(i), + #custom_generate_series=get_num_dalys_by_year, + do_scaling=False + ) + # print(event_chains.loc[0,(0, 0)]) + + eval_env = { + 'datetime': datetime, # Add the datetime class to the eval environment + 'pd': pd, # Add pandas to handle Timestamp + 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval + 'NaT': pd.NaT, + 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) + } + + for item,row in event_chains.iterrows(): + value = event_chains.loc[item,(0, 0)] + if value !='': + print('') + print(value) + exit(-1) + #dict = {} + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # dict[i] = [] + + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # event_chains = extract_results( + # results_folder, + # module='tlo.simulation'#, + # key='event_chains', + # column = str(i), + # #custom_generate_series=get_num_dalys_by_year, + # do_scaling=False + # ) + # print(event_chains) + # print(event_chains.index) + # print(event_chains.columns.levels) + + # for index, row in event_chains.iterrows(): + # if event_chains.iloc[index,0] is not None: + # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): + # dict[i].append(event_chains.iloc[index,0]) + #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): + #print(event_chains.iloc[index,0]['de_depr']) + # exit(-1) + #for item in dict[0]: + # print(item) + + #exit(-1) + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from 
`Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + 
print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. 
+ + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory 
to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_generate_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py new file mode 100644 index 0000000000..96c27a04b1 --- /dev/null +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -0,0 +1,156 @@ +import pandas as pd +from dateutil.relativedelta import relativedelta + +# Remove from every individual's event chain all events that were fired after death +def cut_off_events_after_death(df): + + events_chain = df.groupby('person_ID') + + filtered_data = pd.DataFrame() + + for name, group in events_chain: + + # Find the first non-NaN 'date_of_death' and its index + first_non_nan_index = group['date_of_death'].first_valid_index() + + if first_non_nan_index is not None: + # Filter out all rows after the first non-NaN index + filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index + filtered_data = pd.concat([filtered_data, filtered_group]) + else: + # If there are no non-NaN values, keep the original group + filtered_data = pd.concat([filtered_data, group]) + + return filtered_data + +# Load into DataFrame +def load_csv_to_dataframe(file_path): + try: + # Load raw chains into df + df = pd.read_csv(file_path) + print("Raw event chains loaded successfully!") + return df + except FileNotFoundError: + print(f"Error: The file '{file_path}' was not found.") + except Exception as e: + print(f"An error occurred: {e}") + +file_path = 'output.csv' # Replace with the path to your CSV file + +output = load_csv_to_dataframe(file_path) + +# Some of the dates appeared not to be in datetime format. Correct here. 
+output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') +output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') +if 'hv_date_inf' in output.columns: + output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') + + +date_start = pd.to_datetime('2010-01-01') +if 'Other' in output['cause_of_death'].values: + print("ERROR: 'Other' was included in sim as possible cause of death") + exit(-1) + +# Choose which columns in individual properties to visualise +columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] +#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] + +# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison +columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] + +# If considering epidemiology consistent with sim, add check here. +check_ages_of_those_HIV_inf = False +if check_ages_of_those_HIV_inf: + for index, row in output.iterrows(): + if pd.isna(row['hv_date_inf']): + continue # Skip this iteration + diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) + if diff.years > 1 and diff.years<15: + print("Person contracted HIV infection at age younger than 15", diff) + +# Remove events after death +filtered_data = cut_off_events_after_death(output) + +print_raw_events = True # Print raw chain of events for each individual +print_selected_changes = False +print_all_changes = True +person_ID_of_interest = 494 + +pd.set_option('display.max_rows', None) + +for name, group in filtered_data.groupby('person_ID'): + list_of_dob = group['date_of_birth'] + + # Select individuals based on when they were born + if list_of_dob.iloc[0].year<2010: + + # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: + all_identical_dob = group['date_of_birth'].nunique() == 1 + all_identical_sex = group['sex'].nunique() == 1 + if all_identical_dob is False or all_identical_sex is False: + print("Immutable properties are changing! 
This is not chain for single individual") + print(group) + exit(-1) + + print("----------------------------------------------------------------------") + print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) + print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event + number_of_events =len(group)/2 + number_of_changes=0 + if print_raw_events: + print(group) + + if print_all_changes: + # Check each row + comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) + + # Iterate over rows where any column has changed + for idx, row_changed in comparison.iloc[1:].iterrows(): + if row_changed.any(): # Check if any column changed in this row + number_of_changes+=1 + changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred + print(f"Row {idx} - Changes detected in columns: {changed_columns}") + columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns + print(group.loc[idx, columns_output]) # Print only the changed columns + if group.loc[idx, 'when'] == 'Before': + print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') + #print(group.loc[idx,columns_to_print]) + print() # For better readability + print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") + + if print_selected_changes: + tb_inf_condition = ( + ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | + ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) + ) + + alive_condition = ( + (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) + ) + # Combine conditions for rows of interest + transition_condition = tb_inf_condition | alive_condition + + if list_of_dob.iloc[0].year >= 2010: + print("DETECTED OF INTEREST") + print(group[group['event'] == 'Birth'][columns_to_print]) + + # Filter the DataFrame based on the condition + filtered_transitions = group[transition_condition] + if not filtered_transitions.empty: + if list_of_dob.iloc[0].year < 2010: + print("DETECTED OF INTEREST") + print(filtered_transitions[columns_to_print]) + + +print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) + + + diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py new file mode 100644 index 0000000000..6bdcd02d90 --- /dev/null +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -0,0 +1,115 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + 
src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 120 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 1 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2019 + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "no_scaling", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. 
""" + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "default", + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/events.py b/src/tlo/events.py index 98832faecb..00a6fe4e7d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -83,13 +85,14 @@ def compare_population_dataframe(self,df_before, df_after): # Create an empty list to store changes for each of the individuals chain_links = {} - + len_of_diff = len(diff_mask) + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() - + if changed_cols: # Proceed only if there are changes in the row - # Create a dictionary for this person # First add event info link_info = { @@ -103,7 +106,7 @@ def compare_population_dataframe(self,df_before, df_after): link_info[col] = df_after.at[idx, col] # Append the event and changes to the individual key - chain_links = {idx : link_info} + chain_links[idx] = str(link_info) return chain_links @@ -168,7 +171,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: @@ -228,14 +231,18 @@ def run(self): if self.sim.generate_event_chains: chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + # Create empty logger for entire pop + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + + pop_dict.update(chain_links) + # Log chain_links here if len(chain_links)>0: logger_chain.info(key='event_chains', - data= chain_links, - description='Links forming chains of events for simulated individuals') + data= pop_dict, + description='Links forming chains of events for simulated individuals') #print("Chain events ", chain_links) - class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 041ab9cf08..d657e9d3a0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + if TYPE_CHECKING: from tlo import Module, Simulation @@ -276,7 +278,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links = {self.target : str(link_info)} # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
row = self.sim.population.props.loc[[abs(self.target)]] @@ -314,10 +316,15 @@ def run(self, squeeze_factor): chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) if len(chain_links)>0: + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals + + pop_dict.update(chain_links) + logger_chains.info(key='event_chains', - data = chain_links, + data = pop_dict, description='Links forming chains of events for simulated individuals') - #print(chain_links) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 0c70b164d9..d9ba62c43a 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -40,6 +40,8 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) +FACTOR_POP_DICT = 5000 + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -294,17 +296,18 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - print(pop_dict) - print(pop_dict.keys()) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key - print("Length of properties", len(pop_dict[0].keys())) - #exit(-1) + pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later + + pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict_full.update(pop_dict) + + print("Size for full sim", len(pop_dict_full)) + logger.info(key='event_chains', - data = pop_dict, + data = pop_dict_full, description='Links forming chains of events for simulated individuals') - end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -323,7 +326,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] @@ -480,9 +483,13 @@ def do_birth(self, mother_id: int) -> int: prop_dict = self.population.props.loc[child_id].to_dict() prop_dict['event'] = 'Birth' prop_dict['event_date'] = self.date - child_dict = {child_id : prop_dict} + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + + print("Length at birth", len(pop_dict)) logger.info(key='event_chains', - data = child_dict, + data = pop_dict, description='Links forming chains of events for simulated individuals') # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
@@ -492,7 +499,7 @@ def do_birth(self, mother_id: int) -> int: row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: From 16299a21f43862a188f41ea6117b81c2c11d72ab Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:37:29 +0000 Subject: [PATCH 19/97] Include debugging option, final set-up of scenario to print data, analysis file now collects all relevant info and prints them --- .../analysis_extract_data.py | 157 ++++++++++++++++-- .../scenario_generate_chains.py | 53 +++++- src/tlo/events.py | 10 +- src/tlo/methods/hsi_event.py | 50 +++--- src/tlo/methods/rti.py | 17 +- src/tlo/simulation.py | 39 +++-- src/tlo/util.py | 1 + 7 files changed, 252 insertions(+), 75 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 2cfba5315b..6eb6408830 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -8,10 +8,14 @@ from typing import Tuple import pandas as pd +import matplotlib.pyplot as plt from tlo import Date from tlo.analysis.utils import extract_results from datetime import datetime +from collections import Counter +import ast + # Range of years considered min_year = 2010 @@ -28,17 +32,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column='0', - #column = str(i), - #custom_generate_series=get_num_dalys_by_year, - do_scaling=False - ) - # print(event_chains.loc[0,(0, 0)]) - + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp @@ -46,13 +40,144 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'NaT': pd.NaT, 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } + + initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + + # Will be added through computation: age at time of RTI + + # Will be added through computation: total duration of event + + initial_rt_event_properties = set() + + num_individuals = 1000 + num_runs = 50 + record = [] + + + for i in range(0,num_individuals): - for item,row in event_chains.iterrows(): - value = event_chains.loc[item,(0, 0)] - if value !='': - print('') - print(value) + individual_event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column=str(i), + do_scaling=False + ) + + #print(individual_event_chains) + + + for r in range(0,num_runs): + + print("AT RUN = ", r) + + initial_properties = {} + progression_properties = {} + key_first_event = {} + key_last_event = {} + first_event = {} + last_event = {} + properties = {} + + + #ind_Counter = Counter() + ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : 
Counter()} + # Count total appts + + list_for_individual = [] + for item,row in individual_event_chains.iterrows(): + value = individual_event_chains.loc[item,(0, r)] + # print("The value is", value, "at run ", r) + if value !='' and isinstance(value, str): + evaluated = eval(value, eval_env) + list_for_individual.append(evaluated) + # elif not isinstance(value,str): + # print(value) + + initial_properties = list_for_individual[0] + print(initial_properties) + + # Initialise first event by gathering parameters of interest from initial_properties + first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + + progression_properties = {} + for i in list_for_individual: + if 'event' in i: + print("") + print(i) + if 'RTIPolling' in i['event']: + #print("I'm in polling event") + #print(i) + + # Keep track of which properties are changed during polling events + for key,value in i.items(): + if 'rt_' in key: + initial_rt_event_properties.add(key) + + # Retain a copy of Polling event + polling_event = i.copy() + + # Update parameters of interest following RTI + key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} + + # Calculate age of individual at time of event + key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days + + # Keep track of evolution in individual's properties + progression_properties = initial_properties.copy() + progression_properties.update(i) + + else: + # Progress properties of individual, even if this event is a death + progression_properties.update(i) + + #print(progression_properties) + # Update footprint + if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': + footprint = i['appt_footprint'] + if 'Counter' in footprint: + footprint = footprint[len("Counter("):-1] + apply = eval(footprint, eval_env) + ind_Counter[i['level']].update(Counter(apply)) + + if 'is_alive' in i and i['is_alive'] is False: + print("Death", i) + print("-------Total footprint", ind_Counter) + break + + + # Compute final properties of individual + key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] + key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days + key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + key_last_event.update({'total_footprint': ind_Counter}) + + #print("-------Total footprint", ind_Counter) + #for key, value in key_first_event.items(): + # if 'rt_' in key or 'alive' in key: + # print(f"{key}: {value}") + #print(#) + #for key, value in key_last_event.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + # print(f"{key}: {value}") + + #print(key_first_event) + #print(key_last_event) + print(initial_rt_event_properties) + properties = key_first_event | key_last_event + record.append(properties) + for key, value in properties.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + print(f"{key}: {value}") + + df = pd.DataFrame(record) + df.to_csv("raw_data.csv", index=False) + + print(df) + print(initial_rt_event_properties) exit(-1) + #print(i) + #dict = {} #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: # dict[i] = [] diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6bdcd02d90..79df3f55b6 100644 --- 
a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -22,18 +22,42 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario - +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 120 + self.end_date = self.start_date + pd.DateOffset(months=13) + self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 50 self.generate_event_chains = True def log_configuration(self): @@ -51,10 +75,23 @@ def log_configuration(self): } def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: diff --git a/src/tlo/events.py b/src/tlo/events.py index 00a6fe4e7d..ba8024f621 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -FACTOR_POP_DICT = 5000 +from tlo.util import FACTOR_POP_DICT logger = logging.getLogger(__name__) @@ -132,7 +132,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - if debug_chains: + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target @@ -142,6 +142,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: + # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. 
df_before = self.sim.population.props.copy() @@ -174,7 +175,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target @@ -194,7 +195,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = self.compare_population_dataframe(df_before, df_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Or print entire rows change = df_before.compare(df_after) if not change.empty: @@ -233,7 +234,6 @@ def run(self): # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) # Log chain_links here diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d657e9d3a0..bdf597fba4 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,10 +8,9 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population - +from tlo.util import FACTOR_POP_DICT import pandas as pd -FACTOR_POP_DICT = 5000 if TYPE_CHECKING: @@ -219,19 +218,21 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'Before' + + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error @@ -280,15 +281,16 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : str(link_info)} - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
- row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = record_footprint + row['level'] = record_level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 3642365976..1ca2749af7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) + super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,10 +2864,12 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 - else: - pred = eq.predict(df.loc[rt_current_non_ind]) + #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1.0 + #else: + # pred = eq.predict(df.loc[rt_current_non_ind]) + + random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] @@ -4852,6 +4854,7 @@ def __init__(self, module, person_id): self.treated_code = 'none' def apply(self, person_id, squeeze_factor): + self._number_of_times_this_event_has_run += 1 df = self.sim.population.props rng = self.module.rng @@ -4900,10 +4903,12 @@ def apply(self, person_id, squeeze_factor): # injury is being treated in this surgery # find untreated injury codes that are treated with major surgery relevant_codes = np.intersect1d(injuries_to_be_treated, surgically_treated_codes) + # check that the person sent here has an appropriate code(s) assert len(relevant_codes) > 0 # choose a code at random self.treated_code = rng.choice(relevant_codes) + if request_outcome: # check the people sent here hasn't died due to rti, have had their injuries diagnosed and been through # RTI_Med @@ -4990,7 +4995,9 @@ def apply(self, person_id, squeeze_factor): # ------------------------------------- Perm disability from amputation ------------------------------------ codes = ['782', '782a', '782b', '782c', '783', '882', '883', '884'] + if self.treated_code in codes: + # Track whether they are permanently disabled df.at[person_id, 'rt_perm_disability'] = True # Find the column and code where the permanent injury is stored diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d9ba62c43a..bb766562a0 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -11,8 +11,9 @@ from typing import Optional from typing import TYPE_CHECKING, 
Optional import pandas as pd - +import tlo.population import numpy as np +from tlo.util import FACTOR_POP_DICT try: import dill @@ -40,8 +41,6 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) -FACTOR_POP_DICT = 5000 - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,12 +112,15 @@ def __init__( self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.debug_generate_event_chains = False self.end_date = None self.output_file = None self.population: Optional[Population] = None - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optinoal[Population] = None + + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains: Optional[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -288,8 +290,9 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. @@ -329,7 +332,7 @@ def initialise(self, *, end_date: Date) -> None: self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -418,8 +421,9 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.fire_single_event(event, date) self.date = to_date - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. - self.event_chains.to_csv('output.csv', index=False) + if self.debug_generate_event_chains: + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. 
+ self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -492,13 +496,14 @@ def do_birth(self, mother_id: int) -> int: data = pop_dict, description='Links forming chains of events for simulated individuals') - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index 168b1d41a1..f8dc67d471 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -12,6 +12,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 +FACTOR_POP_DICT = 1000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 0dd862f2a9b485a33933e185e3c59ad64ed33ed9 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:30 +0000 Subject: [PATCH 20/97] Change label of person when iterating --- .../analysis_extract_data.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 6eb6408830..4c8e7d8197 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -41,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } - initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI @@ -54,13 +54,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No record = [] - for i in range(0,num_individuals): + for p in range(0,num_individuals): + + print("At person = ", p) individual_event_chains = extract_results( results_folder, module='tlo.simulation', key='event_chains', - column=str(i), + column=str(p), do_scaling=False ) @@ -69,7 +71,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No for r in range(0,num_runs): - print("AT RUN = ", r) + initial_properties = {} progression_properties = {} @@ -78,7 
+80,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No first_event = {} last_event = {} properties = {} - + average_disability = 0 + prev_disability_incurred = 0 #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} @@ -95,7 +98,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # print(value) initial_properties = list_for_individual[0] - print(initial_properties) + # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} @@ -103,8 +106,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = {} for i in list_for_individual: if 'event' in i: - print("") - print(i) + #print("") + #print(i) if 'RTIPolling' in i['event']: #print("I'm in polling event") #print(i) @@ -126,10 +129,26 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # Keep track of evolution in individual's properties progression_properties = initial_properties.copy() progression_properties.update(i) + + # dalys incurred + if 'rt_disability' in i: + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) + + # If disability has changed as a result of this, recalculate + if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + # Update variables + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + + #print(progression_properties) # Update footprint @@ -141,34 +160,33 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No ind_Counter[i['level']].update(Counter(apply)) if 'is_alive' in i and i['is_alive'] is False: - print("Death", i) - print("-------Total footprint", ind_Counter) + #print("Death", i) + #print("-------Total footprint", ind_Counter) break # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] + else: + key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - - #print("-------Total footprint", ind_Counter) - #for key, value in key_first_event.items(): - # if 'rt_' in key or 'alive' in key: - # print(f"{key}: {value}") - #print(#) - #for key, value in key_last_event.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - # print(f"{key}: {value}") - #print(key_first_event) - #print(key_last_event) - print(initial_rt_event_properties) + #print("Average disability", key_last_event['rt_disability_average']) + properties = key_first_event | key_last_event + + if 
not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): + print("Error in computed average for individual ", p, r ) + record.append(properties) - for key, value in properties.items(): + #for key, value in properties.items(): #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - print(f"{key}: {value}") + #print(f"{key}: {value}") + # print("Initial event properties", initial_rt_event_properties) df = pd.DataFrame(record) df.to_csv("raw_data.csv", index=False) From 84f826322ba13f6fa1631d639944c2bac50667f6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:55:03 +0000 Subject: [PATCH 21/97] Correctly retrieve event name --- src/tlo/events.py | 12 ++++++------ src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index ba8024f621..f67b54458a 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -97,7 +97,7 @@ def compare_population_dataframe(self,df_before, df_after): # First add event info link_info = { 'person_ID': idx, - 'event': str(self), + 'event': type(self).__name__, 'event_date': self.sim.date, } @@ -136,7 +136,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -164,7 +164,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> link_info = { #'person_ID' : self.target, 'person_ID' : self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, } # Store (if any) property changes as a result of the event for this individual @@ -179,7 +179,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -202,13 +202,13 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> indices = change.index new_rows_before = df_before.loc[indices] new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self + new_rows_before['event'] = type(self).__name__ new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self + new_rows_after['event'] = type(self).__name__ new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f267181b56..978b26d7c5 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -222,7 +222,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # TO BE REMOVED This is currently just used for debugging. 
Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ #str(self.event_name) row['event_date'] = self.sim.date row['when'] = 'Before' @@ -268,7 +268,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> link_info = { 'person_ID': self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, @@ -285,7 +285,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' row['appt_footprint'] = record_footprint From a490d1995c12ac20beda2fbd16271d22f0e4f8fe Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 20 Jan 2025 11:34:02 +0000 Subject: [PATCH 22/97] Modify scenario file such that can exclude specific services, and corrected analysis file such as for small number of cases where the DALYs are not explicitly resolved the average DALYs are still computed correctly [skip ci] --- .../analysis_extract_data.py | 105 ++++++++++-------- .../scenario_generate_chains.py | 58 +++++++--- 2 files changed, 103 insertions(+), 60 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 4c8e7d8197..3afad7adcc 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -16,6 +16,9 @@ from collections import Counter import ast +# Time simulated to collect data +start_date = Date(2010, 1, 1) +end_date = start_date + pd.DateOffset(months=13) # Range of years considered min_year = 2010 @@ -25,6 +28,13 @@ def all_columns(_df): return pd.Series(_df.all()) +def check_if_beyond_time_range_considered(progression_properties): + matching_keys = [key for key in progression_properties.keys() if "rt_date_to_remove_daly" in key] + if matching_keys: + for key in matching_keys: + if progression_properties[key] > end_date: + print("Beyond time range considered, need at least ",progression_properties[key]) + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. 
@@ -44,19 +54,21 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI - # Will be added through computation: total duration of event initial_rt_event_properties = set() - + num_individuals = 1000 num_runs = 50 record = [] - + # Include results folder in output file name + name_tag = str(results_folder).replace("outputs/", "") + + for p in range(0,num_individuals): - print("At person = ", p) + print("At person = ", p, " out of ", num_individuals) individual_event_chains = extract_results( results_folder, @@ -66,51 +78,41 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No do_scaling=False ) - #print(individual_event_chains) - - for r in range(0,num_runs): - - - initial_properties = {} - progression_properties = {} key_first_event = {} key_last_event = {} first_event = {} last_event = {} properties = {} average_disability = 0 + total_dt_included = 0 + dt_in_prev_disability = 0 prev_disability_incurred = 0 - - #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} # Count total appts list_for_individual = [] for item,row in individual_event_chains.iterrows(): value = individual_event_chains.loc[item,(0, r)] - # print("The value is", value, "at run ", r) if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - # elif not isinstance(value,str): - # print(value) + # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] - # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + # The changing or adding of properties from the first_event will be stored in progression_properties progression_properties = {} + for i in list_for_individual: + # Skip the initial_properties, or in other words only consider these if they are 'proper' events if 'event' in i: - #print("") #print(i) if 'RTIPolling' in i['event']: - #print("I'm in polling event") - #print(i) # Keep track of which properties are changed during polling events for key,value in i.items(): @@ -130,67 +132,80 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = initial_properties.copy() progression_properties.update(i) - # dalys incurred + # Initialise chain of Dalys incurred if 'rt_disability' in i: prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) - # If disability has changed as a result of this, recalculate - if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + # If disability has changed as a result of this, recalculate and add previous to rolling average + if 'rt_disability' in i: + dt_in_prev_disability = (i['event_date'] - prev_date).days + #print("Detected change in disability", 
i['rt_disability'], "after dt=", dt_in_prev_disability) + #print("Adding the following to the average", prev_disability_incurred, " x ", dt_in_prev_disability ) average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability # Update variables prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - - - #print(progression_properties) - # Update footprint + # Update running footprint if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': footprint = i['appt_footprint'] if 'Counter' in footprint: footprint = footprint[len("Counter("):-1] apply = eval(footprint, eval_env) ind_Counter[i['level']].update(Counter(apply)) - + + # If the individual has died, ensure chain of event is interrupted here and update rolling average of DALYs if 'is_alive' in i and i['is_alive'] is False: - #print("Death", i) - #print("-------Total footprint", ind_Counter) + if ((i['event_date'] - polling_event['rt_date_inj']).days) > total_dt_included: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability break - - + + # check_if_beyond_time_range_considered(progression_properties) + # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + + # If individual didn't die and the key_last_event didn't result in a final change in DALYs, ensure that the last change is recorded here + if not key_first_event['rt_imm_death'] and (total_dt_included < key_last_event['duration_days']): + #print("Number of events", len(list_for_individual)) + #for i in list_for_individual: + # if 'event' in i: + # print(i) + dt_in_prev_disability = (progression_properties['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability + + # Now calculate the average disability incurred, and store any permanent disability and total footprint + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0: key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] else: key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - #print("Average disability", key_last_event['rt_disability_average']) + if key_last_event['duration_days']!=total_dt_included: + print("The duration of event and total_dt_included don't match", key_last_event['duration_days'], total_dt_included) + exit(-1) properties = key_first_event | key_last_event - - if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): - print("Error in computed average for individual ", p, r ) record.append(properties) - #for key, value in properties.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - #print(f"{key}: {value}") - # print("Initial event properties", initial_rt_event_properties) - - df = pd.DataFrame(record) - df.to_csv("raw_data.csv", index=False) + + df = pd.DataFrame(record) + df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) + print(df) 
print(initial_rt_event_properties) exit(-1) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 79df3f55b6..822bf13ad8 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -18,7 +18,7 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario @@ -92,7 +92,35 @@ def modules(self): # fullmodel(resourcefilepath=self.resources) # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] # ) + """ + def draw_parameters(self, draw_number, rng): + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'HealthSystem': { + 'Service_Availability': list(self._scenarios.values())[draw_number], + }, + } + ) + def _get_scenarios(self) -> Dict[str, list[str]]: + Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. + The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. + + # Generate list of TREATMENT_IDs and filter to the resolution needed + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] + + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*", "Nothing": []}) + #service_availability.update( + # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + #) + + return service_availability + + """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] @@ -107,20 +135,27 @@ def draw_parameters(self, draw_number, rng): # case 6: gfHE = 0.030, factor = 1.07326 def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - """ + #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] - self.YEAR_OF_CHANGE = 2019 + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*"], "Nothing": []}) + service_availability.update( + {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + ) + print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { "HealthSystem": { - "yearly_HR_scaling_mode": "no_scaling", + "Service_Availability": service_availability["No Rti_BurnManagement*"], }, } ), @@ -128,20 +163,13 @@ def _get_scenarios(self) -> Dict[str, Dict]: } def _baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. """ + #Return the Dict with values for the parameter changes that define the baseline scenario. 
return mix_scenarios( get_parameters_for_status_quo(), { "HealthSystem": { "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH - "year_mode_switch": self.YEAR_OF_CHANGE, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "cons_availability": "default", + "cons_availability": "all", } }, ) From 08a5d9a29c9e2e8af7832ca49bfca1cb75f6d8d6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 12 Apr 2025 11:34:07 +0100 Subject: [PATCH 23/97] Change seed in scenario file --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 822bf13ad8..3bc75978d2 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -51,7 +51,7 @@ class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() - self.seed = 0 + self.seed = 42 self.start_date = Date(2010, 1, 1) self.end_date = self.start_date + pd.DateOffset(months=13) self.pop_size = 1000 From 3dda343f65c49e429c677b89d1536531fa83833a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 14 Apr 2025 18:06:08 +0200 Subject: [PATCH 24/97] latest scenario --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 3bc75978d2..1297c6b18b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_BurnManagement*"], + "Service_Availability": service_availability["No Rti_FractureCast*"], }, } ), From d9e3f66138c0e372b2b0fa0ac10e7393457bcaf8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 09:35:47 +0100 Subject: [PATCH 25/97] Latest scenario version --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 1297c6b18b..b4ad946154 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_FractureCast*"], + "Service_Availability": service_availability["No Rti_MinorSurgeries*"], }, } ), From ddf6f689b6b9184e3f09ac1906417e6fa0495a7f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 15:44:41 +0100 Subject: [PATCH 26/97] Latest version of scenario file --- 
.../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index b4ad946154..35b7d75e1c 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_MinorSurgeries*"], + "Service_Availability": service_availability["No Rti_ShockTreatment*"], }, } ), From 0e38408d5e37ccb4f894bb89c4d3c93673ae09a3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:20:35 +0100 Subject: [PATCH 27/97] Ensure changes to mni dataframe are captured as well --- .../scenario_generate_chains.py | 30 ++-- src/tlo/events.py | 164 ++++++++++++++++-- src/tlo/methods/hsi_event.py | 112 ++++++++---- src/tlo/methods/pregnancy_helper_functions.py | 50 +----- src/tlo/methods/pregnancy_supervisor.py | 50 ++++++ src/tlo/methods/rti.py | 4 +- src/tlo/simulation.py | 13 +- src/tlo/util.py | 2 +- 8 files changed, 314 insertions(+), 111 deletions(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 35b7d75e1c..64fa70d055 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=13) + self.end_date = self.start_date + pd.DateOffset(months=36) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 50 + self.runs_per_draw = 1 self.generate_event_chains = True def log_configuration(self): @@ -77,21 +77,31 @@ def log_configuration(self): def modules(self): # MODIFY # Here instead of running full module + """ return [demography.Demography(resourcefilepath=self.resources), enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), - rti.RTI(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, + #rti.RTI(resourcefilepath=self.resources), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), + labour.Labour(resourcefilepath=self.resources), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), + contraception.Contraception(resourcefilepath=self.resources), + newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), + hiv.Hiv(resourcefilepath=self.resources), + tb.Tb(resourcefilepath=self.resources), + epi.Epi(resourcefilepath=self.resources), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=self.resources, 
mode_appt_constraints=1, cons_availability='all')] - - # return ( - # fullmodel(resourcefilepath=self.resources) - # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - # ) + """ + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) """ def draw_parameters(self, draw_number, rng): return mix_scenarios( diff --git a/src/tlo/events.py b/src/tlo/events.py index f67b54458a..3a8f4f58c7 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -13,6 +13,7 @@ from tlo.util import FACTOR_POP_DICT +import copy logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -76,23 +77,85 @@ def apply(self, target): """ raise NotImplementedError - def compare_population_dataframe(self,df_before, df_after): + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + """ + will_pause = False + + target_attribute = 'hcw_not_avail' + if len(entire_mni_after)>0: + print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + person = next(iter(entire_mni_after)) + entire_mni_after[person][target_attribute] = True + will_pause = True + print("Default target value after", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + + + if will_pause: + print("Reprint") + print(entire_mni_before) + print(entire_mni_after) + print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + """ + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + if len(diffs)>0: + print("DIfferences for ", diffs) + return diffs + + def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): """ This function compares the population dataframe before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals chain_links = {} len_of_diff = len(diff_mask) # Loop through each row of the mask + persons_changed = [] for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) # Create a dictionary for this person # First add event info link_info = { @@ -104,19 +167,47 @@ def compare_population_dataframe(self,df_before, df_after): # Store the new values from df_after for the changed columns for col in changed_cols: link_info[col] = df_after.at[idx, col] - + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + # Append the event and changes to the individual key chain_links[idx] = str(link_info) - + + # Check individuals + if len(diff_mni)>0: + print("Non-zero changes in mni") + for key in diff_mni: + if key not in persons_changed: + print("Individual ", key, "is changing in mni alone") + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'person_ID': key, + 'event': type(self).__name__, + 'event_date': self.sim.date, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = str(link_info) + print("Change for ", key, " is ", str(link_info)) + return chain_links - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + # Initialise these variables print_chains = False df_before = [] row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} + entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. @@ -129,9 +220,16 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Target is single individual if self.target != self.sim.population: + # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
row = self.sim.population.props.loc[[abs(self.target)]] @@ -139,6 +237,13 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -146,20 +251,30 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. df_before = self.sim.population.props.copy() + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - return print_chains, row_before, df_before + return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - + + if print_chains: # Target is single individual if self.target != self.sim.population: + + mni_instances_after = False + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, @@ -167,11 +282,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> 'event' : type(self).__name__, 'event_date' : self.sim.date, } + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + print("INDIVIDUAL WAS ADDED") + exit(-1) + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just 
used for debugging. Will be removed from final version of PR. @@ -182,6 +321,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -190,9 +330,10 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Population frame after event df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after) + chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if self.sim.debug_generate_event_chains: @@ -222,7 +363,7 @@ def run(self): # Collect relevant information before event takes place if self.sim.generate_event_chains: - print_chains, row_before, df_before = self.store_chains_to_do_before_event() + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) self.post_apply_hook() @@ -230,7 +371,7 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -238,6 +379,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: + print(chain_links) logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 978b26d7c5..41342f117e 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -195,65 +195,83 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ # Initialise these variables print_chains = False row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. 
if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF - # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual if self.target != self.sim.population: - # In the case of HSI events, only individual events should exist and therefore be logged - print_chains = True - # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = type(self).__name__ #str(self.event_name) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' - - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error - # raise RuntimeError("Cannot have population-wide HSI events") - logger.debug( - key="message", - data=( - "Cannot have population-wide HSI events" - ), - ) - + print("ERROR: there shouldn't be pop-wide HSI event") - return print_chains, row_before + return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ if print_chains: # For HSI event, this will only ever occur for individual events - + chain_links = {} + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni_instances_after = False + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes. 
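As context for the hunk that follows: a minimal sketch of the before/after row comparison used to record only the properties an HSI event actually changed for its target person. The function name and example values are illustrative; the comparison assumes both rows have already had NaN replaced by a sentinel (as the fillna(-99999) calls above do), so missing values on both sides count as "no change".

    # Sketch only: build a link_info dict containing the event metadata plus any columns
    # whose value differs between the pre-event and post-event snapshots of one person.
    import pandas as pd

    def diff_person_row(row_before: pd.Series, row_after: pd.Series, base_info: dict) -> dict:
        link_info = dict(base_info)                  # e.g. person_ID, event, event_date
        for key in row_before.index:
            if row_before[key] != row_after[key]:    # NaNs already replaced by a sentinel
                link_info[key] = row_after[key]
        return link_info

    # Hypothetical example values, not taken from the model:
    before = pd.Series({"is_alive": True, "rt_disability": 0.0}).fillna(-99999)
    after = pd.Series({"is_alive": True, "rt_disability": 0.2}).fillna(-99999)
    print(diff_person_row(before, after, {"person_ID": 7, "event": "ExampleHSI"}))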
@@ -278,8 +296,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - - chain_links = {self.target : str(link_info)} + + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + + + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + chain_links[self.target] = str(link_info) if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -300,7 +345,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before = self.store_chains_to_do_before_event() + print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -315,10 +360,9 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 8f7faa0503..79483cddaa 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -542,55 +542,7 @@ def update_mni_dictionary(self, individual_id): if self == self.sim.modules['PregnancySupervisor']: - mni[individual_id] = {'delete_mni': False, # if True, mni deleted in report_daly_values function - 'didnt_seek_care': False, - 'cons_not_avail': False, - 'comp_not_avail': False, - 'hcw_not_avail': False, - 'ga_anc_one': 0, - 'anc_ints': [], - 'abortion_onset': pd.NaT, - 'abortion_haem_onset': pd.NaT, - 'abortion_sep_onset': pd.NaT, - 'eclampsia_onset': pd.NaT, - 'mild_mod_aph_onset': pd.NaT, - 'severe_aph_onset': pd.NaT, - 'chorio_onset': pd.NaT, - 'chorio_in_preg': False, # use in predictor in newborn linear models - 'ectopic_onset': pd.NaT, - 'ectopic_rupture_onset': pd.NaT, - 'gest_diab_onset': pd.NaT, - 'gest_diab_diagnosed_onset': pd.NaT, - 
'gest_diab_resolution': pd.NaT, - 'mild_anaemia_onset': pd.NaT, - 'mild_anaemia_resolution': pd.NaT, - 'moderate_anaemia_onset': pd.NaT, - 'moderate_anaemia_resolution': pd.NaT, - 'severe_anaemia_onset': pd.NaT, - 'severe_anaemia_resolution': pd.NaT, - 'mild_anaemia_pp_onset': pd.NaT, - 'mild_anaemia_pp_resolution': pd.NaT, - 'moderate_anaemia_pp_onset': pd.NaT, - 'moderate_anaemia_pp_resolution': pd.NaT, - 'severe_anaemia_pp_onset': pd.NaT, - 'severe_anaemia_pp_resolution': pd.NaT, - 'hypertension_onset': pd.NaT, - 'hypertension_resolution': pd.NaT, - 'obstructed_labour_onset': pd.NaT, - 'sepsis_onset': pd.NaT, - 'uterine_rupture_onset': pd.NaT, - 'mild_mod_pph_onset': pd.NaT, - 'severe_pph_onset': pd.NaT, - 'secondary_pph_onset': pd.NaT, - 'vesicovaginal_fistula_onset': pd.NaT, - 'vesicovaginal_fistula_resolution': pd.NaT, - 'rectovaginal_fistula_onset': pd.NaT, - 'rectovaginal_fistula_resolution': pd.NaT, - 'test_run': False, # used by labour module when running some model tests - 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset - 'new_onset_spe': False, - 'cs_indication': 'none' - } + mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: labour_variables = {'labour_state': None, diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index 7dd8819ab6..f634d9b971 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -61,6 +61,56 @@ def __init__(self, name=None, resourcefilepath=None): # This variable will store a Bitset handler for the property ps_abortion_complications self.abortion_complications = None + + self.default_mni_values = {'delete_mni': False, # if True, mni deleted in report_daly_values function + 'didnt_seek_care': False, + 'cons_not_avail': False, + 'comp_not_avail': False, + 'hcw_not_avail': False, + 'ga_anc_one': 0, + 'anc_ints': [], + 'abortion_onset': pd.NaT, + 'abortion_haem_onset': pd.NaT, + 'abortion_sep_onset': pd.NaT, + 'eclampsia_onset': pd.NaT, + 'mild_mod_aph_onset': pd.NaT, + 'severe_aph_onset': pd.NaT, + 'chorio_onset': pd.NaT, + 'chorio_in_preg': False, # use in predictor in newborn linear models + 'ectopic_onset': pd.NaT, + 'ectopic_rupture_onset': pd.NaT, + 'gest_diab_onset': pd.NaT, + 'gest_diab_diagnosed_onset': pd.NaT, + 'gest_diab_resolution': pd.NaT, + 'mild_anaemia_onset': pd.NaT, + 'mild_anaemia_resolution': pd.NaT, + 'moderate_anaemia_onset': pd.NaT, + 'moderate_anaemia_resolution': pd.NaT, + 'severe_anaemia_onset': pd.NaT, + 'severe_anaemia_resolution': pd.NaT, + 'mild_anaemia_pp_onset': pd.NaT, + 'mild_anaemia_pp_resolution': pd.NaT, + 'moderate_anaemia_pp_onset': pd.NaT, + 'moderate_anaemia_pp_resolution': pd.NaT, + 'severe_anaemia_pp_onset': pd.NaT, + 'severe_anaemia_pp_resolution': pd.NaT, + 'hypertension_onset': pd.NaT, + 'hypertension_resolution': pd.NaT, + 'obstructed_labour_onset': pd.NaT, + 'sepsis_onset': pd.NaT, + 'uterine_rupture_onset': pd.NaT, + 'mild_mod_pph_onset': pd.NaT, + 'severe_pph_onset': pd.NaT, + 'secondary_pph_onset': pd.NaT, + 'vesicovaginal_fistula_onset': pd.NaT, + 'vesicovaginal_fistula_resolution': pd.NaT, + 'rectovaginal_fistula_onset': pd.NaT, + 'rectovaginal_fistula_resolution': pd.NaT, + 'test_run': False, # used by labour module when running some model tests + 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset + 'new_onset_spe': False, + 'cs_indication': 'none' + } INIT_DEPENDENCIES = {'Demography'} diff --git 
a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index c79b26314d..e772366d57 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,9 +2865,9 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 + #pred = 1.0 #else: - # pred = eq.predict(df.loc[rt_current_non_ind]) + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index bb766562a0..045e86bdd8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,7 +109,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = None + self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.debug_generate_event_chains = False @@ -299,6 +299,12 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') + + #if "PregnancySupervisor" in self.modules: + # print("I found it!") + # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) + # exit(-1) + for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later @@ -329,10 +335,10 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -491,7 +497,6 @@ def do_birth(self, mother_id: int) -> int: pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length - print("Length at birth", len(pop_dict)) logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/util.py b/src/tlo/util.py index e246fcf05b..c9130e3f07 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. 
DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 1000 +FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 9b8f01ff383bdb0954146b93849c6c7a18008b2d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:24:39 +0100 Subject: [PATCH 28/97] Tidy up --- .../analysis_extract_data.py | 2 +- src/tlo/events.py | 199 +++++++----------- src/tlo/methods/hiv.py | 32 ++- src/tlo/methods/hsi_event.py | 165 ++++++--------- src/tlo/methods/tb.py | 5 +- src/tlo/simulation.py | 41 +--- 6 files changed, 151 insertions(+), 293 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 3afad7adcc..8068db203a 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -59,7 +59,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_rt_event_properties = set() num_individuals = 1000 - num_runs = 50 + num_runs = 1 record = [] # Include results folder in output file name name_tag = str(results_folder).replace("outputs/", "") diff --git a/src/tlo/events.py b/src/tlo/events.py index 3a8f4f58c7..9f762fd3c6 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -77,7 +77,7 @@ def apply(self, target): """ raise NotImplementedError - def values_differ(self, v1, v2): + def mni_values_differ(self, v1, v2): if isinstance(v1, list) and isinstance(v2, list): return v1 != v2 # simple element-wise comparison @@ -111,14 +111,14 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] @@ -126,7 +126,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): else: # person is in both # Compare properties for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] @@ -135,13 +135,12 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): print("DIfferences for ", diffs) return diffs - def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe before/after a population-wide event has occurred. 
+ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals @@ -176,12 +175,10 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en # Append the event and changes to the individual key chain_links[idx] = str(link_info) - # Check individuals + # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: - print("Non-zero changes in mni") for key in diff_mni: if key not in persons_changed: - print("Individual ", key, "is changing in mni alone") # If individual hadn't been previously added due to changes in pop df, add it here link_info = { 'person_ID': key, @@ -193,7 +190,6 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = str(link_info) - print("Change for ", key, " is ", str(link_info)) return chain_links @@ -210,7 +206,6 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF @@ -224,140 +219,88 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is already in mni dictionary, if so copy her original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the pop dataframe before the event has occurred. + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
df_before = self.sim.population.props.copy() entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - - - if print_chains: - - # Target is single individual - if self.target != self.sim.population: + + # Target is single individual + if self.target != self.sim.population: + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - mni_instances_after = False + # Check if individual is in mni after the event + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: - - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - print("INDIVIDUAL WAS ADDED") - exit(-1) - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - chain_links[self.target] = str(link_info) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Print entire row - row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - - else: - # Target is entire population. 
Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Or print entire rows - change = df_before.compare(df_after) - if not change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = type(self).__name__ - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = type(self).__name__ - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + #'person_ID' : self.target, + 'person_ID' : self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + } + + # Store (if any) property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[self.target] = str(link_info) + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) return chain_links + def run(self): """Make the event happen.""" @@ -370,8 +313,8 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. - if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if self.sim.generate_event_chains and print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -384,7 +327,7 @@ def run(self): data= pop_dict, description='Links forming chains of events for simulated individuals') - #print("Chain events ", chain_links) + print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8487eaa467..0a80f8b41b 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,12 +631,11 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -906,16 +905,10 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: - print("Should be generating data") - sim.schedule_event( - HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) - ) - else: - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1901,12 +1894,11 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + # 
Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 41342f117e..dbca98da5c 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -216,8 +216,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): @@ -230,112 +229,75 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is in mni dictionary before the event, if so store its original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: print("ERROR: there shouldn't be pop-wide HSI event") + exit(-1) return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - if print_chains: - # For HSI event, this will only ever occur for individual events - chain_links = {} - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes. 
+ # For HSI event, this will only ever occur for individual events + chain_links = {} - # Add event details + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes or not. + + # Add event details + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: + link_info = { + 'person_ID': self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + 'appt_footprint' : record_footprint, + 'level' : record_level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - - - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - chain_links[self.target] = str(link_info) - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. 
- row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + + chain_links[self.target] = str(link_info) return chain_links @@ -360,17 +322,16 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) + if print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals - - pop_dict.update(chain_links) - - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + if len(chain_links)>0: + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict.update(chain_links) + + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 33edeb63c8..fe5d19c964 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -890,10 +890,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: - sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) - else: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) # 2) log at the end of the year diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 045e86bdd8..8356424901 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,19 +109,13 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] - self.debug_generate_event_chains = False self.end_date = None self.output_file = None 
self.population: Optional[Population] = None - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optional[Population] = None - + self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -289,21 +283,12 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - #if "PregnancySupervisor" in self.modules: - # print("I found it!") - # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) - # exit(-1) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key @@ -311,12 +296,11 @@ def make_initial_population(self, *, n: int) -> None: pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} pop_dict_full.update(pop_dict) - - print("Size for full sim", len(pop_dict_full)) logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -334,15 +318,9 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: - # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] - else: - # If not using to print chains, cannot ignore epi - self.generate_event_chains_overwrite_epi = False - + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) @@ -426,10 +404,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date - - if self.debug_generate_event_chains: - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. 
- self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -500,15 +474,6 @@ def do_birth(self, mother_id: int) -> int: logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id From 3b81de6546cb498938ff9918c852e39369b29ca3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:32:50 +0100 Subject: [PATCH 29/97] All fixes made --- .../analysis_extract_data.py | 8 +++- .../scenario_generate_chains.py | 2 +- src/tlo/events.py | 33 +++---------- src/tlo/methods/hsi_event.py | 4 +- src/tlo/methods/pregnancy_helper_functions.py | 46 ++++--------------- src/tlo/methods/pregnancy_supervisor.py | 40 ++++++++++++++++ 6 files changed, 64 insertions(+), 69 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 8068db203a..7fe15f0eb4 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -98,7 +98,11 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - + + for i in list_for_individual: + print(i) + + """ # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] @@ -201,7 +205,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No properties = key_first_event | key_last_event record.append(properties) - + """ df = pd.DataFrame(record) df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 64fa70d055..e9291a50ce 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,7 +53,7 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=36) + self.end_date = self.start_date + pd.DateOffset(months=18) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) diff --git a/src/tlo/events.py b/src/tlo/events.py index 9f762fd3c6..993c27090c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -88,40 +88,23 @@ def mni_values_differ(self, v1, v2): def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs = {} - """ - will_pause = False - - target_attribute = 'hcw_not_avail' - if len(entire_mni_after)>0: - print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - person = next(iter(entire_mni_after)) - entire_mni_after[person][target_attribute] = True - will_pause = True - print("Default target value after", 
self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - - if will_pause: - print("Reprint") - print(entire_mni_before) - print(entire_mni_after) - print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - """ all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] else: # person is in both # Compare properties @@ -131,8 +114,6 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] - if len(diffs)>0: - print("DIfferences for ", diffs) return diffs def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): @@ -272,13 +253,13 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in mni_row_before: if self.mni_values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.mni_values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] @@ -322,12 +303,10 @@ def run(self): # Log chain_links here if len(chain_links)>0: - print(chain_links) + logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - - print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index dbca98da5c..85ac6da3e2 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -286,13 +286,13 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in 
mni_row_before: if self.values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 79483cddaa..2456f57e8b 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -545,40 +545,12 @@ def update_mni_dictionary(self, individual_id): mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: - labour_variables = {'labour_state': None, - # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) - 'birth_weight': 'normal_birth_weight', - 'birth_size': 'average_for_gestational_age', - 'delivery_setting': None, # home_birth, health_centre, hospital - 'twins': df.at[individual_id, 'ps_multiple_pregnancy'], - 'twin_count': 0, - 'twin_one_comps': False, - 'pnc_twin_one': 'none', - 'bf_status_twin_one': 'none', - 'eibf_status_twin_one': False, - 'an_placental_abruption': df.at[individual_id, 'ps_placental_abruption'], - 'corticosteroids_given': False, - 'clean_birth_practices': False, - 'abx_for_prom_given': False, - 'abx_for_pprom_given': False, - 'endo_pp': False, - 'retained_placenta': False, - 'uterine_atony': False, - 'amtsl_given': False, - 'cpd': False, - 'mode_of_delivery': 'vaginal_delivery', - 'neo_will_receive_resus_if_needed': False, - # vaginal_delivery, instrumental, caesarean_section - 'hsi_cant_run': False, # True (T) or False (F) - 'sought_care_for_complication': False, # True (T) or False (F) - 'sought_care_labour_phase': 'none', - 'referred_for_cs': False, # True (T) or False (F) - 'referred_for_blood': False, # True (T) or False (F) - 'received_blood_transfusion': False, # True (T) or False (F) - 'referred_for_surgery': False, # True (T) or False (F)' - 'death_in_labour': False, # True (T) or False (F) - 'single_twin_still_birth': False, # True (T) or False (F) - 'will_receive_pnc': 'none', - 'passed_through_week_one': False} - - mni[individual_id].update(labour_variables) + + labour_default = self.sim.modules['PregnancySupervisor'].default_labour_values.copy() + mni[individual_id].update(labour_default) + + # Update from default based on individual case + mni[individual_id]['twins'] = df.at[individual_id, 'ps_multiple_pregnancy'] + mni[individual_id]['an_placental_abruption'] = df.at[individual_id, 'ps_placental_abruption'] + + diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index f634d9b971..5d747d44c2 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -82,6 +82,8 @@ def __init__(self, name=None, resourcefilepath=None): 'gest_diab_onset': pd.NaT, 'gest_diab_diagnosed_onset': pd.NaT, 'gest_diab_resolution': pd.NaT, + 'none_anaemia_onset': pd.NaT, + 'none_anaemia_resolution': pd.NaT, 'mild_anaemia_onset': pd.NaT, 'mild_anaemia_resolution': pd.NaT, 'moderate_anaemia_onset': pd.NaT, @@ -111,6 +113,44 @@ def __init__(self, name=None, resourcefilepath=None): 'new_onset_spe': False, 'cs_indication': 'none' } + self.default_labour_values = 
{'labour_state': None, + # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) + 'birth_weight': 'normal_birth_weight', + 'birth_size': 'average_for_gestational_age', + 'delivery_setting': None, # home_birth, health_centre, hospital + 'twins': None, + 'twin_count': 0, + 'twin_one_comps': False, + 'pnc_twin_one': 'none', + 'bf_status_twin_one': 'none', + 'eibf_status_twin_one': False, + 'an_placental_abruption': None, + 'corticosteroids_given': False, + 'clean_birth_practices': False, + 'abx_for_prom_given': False, + 'abx_for_pprom_given': False, + 'endo_pp': False, + 'retained_placenta': False, + 'uterine_atony': False, + 'amtsl_given': False, + 'cpd': False, + 'mode_of_delivery': 'vaginal_delivery', + 'neo_will_receive_resus_if_needed': False, + # vaginal_delivery, instrumental, caesarean_section + 'hsi_cant_run': False, # True (T) or False (F) + 'sought_care_for_complication': False, # True (T) or False (F) + 'sought_care_labour_phase': 'none', + 'referred_for_cs': False, # True (T) or False (F) + 'referred_for_blood': False, # True (T) or False (F) + 'received_blood_transfusion': False, # True (T) or False (F) + 'referred_for_surgery': False, # True (T) or False (F)' + 'death_in_labour': False, # True (T) or False (F) + 'single_twin_still_birth': False, # True (T) or False (F) + 'will_receive_pnc': 'none', + 'passed_through_week_one': False} + + self.default_all_mni_values = self.default_mni_values + self.default_all_mni_values.update(self.default_labour_values) INIT_DEPENDENCIES = {'Demography'} From bc61e1efbf7c79c4b85273b5b3c893c0030b362d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 17 Nov 2025 10:21:22 +0000 Subject: [PATCH 30/97] Cleaned and [skip ci] --- src/tlo/simulation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 8356424901..ef2fe4518e 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -108,9 +108,11 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_event_chains = True self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.end_date = None self.output_file = None self.population: Optional[Population] = None From e084e3949c03a8e19bc49f42aea56a154d09dabf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 14:38:07 +0000 Subject: [PATCH 31/97] Start logging data in EAV format --- src/tlo/events.py | 17 ++++++++++------- src/tlo/simulation.py | 11 ++++++++++- src/tlo/util.py | 23 +++++++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 993c27090c..9e9865cdad 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav import copy @@ -233,12 +233,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } + link_info = {} + # #'person_ID' : self.target, + # 'person_ID' : self.target, + # 'event' : type(self).__name__, + # 
'event_date' : self.sim.date, + #} # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,6 +265,9 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything + eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) + print(eav) + exit(-1) # Add individual to the chain links chain_links[self.target] = str(link_info) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef2fe4518e..ef27fa6381 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, df_to_eav try: import dill @@ -290,6 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: + print(len(self.population.props), n) + # EAV structure to capture status of individuals at the start of the simulation + eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') + + """ pop_dict = self.population.props.to_dict(orient='index') for key in pop_dict.keys(): @@ -302,6 +307,10 @@ def make_initial_population(self, *, n: int) -> None: logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + """ + logger.info(key='event_chains', + data = eav.to_dict(), + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/src/tlo/util.py b/src/tlo/util.py index c9130e3f07..e83e19baab 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -94,6 +94,29 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states +def df_to_eav(df, date, event_name): + """Function to convert dataframe into EAV""" + eav = df.stack().reset_index() + eav.columns = ['E', 'A', 'V'] + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[["E", "Date", "NameEvent", "A", "V"]] + + return eav + + +def convert_dict_into_eav(link_info, target, date, event_name): + "Function to convert link info in the form of dictionary into an EAV" + eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) + eav.columns = ['A', 'V'] + eav['E'] = target + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] + + return eav + + def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities that are specific to each individual. 
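The two helpers above perform the wide-to-long conversion behind the EAV log. A minimal sketch of the same idea on toy data (the person id, property values and event names are illustrative, not the real TLO properties):

import pandas as pd

# Toy wide frame: one row per person (entity), one column per property (attribute).
props = pd.DataFrame({"age": [30, 2], "is_alive": [True, True]}, index=[0, 1])

# Wide -> long (EAV): stack() turns each (person, property) cell into its own row,
# as df_to_eav does for the whole population at the start of the simulation.
eav = props.stack().reset_index()
eav.columns = ["E", "A", "V"]
eav["Date"] = pd.Timestamp("2010-01-01")
eav["NameEvent"] = "StartOfSimulation"
print(eav[["E", "Date", "NameEvent", "A", "V"]])

# A per-person dict of changed properties can be flattened the same way,
# mirroring convert_dict_into_eav for a single event.
link_info = {"hv_inf": True, "hv_date_inf": pd.Timestamp("2010-06-01")}
eav_link = pd.DataFrame(list(link_info.items()), columns=["A", "V"])
eav_link["E"] = 7                              # hypothetical person id
eav_link["Date"] = pd.Timestamp("2010-06-01")
eav_link["NameEvent"] = "HivInfectionEvent"    # hypothetical event name
print(eav_link[["E", "Date", "NameEvent", "A", "V"]])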
From ac617e80ff416976229b3f3bdd915198a26da96c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:15:07 +0000 Subject: [PATCH 32/97] Log event chains via EAV approach --- .../analysis_extract_data.py | 27 ++++- .../scenario_generate_chains.py | 4 +- src/tlo/analysis/utils.py | 111 ++++++++++++++++++ src/tlo/events.py | 41 +++---- src/tlo/methods/hsi_event.py | 21 ++-- src/tlo/simulation.py | 40 ++----- src/tlo/util.py | 30 +++-- 7 files changed, 200 insertions(+), 74 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 7fe15f0eb4..9ee37cabef 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -11,7 +11,7 @@ import matplotlib.pyplot as plt from tlo import Date -from tlo.analysis.utils import extract_results +from tlo.analysis.utils import extract_results, extract_event_chains from datetime import datetime from collections import Counter import ast @@ -35,6 +35,27 @@ def check_if_beyond_time_range_considered(progression_properties): if progression_properties[key] > end_date: print("Beyond time range considered, need at least ",progression_properties[key]) +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. 
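extract_event_chains, added to tlo.analysis.utils in this commit, returns one DataFrame per draw. A hedged usage sketch for pulling out a single individual's chain, assuming the 'E', 'EventDate', 'EventName', 'Info' layout this commit produces and a hypothetical results folder:

from pathlib import Path

from tlo.analysis.utils import extract_event_chains

# Hypothetical output folder produced by scenario_generate_chains.py.
results_folder = Path("outputs/scenario_generate_chains")

chains_by_draw = extract_event_chains(results_folder)

# Chain of events for one (hypothetical) individual in draw 0, in date order.
draw0 = chains_by_draw[0]
person = draw0[draw0["E"] == 42].sort_values("EventDate")
for _, link in person.iterrows():
    print(link["EventDate"], link["EventName"], link["Info"])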
@@ -43,6 +64,10 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) + individual_event_chains = extract_event_chains(results_folder) + print_filtered_df(individual_event_chains[0]) + exit(-1) + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index e9291a50ce..6cfbd040fa 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=18) + self.end_date = self.start_date + pd.DateOffset(months=1) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 3 self.generate_event_chains = True def log_configuration(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index e605400332..f762f1eb92 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -341,6 +341,117 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat + + +import pandas as pd + +def unpack_dict_rows(df): + """ + Reconstruct a full dataframe from rows whose columns contain dictionaries + mapping local-row-index → value. Preserves original column order. + """ + original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine how many rows this block has (using the first dict column) + first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) + block_length = len(row[first_dict_col]) + + # Build each reconstructed row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if not isinstance(cell, dict): + raise ValueError(f"Column {col} does not contain a dictionary") + new_row[col] = cell.get(str(i)) + reconstructed_rows.append(new_row) + + # Build DataFrame and enforce the original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + return out.reset_index(drop=True) + + +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + +def extract_event_chains(results_folder: Path, + ) -> dict: + """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. 
+ Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. + """ + module = 'tlo.simulation' + key = 'event_chains' + + # get number of draws and numbers of runs + info = get_scenario_info(results_folder) + + # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df. + res = dict() + + for draw in range(info['number_of_draws']): + + # All individuals in same draw will be combined across runs, so their ID will be offset. + dfs_from_runs = [] + ID_offset = 0 + + for run in range(info['runs_per_draw']): + + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + del df['date'] + recon = unpack_dict_rows(df) + # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. + recon['V'] = recon['V'].apply(str) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) + df_collapsed = ( + recon.groupby(['E', 'EventDate', 'EventName']) + .apply(lambda g: dict(zip(g['A'], g['V']))) + .reset_index(name='Info') + ) + df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + birth_count = (df_final['EventName'] == 'Birth').sum() + + print("Birth count for run ", run, "is ", birth_count) + df_final['E'] = df_final['E'] + ID_offset + + # Calculate ID offset for next run + ID_offset = (max(df_final['E']) + 1) + + # Append these chains to list + dfs_from_runs.append(df_final) + + except KeyError: + # Some logs could not be found - probably because this run failed. + # Simply to not append anything to the df collecting chains. 
+ print("Run failed") + + # Combine all dfs into a single DataFrame + res[draw] = pd.concat(dfs_from_runs, ignore_index=True) + + # Optionally, sort by 'E' and 'EventDate' after combining + res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + + return res def summarize(results: pd.DataFrame, only_mean: bool = False, collapse_columns: bool = False) -> pd.DataFrame: diff --git a/src/tlo/events.py b/src/tlo/events.py index 9e9865cdad..ba91218dbc 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav +from tlo.util import convert_chain_links_into_EAV import copy @@ -139,9 +139,8 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'person_ID': idx, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } # Store the new values from df_after for the changed columns @@ -154,7 +153,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[col] = diff_mni[idx][key] # Append the event and changes to the individual key - chain_links[idx] = str(link_info) + chain_links[idx] = link_info # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -162,15 +161,14 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'person_ID': key, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } for key_prop in diff_mni[key]: link_info[key_prop] = diff_mni[key][key_prop] - chain_links[key] = str(link_info) + chain_links[key] = link_info return chain_links @@ -233,12 +231,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = {} - # #'person_ID' : self.target, - # 'person_ID' : self.target, - # 'event' : type(self).__name__, - # 'event_date' : self.sim.date, - #} + link_info = { + 'EventDate' : self.sim.date, + 'EventName' : type(self).__name__, + } # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,11 +261,8 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything - eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) - print(eav) - exit(-1) # Add individual to the chain links - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info else: # Target is entire population. 
Identify individuals for which properties have changed @@ -300,6 +293,14 @@ def run(self): if self.sim.generate_event_chains and print_chains: chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger_chain.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + """ # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict.update(chain_links) @@ -310,7 +311,7 @@ def run(self): logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - + """ class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 85ac6da3e2..59b7b1f60a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,7 +8,7 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population -from tlo.util import FACTOR_POP_DICT +from tlo.util import convert_chain_links_into_EAV import pandas as pd @@ -266,9 +266,8 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, record_level = 'N/A' link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, + 'EventName' : type(self).__name__, + 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } @@ -297,7 +296,7 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info return chain_links @@ -325,13 +324,13 @@ def run(self, squeeze_factor): if print_chains: chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict.update(chain_links) + if chain_links: - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + logger_chain.info(key='event_chains', + data = ednav, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef27fa6381..da55d42efc 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT, df_to_eav +from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -290,26 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. 
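Because only deltas are logged after the initial snapshot, a person's full state at any date can be rebuilt by folding the chain links over that snapshot. A minimal sketch of that reconstruction, on hypothetical records with illustrative event names:

import pandas as pd

# Hypothetical chain for one person: initial snapshot followed by per-event deltas.
chain = [
    {"EventDate": pd.Timestamp("2010-01-01"), "EventName": "StartOfSimulation",
     "is_alive": True, "hv_inf": False, "hv_diagnosed": False},
    {"EventDate": pd.Timestamp("2010-06-01"), "EventName": "HivInfectionEvent",  # illustrative
     "hv_inf": True},
    {"EventDate": pd.Timestamp("2011-01-01"), "EventName": "HivTestEvent",       # illustrative
     "hv_diagnosed": True},
]

def state_at(chain, when):
    """Replay the snapshot plus all deltas dated on or before `when`."""
    state = {}
    for link in sorted(chain, key=lambda l: l["EventDate"]):
        if link["EventDate"] > when:
            break
        state.update({k: v for k, v in link.items() if k not in ("EventDate", "EventName")})
    return state

# By mid-2010 the infection delta has been applied but not the diagnosis delta.
print(state_at(chain, pd.Timestamp("2010-07-01")))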
if self.generate_event_chains: - print(len(self.population.props), n) - # EAV structure to capture status of individuals at the start of the simulation - eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') - - """ - pop_dict = self.population.props.to_dict(orient='index') - - for key in pop_dict.keys(): - pop_dict[key]['person_ID'] = key - pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later - - pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict_full.update(pop_dict) - - logger.info(key='event_chains', - data = pop_dict_full, - description='Links forming chains of events for simulated individuals') - """ + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') + logger.info(key='event_chains', - data = eav.to_dict(), + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') end = time.time() @@ -475,15 +460,16 @@ def do_birth(self, mother_id: int) -> int: if self.generate_event_chains: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. - prop_dict = self.population.props.loc[child_id].to_dict() - prop_dict['event'] = 'Birth' - prop_dict['event_date'] = self.date - - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + link_info = self.population.props.loc[child_id].to_dict() + link_info['EventName'] = 'Birth' + link_info['EventDate'] = self.date + chain_links = {} + chain_links[child_id] = link_info # Convert to string to avoid issue of length + ednav = convert_chain_links_into_EAV(chain_links) + logger.info(key='event_chains', - data = pop_dict, + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index e83e19baab..ee29445e9a 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,6 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. 
DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): @@ -94,25 +93,30 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states -def df_to_eav(df, date, event_name): +def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[["E", "Date", "NameEvent", "A", "V"]] + eav['EventDate'] = date + eav['EventName'] = event_name + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav -def convert_dict_into_eav(link_info, target, date, event_name): - "Function to convert link info in the form of dictionary into an EAV" - eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) - eav.columns = ['A', 'V'] - eav['E'] = target - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] +def convert_chain_links_into_EAV(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") + id_cols = ["EventDate", "EventName"] + + eav = df.reset_index().melt( + id_vars=["index"] + id_cols, # index = person ID + var_name="A", + value_name="V" + ) + + eav.rename(columns={"index": "E"}, inplace=True) + + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav From 5234550934fd0bf156e43603d593945c66d888c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:44:52 +0000 Subject: [PATCH 33/97] No need to store EventDate since this is already stored in logger by default --- src/tlo/analysis/utils.py | 62 ++++++++++++++++++++++++++++++++---- src/tlo/events.py | 3 -- src/tlo/methods/hsi_event.py | 1 - src/tlo/simulation.py | 1 - src/tlo/util.py | 7 ++-- 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index f762f1eb92..00a297030b 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -345,7 +345,7 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: import pandas as pd -def unpack_dict_rows(df): +def old_unpack_dict_rows(df): """ Reconstruct a full dataframe from rows whose columns contain dictionaries mapping local-row-index → value. Preserves original column order. @@ -372,6 +372,54 @@ def unpack_dict_rows(df): out = pd.DataFrame(reconstructed_rows)[original_cols] return out.reset_index(drop=True) + +def unpack_dict_rows(df, non_dict_cols=None): + """ + Reconstruct a full DataFrame from rows where most columns are dictionaries. + Non-dict columns (e.g., 'date') are propagated to all reconstructed rows. 
+ + Parameters: + df: pd.DataFrame + non_dict_cols: list of columns that are NOT dictionaries + """ + if non_dict_cols is None: + non_dict_cols = [] + + original_cols = ['E', 'date', 'EventName', 'A', 'V'] + + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine dict columns for this row + dict_cols = [col for col in original_cols if col not in non_dict_cols] + + if not dict_cols: + # No dict columns, just append row + reconstructed_rows.append(row.to_dict()) + continue + + # Use the first dict column to get the block length + first_dict_col = dict_cols[0] + block_length = len(row[first_dict_col]) + + # Build each expanded row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if col in dict_cols: + # Access the dict using string or integer keys + new_row[col] = cell.get(str(i), cell.get(i)) + else: + # Propagate non-dict value + new_row[col] = cell + reconstructed_rows.append(new_row) + + # Build DataFrame in original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + + return out.reset_index(drop=True) + def print_filtered_df(df): """ @@ -418,17 +466,19 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - del df['date'] - recon = unpack_dict_rows(df) + + recon = unpack_dict_rows(df, ['date']) + print(recon) + #del recon['EventDate'] # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. recon['V'] = recon['V'].apply(str) # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - recon.groupby(['E', 'EventDate', 'EventName']) + recon.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) birth_count = (df_final['EventName'] == 'Birth').sum() print("Birth count for run ", run, "is ", birth_count) @@ -449,7 +499,7 @@ def extract_event_chains(results_folder: Path, res[draw] = pd.concat(dfs_from_runs, ignore_index=True) # Optionally, sort by 'E' and 'EventDate' after combining - res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + res[draw] = res[draw].sort_values(by=['E', 'date']).reset_index(drop=True) return res diff --git a/src/tlo/events.py b/src/tlo/events.py index ba91218dbc..4b62c16932 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -139,7 +139,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -161,7 +160,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -232,7 +230,6 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Create and store event for this individual, regardless of whether any property change occurred link_info = { - 'EventDate' : self.sim.date, 'EventName' : type(self).__name__, } diff --git a/src/tlo/methods/hsi_event.py 
b/src/tlo/methods/hsi_event.py index 59b7b1f60a..d59f8e2404 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -267,7 +267,6 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info = { 'EventName' : type(self).__name__, - 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index da55d42efc..35f6818f66 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -462,7 +462,6 @@ def do_birth(self, mother_id: int) -> int: # changes that this individual will undergo as a result of events taking place. link_info = self.population.props.loc[child_id].to_dict() link_info['EventName'] = 'Birth' - link_info['EventDate'] = self.date chain_links = {} chain_links[child_id] = link_info # Convert to string to avoid issue of length diff --git a/src/tlo/util.py b/src/tlo/util.py index ee29445e9a..d678aa09ef 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -97,16 +97,15 @@ def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['EventDate'] = date eav['EventName'] = event_name - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav def convert_chain_links_into_EAV(chain_links): df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["EventDate", "EventName"] + id_cols = ["EventName"] eav = df.reset_index().melt( id_vars=["index"] + id_cols, # index = person ID @@ -116,7 +115,7 @@ def convert_chain_links_into_EAV(chain_links): eav.rename(columns={"index": "E"}, inplace=True) - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav From 2f20cb392a9aaee1c8d004a82e4f31957d2130b8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:45:16 +0000 Subject: [PATCH 34/97] Check if PregnancySupervisor is included before considering in chain of events production --- src/tlo/events.py | 101 ++++++++++++++++++++--------------- src/tlo/methods/hsi_event.py | 53 +++++++++--------- 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 4b62c16932..f03f150f92 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -122,7 +122,10 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] # Create an empty list to store changes for each of the individuals chain_links = {} @@ -154,19 +157,20 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] + if 
'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } - chain_links[key] = link_info + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info return chain_links @@ -197,17 +201,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is already in mni dictionary, if so copy her original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + else: + mni_row_before = None else: # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. df_before = self.sim.population.props.copy() - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_before = None return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before @@ -224,9 +234,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Check if individual is in mni after the event mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True + else: + mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred link_info = { @@ -237,26 +250,27 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - 
if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything # Add individual to the chain links chain_links[self.target] = link_info @@ -267,7 +281,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Population frame after event df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 7d960077fc..edb5d3df3b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -239,10 +239,11 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is in mni dictionary before the event, if so store its original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() else: print("ERROR: there shouldn't be pop-wide HSI event") @@ -259,9 +260,10 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes or not. 
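As an aside, the before/after row comparison that both the Event and HSI variants of store_chains_to_do_after_event rely on can be sketched in isolation as follows; the helper name diff_row and the example columns are illustrative only (not part of the TLO API), and only the diffing logic mirrors the code above.

import pandas as pd

def diff_row(row_before: pd.Series, row_after: pd.Series, event_name: str) -> dict:
    # Start the chain link with the event name; only properties that changed are added on top.
    link_info = {'EventName': event_name}
    for key in row_before.index:
        # Rows are fillna(-99999)-ed beforehand in the real code, so NaN-vs-NaN noise is avoided.
        if row_before[key] != row_after[key]:
            link_info[key] = row_after[key]
    return link_info

# Example: only the column whose value changed ends up in the link.
before = pd.Series({'is_alive': True, 'hv_inf': False})
after = pd.Series({'is_alive': True, 'hv_inf': True})
print(diff_row(before, after, 'HivInfectionEvent'))  # {'EventName': 'HivInfectionEvent', 'hv_inf': True}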
@@ -285,24 +287,25 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] + if 'PregnancySupervisor' in self.sim.modules: + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] chain_links[self.target] = link_info From 1b838235e2ca27e82412f37d302ff40adccbeba0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:42:59 +0000 Subject: [PATCH 35/97] Remove old util fnc --- src/tlo/analysis/utils.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index bc8784ae66..fc0d374fd1 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -364,36 +364,6 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat - - -import pandas as pd - -def old_unpack_dict_rows(df): - """ - Reconstruct a full dataframe from rows whose columns contain dictionaries - mapping local-row-index → value. Preserves original column order. 
- """ - original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] - reconstructed_rows = [] - - for _, row in df.iterrows(): - # Determine how many rows this block has (using the first dict column) - first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) - block_length = len(row[first_dict_col]) - - # Build each reconstructed row - for i in range(block_length): - new_row = {} - for col in original_cols: - cell = row[col] - if not isinstance(cell, dict): - raise ValueError(f"Column {col} does not contain a dictionary") - new_row[col] = cell.get(str(i)) - reconstructed_rows.append(new_row) - - # Build DataFrame and enforce the original column order - out = pd.DataFrame(reconstructed_rows)[original_cols] - return out.reset_index(drop=True) def unpack_dict_rows(df, non_dict_cols=None): From f4cf120a60bf6da13fecb2d66dfe05fd4f495aff Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:44:08 +0000 Subject: [PATCH 36/97] Overwrite any changes to hiv and tb file --- src/tlo/methods/hiv.py | 35 --------------- src/tlo/methods/tb.py | 96 ++++++++++-------------------------------- 2 files changed, 23 insertions(+), 108 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 7ecc741c25..8b40e37a34 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -1682,37 +1682,6 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- -class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): - """ The HIV Polling Events for Data Generation - * Ensures that - """ - - def __init__(self, module): - super().__init__( - module, frequency=DateOffset(years=120) - ) # repeats every 12 months, but this can be changed - - def apply(self, population): - - df = population.props - - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & ~df.hv_inf - ].index - - n_susceptible = len(susc_idx) - print("Number of individuals susceptible", n_susceptible) - # Schedule the date of infection for each new infection: - for i in susc_idx: - date_of_infection = self.sim.date + pd.DateOffset( - # Ensure that individual will be infected before end of sim - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - self.sim.schedule_event( - HivInfectionEvent(self.module, i), date_of_infection - ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1734,7 +1703,6 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls - # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1810,7 +1778,6 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) - # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1935,8 +1902,6 @@ def vmmc_for_child(): vmmc_for_child() - - # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py 
b/src/tlo/methods/tb.py index 71361a7951..d9ba7309e0 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -864,31 +864,29 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT - # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -901,10 +899,8 @@ def initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - # 2) log at the end of the year # Optional: Schedule the scale-up of programs if self.parameters["type_of_scaleup"] != 'none': @@ -1406,53 +1402,6 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- -class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): - """The Tb Regular Poll Event for Data Generation for assigning active infections - * selects everyone to develop an active infection and schedules onset of active tb - sometime during the simulation - """ - - def __init__(self, module): - super().__init__(module, frequency=DateOffset(years=120)) - - def apply(self, population): - - df = population.props - now = self.sim.date - rng = self.module.rng - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & (df.tb_inf != "active") - ].index - - len(susc_idx) - - middle_index = len(susc_idx) // 2 - - # Will equally split two strains among the population - list_ds = susc_idx[:middle_index] - list_mdr = susc_idx[middle_index:] - - # schedule onset of active tb. This will be equivalent to the "Onset", so it - # doesn't matter how long after we have decided which infection this is. 
- for person_id in list_ds: - date_progression = now + pd.DateOffset( - # At some point during their lifetime, this person will develop TB - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "ds" - - for person_id in list_mdr: - date_progression = now + pd.DateOffset( - days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "mdr" - class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1527,6 +1476,7 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() # note also culture test used in target/max scale-up in place of clinical dx + class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period From 29dd543c2c182a724c7c9099bdeb5cf5ec439363 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:45:09 +0000 Subject: [PATCH 37/97] Overwrite any changes to demography file --- src/tlo/methods/demography.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index b8fa40b7df..2acaad75eb 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -324,10 +324,9 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_event_chains is False: - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): From 33f1143e1b2c46113c498a3fde5fe0799a2a6be7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 18:00:52 +0000 Subject: [PATCH 38/97] Remove outdated test related to RTI data harvesting --- tests/test_rti.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_rti.py b/tests/test_rti.py index b696a249f5..711215b8cf 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,17 +25,6 @@ end_date = Date(2012, 1, 1) popsize = 1000 -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation with a functioning health system with full service availability and no set - constraints - """ - # create sim object - sim = create_basic_rti_sim(popsize, seed) - # run simulation - sim.simulate(end_date=end_date) - exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't From af477c29485ee7b2d4d380753d9846b7d93c19c5 Mon Sep 
17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 09:51:58 +0000 Subject: [PATCH 39/97] Add a very simple synchronous notification dispatcher - avoided using the more typical naming `event` or `signal` because they are already used. --- src/tlo/notify.py | 64 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_notify.py | 23 ++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/tlo/notify.py create mode 100644 tests/test_notify.py diff --git a/src/tlo/notify.py b/src/tlo/notify.py new file mode 100644 index 0000000000..28765f5afd --- /dev/null +++ b/src/tlo/notify.py @@ -0,0 +1,64 @@ +""" +A dead simple synchronous notification dispatcher. + +Usage +----- +# In the notifying class/module +from tlo.notify import notifier + +notifier.dispatch("simulation.on_start", data={"one": 1, "two": 2}) + +# In the listening class/module +from tlo.notify import notifier + +def on_notification(data): + print("Received notification:", data) + +notifier.add_listener("simulation.on_start", on_notification) +""" + +class Notifier: + """ + A simple synchronous notification dispatcher supporting listeners. + """ + + def __init__(self): + self.listeners = {} + + def add_listener(self, notification_key, listener): + """ + Register a listener for a specific notification. + + :param notification_key: The identifier to listen for. + :param listener: A callable to be invoked when the notification is dispatched. + """ + if notification_key not in self.listeners: + self.listeners[notification_key] = [] + self.listeners[notification_key].append(listener) + + def remove_listener(self, notification_key, listener): + """ + Remove a previously registered listener for a notification. + + :param notification_key: The identifier. + :param listener: The listener callable to remove. + """ + if notification_key in self.listeners: + self.listeners[notification_key].remove(listener) + if not self.listeners[notification_key]: + del self.listeners[notification_key] + + def dispatch(self, notification_key, data=None): + """ + Dispatch a notification to all registered listeners. + + :param notification_key: The identifier. + :param data: Optional data to pass to each listener. 
+ """ + if notification_key in self.listeners: + for listener in list(self.listeners[notification_key]): + listener(data) + +# Create a global dispatcher instance +notifier = Notifier() + diff --git a/tests/test_notify.py b/tests/test_notify.py new file mode 100644 index 0000000000..e71e2acb9a --- /dev/null +++ b/tests/test_notify.py @@ -0,0 +1,23 @@ +from tlo.notify import notifier + +def test_notifier(): + # in listening code + received_data = [] + + def callback(data): + received_data.append(data) + + notifier.add_listener("test.signal", callback) + + # in emitting code + notifier.dispatch("test.signal", data={"value": 42}) + + assert len(received_data) == 1 + assert received_data[0] == {"value": 42} + + # Unsubscribe and test no further calls + notifier.remove_listener("test.signal", callback) + notifier.dispatch("test.signal", data={"value": 100}) + + assert len(received_data) == 1 # No new data + From 01e35d0079877dd7d12cdbd2cb6f7b285fef863f Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:02:59 +0000 Subject: [PATCH 40/97] Fix comment --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 28765f5afd..325131a1c7 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -59,6 +59,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) -# Create a global dispatcher instance +# Create a global notifier instance notifier = Notifier() From 9f23fcbeb46e2af5b6a1c6334aa579574ec18b66 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:23:00 +0000 Subject: [PATCH 41/97] Fix formatting --- src/tlo/notify.py | 3 ++- tests/test_notify.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 325131a1c7..2906fa712a 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -17,6 +17,7 @@ def on_notification(data): notifier.add_listener("simulation.on_start", on_notification) """ + class Notifier: """ A simple synchronous notification dispatcher supporting listeners. @@ -59,6 +60,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) + # Create a global notifier instance notifier = Notifier() - diff --git a/tests/test_notify.py b/tests/test_notify.py index e71e2acb9a..ad5e828bbf 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,5 +1,6 @@ from tlo.notify import notifier + def test_notifier(): # in listening code received_data = [] @@ -20,4 +21,3 @@ def callback(data): notifier.dispatch("test.signal", data={"value": 100}) assert len(received_data) == 1 # No new data - From 5ff53bb7e104e46969199dbfefc15e3fccc02eec Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 12:23:49 +0000 Subject: [PATCH 42/97] Remove unnecessary list wrap --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 2906fa712a..48c46b82b4 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -57,7 +57,7 @@ def dispatch(self, notification_key, data=None): :param data: Optional data to pass to each listener. 
""" if notification_key in self.listeners: - for listener in list(self.listeners[notification_key]): + for listener in self.listeners[notification_key]: listener(data) From 16f5e6701b03e826830352eeef8657991eae94bd Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:08:51 +0000 Subject: [PATCH 43/97] Use broadcaster to collect events. Need to expand to include HSI events --- src/tlo/events.py | 11 + src/tlo/methods/collect_event_chains.py | 281 ++++++++++++++++++++++++ src/tlo/methods/fullmodel.py | 2 + src/tlo/simulation.py | 5 + 4 files changed, 299 insertions(+) create mode 100644 src/tlo/methods/collect_event_chains.py diff --git a/src/tlo/events.py b/src/tlo/events.py index f03f150f92..dce44656bd 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,7 @@ import pandas as pd +from tlo.notify import notifier from tlo.util import convert_chain_links_into_EAV import copy @@ -296,7 +297,12 @@ def run(self): """Make the event happen.""" # Collect relevant information before event takes place + # If statement outside or inside dispatch notification? if self.sim.generate_event_chains: + + # Dispatch notification that event is about to run + notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) @@ -305,6 +311,11 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains and print_chains: + + print("About to pass") + # Dispatch notification that event is about to run + notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) if chain_links: diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py new file mode 100644 index 0000000000..553e095b0b --- /dev/null +++ b/src/tlo/methods/collect_event_chains.py @@ -0,0 +1,281 @@ +from tlo.notify import notifier + +from pathlib import Path +from typing import Optional +from tlo import Module, logging, population +from tlo.population import Population +import pandas as pd + +from tlo.util import convert_chain_links_into_EAV + +import copy + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class CollectEventChains(Module): + + def __init__(self, name=None): + super().__init__(name) + + # This is how I am passing data from fnc taking place before event to the one after + # It doesn't seem very elegant but not sure how else to go about it + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + def initialise_simulation(self, sim): + notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + pass + + def initialise_population(self, population): + pass + + def on_birth(self, mother, child): + pass + + def on_notification_sim_about_to_start(self,data): + pass + + def 
on_notification_event_about_to_run(self, data): + """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stores the required information before the event has occurred. """ + print("This is the data I received ", data) + + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + print("My Modules") + print(self.sim.modules.keys()) + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + self.print_chains = True + + # Target is single individual + if not isinstance(data["target"], Population): + + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + + else: + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the whole pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None + + return + + + def on_notification_event_has_just_ran(self, data): + """ If print_chains=True, this function logs the event and identifies and logs any property changes that have occurred to one or multiple individuals as a result of the event taking place.
""" + print("This is the data I received ", data) + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + 'EventName' : data['EventName'], + } + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + return + + def mni_values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + return diffs + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] + + # Create an empty list to store changes for each of the individuals + chain_links = {} + len_of_diff = len(diff_mask) + + # Loop through each row of the mask + persons_changed = [] + + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) + # Create a dictionary for this person + # First add event info + link_info = { + 'EventName': type(self).__name__, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + + # Append the event and changes to the individual key + chain_links[idx] = link_info + + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info + + return chain_links + + + diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 3f0c79434e..3c710c7dd2 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,6 +8,7 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, + collect_event_chains, contraception, copd, demography, @@ -116,6 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, + collect_event_chains.CollectEventChains, ] return [ module_class( diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index b0c95683c1..eac1bbdc89 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,6 +13,10 @@ import pandas as pd import tlo.population import numpy as np +import tlo.methods.collect_event_chains + +from tlo.notify import notifier +from tlo.methods.collect_event_chains import CollectEventChains from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: @@ -148,6 +152,7 @@ def __init__( # Whether simulation has been initialised self._initialised = False + def _configure_logging( self, From ebe0ebc6644f3a96bac01c7efb9f3ad47378048a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:28:02 +0000 Subject: [PATCH 44/97] Use broadcasting in HSI events too --- .../parameter_values.csv | 3 + src/tlo/events.py | 276 +--------------- src/tlo/methods/collect_event_chains.py | 297 ++++++++++-------- src/tlo/methods/hsi_event.py | 155 ++------- src/tlo/simulation.py | 61 +--- 5 files changed, 209 insertions(+), 583 deletions(-) create mode 100644 resources/ResourceFile_GenerateEventChains/parameter_values.csv diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv new file mode 100644 index 0000000000..82394e590b --- /dev/null +++ 
b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 +size 102 diff --git a/src/tlo/events.py b/src/tlo/events.py index dce44656bd..dba2f33cd5 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,28 +4,12 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset, logging +from tlo import DateOffset if TYPE_CHECKING: from tlo import Simulation -import pandas as pd - from tlo.notify import notifier -from tlo.util import convert_chain_links_into_EAV - -import copy - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -logger_chain = logging.getLogger('tlo.simulation') -logger_chain.setLevel(logging.INFO) - -logger_summary = logging.getLogger(f"{__name__}.summary") -logger_summary.setLevel(logging.INFO) - -debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -39,7 +23,6 @@ def __lt__(self, other): return self.value < other.value return NotImplemented - class Event: """Base event class, from which all others inherit. @@ -78,265 +61,20 @@ def apply(self, target): """ raise NotImplementedError - def mni_values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): - diffs = {} - - all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - - for person in all_individuals: - if person not in entire_mni_before: # but is afterward - for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - elif person not in entire_mni_after: # but is beforehand - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] - - else: # person is in both - # Compare properties - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - return diffs - - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. 
""" - - # Create a mask of where values are different - diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - if 'PregnancySupervisor' in self.sim.modules: - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) - else: - diff_mni = [] - - # Create an empty list to store changes for each of the individuals - chain_links = {} - len_of_diff = len(diff_mask) - - # Loop through each row of the mask - persons_changed = [] - - for idx, row in diff_mask.iterrows(): - changed_cols = row.index[row].tolist() - - if changed_cols: # Proceed only if there are changes in the row - persons_changed.append(idx) - # Create a dictionary for this person - # First add event info - link_info = { - 'EventName': type(self).__name__, - } - - # Store the new values from df_after for the changed columns - for col in changed_cols: - link_info[col] = df_after.at[idx, col] - - if idx in diff_mni: - # This person has also undergone changes in the mni dictionary, so add these here - for key in diff_mni[idx]: - link_info[col] = diff_mni[idx][key] - - # Append the event and changes to the individual key - chain_links[idx] = link_info - - if 'PregnancySupervisor' in self.sim.modules: - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] - - chain_links[key] = link_info - - return chain_links - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - - # Initialise these variables - print_chains = False - df_before = [] - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - entire_mni_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - else: - mni_row_before = None - - else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
- df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_before = None - - return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - chain_links = {} - - # Target is single individual - if self.target != self.sim.population: - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : type(self).__name__, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[self.target] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) - - return chain_links - def run(self): """Make the event happen.""" - # Collect relevant information before event takes place - # If statement outside or inside dispatch notification? 
- if self.sim.generate_event_chains: - - # Dispatch notification that event is about to run - notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) - - print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() + + # Dispatch notification that event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() - # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' - # in the individual's event chain. - if self.sim.generate_event_chains and print_chains: - - print("About to pass") - # Dispatch notification that event is about to run - notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) - - chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger_chain.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - """ - # Create empty logger for entire pop - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) - - # Log chain_links here - if len(chain_links)>0: - - logger_chain.info(key='event_chains', - data= pop_dict, - description='Links forming chains of events for simulated individuals') - """ + # Dispatch notification that event has just ran + notifier.dispatch("event.has_just_ran", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 553e095b0b..7fb946c524 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -2,11 +2,11 @@ from pathlib import Path from typing import Optional -from tlo import Module, logging, population +from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd -from tlo.util import convert_chain_links_into_EAV +from tlo.util import df_to_EAV, convert_chain_links_into_EAV, read_csv_files import copy @@ -27,156 +27,207 @@ def __init__(self, name=None): self.mni_row_before = {} self.entire_mni_before = {} + PARAMETERS = { + # Options within module + "generate_event_chains": Parameter( + Types.BOOL, "Whether or not we want to collect chains of events for individuals" + ), + "modules_of_interest": Parameter( + Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" + ), + "events_to_ignore": Parameter( + Types.LIST, "Events to be ignored when collecting chains" + ), + } + def initialise_simulation(self, sim): - notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) - notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + notifier.add_listener("simulation.pop_has_been_initialised", self.on_notification_pop_has_been_initialised) + notifier.add_listener("simulation.on_birth", self.on_notification_of_birth) + notifier.add_listener("event.about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - pass + #print("resource file path", resourcefilepath) + #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + self.parameters["generate_event_chains"] = True + self.parameters["modules_of_interest"] = self.sim.modules + + self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + def initialise_population(self, population): pass def on_birth(self, mother, child): + # Could the notification of birth simply take place here? pass - def on_notification_sim_about_to_start(self,data): - pass + def on_notification_pop_has_been_initialised(self, data): + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + if self.parameters['generate_event_chains']: + + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + + def on_notification_of_birth(self, data): + + if self.parameters['generate_event_chains']: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + link_info = data['link_info'] + link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + chain_links = {} + chain_links[data['target']] = link_info + + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - print("This is the data I received ", data) - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + p = self.parameters - print("My Modules") - print(self.sim.modules.keys()) - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if p['generate_event_chains']: - self.print_chains = True + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} - # Target is single individual - if not isinstance(data["target"], Population): + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + self.print_chains = True - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - - else: + # Target is single individual + if not isinstance(data['target'], Population): - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + else: - self.entire_mni_before = None + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
+ self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - print("This is the data I received ", data) + + p = self.parameters - chain_links = {} - - # Target is single individual - if not isinstance(data["target"], Population): - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : data['EventName'], - } - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[data['target']] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if p['generate_event_chains'] and self.print_chains: + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = data['link_info'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edb5d3df3b..01bd826f2d 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -7,28 +7,19 @@ from tlo import Date, logging from tlo.events import Event -from tlo.population import Population -from tlo.util import convert_chain_links_into_EAV -import pandas as pd +from tlo.notify import notifier if TYPE_CHECKING: from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem -# Pointing to the logger in events -logger_chains = logging.getLogger("tlo.simulation") -logger_chains.setLevel(logging.INFO) - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) -debug_chains = True - - # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -204,144 +195,34 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - - # Initialise these variables - print_chains = False - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is in mni dictionary before the event, if so store its original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - - else: - print("ERROR: there shouldn't be pop-wide HSI event") - exit(-1) - - return print_chains, row_before, mni_row_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - # For HSI event, this will only ever occur for individual events - chain_links = {} - - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes or not. 
- - # Add event details - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'EventName' : type(self).__name__, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } - - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - - chain_links[self.target] = link_info - - return chain_links def run(self, squeeze_factor): """Make the event happen.""" - - if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() - - footprint = self.EXPECTED_APPT_FOOTPRINT + # Dispatch notification that HSI event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() self._run_after_hsi_event() - - - if self.sim.generate_event_chains and self.target != self.sim.population: - # If the footprint has been updated when the event ran, change it here - if updated_appt_footprint is not None: - footprint = updated_appt_footprint - - if print_chains: - chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) + # Dispatch notification that HSI event has just ran + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + else: + footprint = self.EXPECTED_APPT_FOOTPRINT + try: + level = self.facility_info.level + except: + level = "N/A" - if chain_links: - - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - logger_chain.info(key='event_chains', - data = ednav, - description='Links forming chains of events for simulated individuals') + notifier.dispatch("event.has_just_ran", + data={"target": self.target, + "link_info" : {"EventName": type(self).__name__, + "footprint": footprint, + "level": level + }}) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index eac1bbdc89..17016f5fc7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,16 +8,10 @@ import time from collections import Counter, OrderedDict from pathlib import Path -from typing import Optional from typing import TYPE_CHECKING, Optional -import pandas as pd -import tlo.population import numpy as np -import 
tlo.methods.collect_event_chains from tlo.notify import notifier -from tlo.methods.collect_event_chains import CollectEventChains -from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -42,9 +36,6 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -logger_chains = logging.getLogger("tlo.methods.event") -logger_chains.setLevel(logging.INFO) - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,16 +104,9 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - - self.generate_event_chains = True - self.generate_event_chains_modules_of_interest = [] - self.generate_event_chains_ignore_events = [] - self.end_date = None self.output_file = None self.population: Optional[Population] = None - - self.show_progress_bar = show_progress_bar self.resourcefilepath = Path(resourcefilepath) @@ -152,7 +136,6 @@ def __init__( # Whether simulation has been initialised self._initialised = False - def _configure_logging( self, @@ -299,21 +282,13 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. - if self.generate_event_chains: - - # EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + + # Dispatch notification that pop has been initialised + notifier.dispatch("simulation.pop_has_been_initialised", data={}) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow @@ -326,15 +301,6 @@ def initialise(self, *, end_date: Date) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - #self.generate_event_chains = generate_event_chains - if self.generate_event_chains: - # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] - - # Reorder columns to place the new columns at the front - pd.set_option('display.max_columns', None) - for module in self.modules.values(): module.initialise_simulation(self) self._initialised = True @@ -403,8 +369,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. 
""" - open('output.txt', mode='a') - if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg) @@ -463,7 +427,6 @@ def fire_single_event(self, event: Event, date: Date) -> None: """ self.date = date event.run() - def do_birth(self, mother_id: int) -> int: """Create a new child person. @@ -478,22 +441,12 @@ def do_birth(self, mother_id: int) -> int: for module in self.modules.values(): module.on_birth(mother_id, child_id) - if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the chain of property - # changes that this individual will undergo as a result of events taking place. - link_info = self.population.props.loc[child_id].to_dict() - link_info['EventName'] = 'Birth' - chain_links = {} - chain_links[child_id] = link_info # Convert to string to avoid issue of length - - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + # Dispatch notification that birth is about to occur + notifier.dispatch("simulation.on_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) return child_id + def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: """Find the events in the queue for a particular person. From e617aa9a1885a260c28dfc47db5c72cac09fdcdd Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 25 Nov 2025 13:39:35 +0000 Subject: [PATCH 45/97] Clear listeners in the global notifier instance at the start of simulation --- src/tlo/notify.py | 7 +++++++ src/tlo/simulation.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 48c46b82b4..b1b4434ba9 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -60,6 +60,13 @@ def dispatch(self, notification_key, data=None): for listener in self.listeners[notification_key]: listener(data) + def clear_listeners(self): + """ + Clear all registered listeners. Essential because the notifier is a global singleton. + e.g. if you are running multiple tests or simulations in the same process. 
+ """ + self.listeners.clear() + # Create a global notifier instance notifier = Notifier() diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d2560f92d9..b0bd733234 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -26,6 +26,7 @@ topologically_sort_modules, ) from tlo.events import Event, IndividualScopeEventMixin +from tlo.notify import notifier from tlo.progressbar import ProgressBar if TYPE_CHECKING: @@ -116,6 +117,8 @@ def __init__( self._custom_log_levels = None self._log_filepath = self._configure_logging(**log_config) + # clear notifier listeners from any previous simulation in this process + notifier.clear_listeners() # random number generator seed_from = "auto" if seed is None else "user" From 4fe8e1f11d9e7fa142735290e3d2f249d73c90d3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:03:16 +0000 Subject: [PATCH 46/97] Correct log name in analysis file --- .../ResourceFile_GenerateEventChains/parameter_values.csv | 4 ++-- src/tlo/analysis/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 82394e590b..2fa792a63a 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 -size 102 +oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 +size 242 diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index fc0d374fd1..94bc541d30 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -440,7 +440,7 @@ def extract_event_chains(results_folder: Path, """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. 
""" - module = 'tlo.simulation' + module = 'tlo.collect_event_chains' key = 'event_chains' # get number of draws and numbers of runs From c1e60969688f50bfef1aabde122fdffe2dc6f151 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:34:45 +0000 Subject: [PATCH 47/97] Summarise checks on whether to collect event changes --- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 58 ++++++++++++------------- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index dba2f33cd5..e79074b333 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -65,9 +65,8 @@ def apply(self, target): def run(self): """Make the event happen.""" - # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7fb946c524..4ce38b43f8 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules + self.parameters["modules_of_interest"] = self.sim.modules.values() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] @@ -96,9 +96,13 @@ def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" p = self.parameters - - if p['generate_event_chains']: + # Only log event if + # 1) generate_event_chains is set to True + # 2) the event belongs to modules of interest and + # 3) the event is not in the list of events to ignore + if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + # Initialise these variables self.print_chains = False self.df_before = [] @@ -107,38 +111,32 @@ def on_notification_event_about_to_run(self, data): self.mni_row_before = {} self.entire_mni_before = {} - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + self.print_chains = True - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + # Target is single individual + if not isinstance(data['target'], Population): - self.print_chains = True + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - # Target is single individual - if not isinstance(data['target'], Population): - - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() else: + self.mni_row_before = None + + else: - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - self.entire_mni_before = None + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. 
+ self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 01bd826f2d..edc7ffb721 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 5e0720496b1a2572aee6e0b4bb30740152c26bc5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:02:14 +0000 Subject: [PATCH 48/97] Use module names rather than obj for ease of use --- src/tlo/events.py | 2 +- src/tlo/methods/collect_event_chains.py | 2 +- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index e79074b333..299fffa6ed 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -66,7 +66,7 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 4ce38b43f8..ce480aa97e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.values() + self.parameters["modules_of_interest"] = self.sim.modules.keys() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edc7ffb721..32620f6c28 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 
2ce9bbd9a79c83ffee33294a6c646da717d5fd30 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:24:10 +0000 Subject: [PATCH 49/97] Fix parameters initialisation --- .../parameter_values.csv | 4 ++-- src/tlo/methods/collect_event_chains.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 2fa792a63a..a84c77ab60 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 -size 242 +oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a +size 419 diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ce480aa97e..7f903e2035 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -47,13 +47,12 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - #print("resource file path", resourcefilepath) - #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.keys() - - self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + print("resource file path", resourcefilepath) + self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + # If modules of interest is '*', set by default to all modules included in the simulation + if self.parameters["modules_of_interest"] == ['*']: + self.parameters["modules_of_interest"] = self.sim.modules.keys() def initialise_population(self, population): pass From a786b2e1cc9c3932a90fa2f3b9c01b556e5d31d7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:29:43 +0000 Subject: [PATCH 50/97] Fix to type of parameter --- src/tlo/methods/collect_event_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7f903e2035..f1f36224a6 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -52,7 +52,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): # If modules of interest is '*', set by default to all modules included in the simulation if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = self.sim.modules.keys() + self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): pass From 7af8c70fbb5f367619d98741d38f7f6e7954a926 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:35:54 +0000 Subject: [PATCH 51/97] Give option to overwrite module parameters --- 
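This patch lets a scenario override the values read from ResourceFile_GenerateEventChains by
passing arguments to the module constructor. A minimal sketch of the intended usage
(illustrative only: the surrounding Simulation set-up and module registration are omitted, and
the module/event names are examples drawn from elsewhere in this series, not a prescribed
configuration):

    from tlo.methods.collect_event_chains import CollectEventChains

    # Arguments left as None fall back to the parameter file; anything supplied
    # here takes precedence when the module is initialised.
    chain_collector = CollectEventChains(
        generate_event_chains=True,
        modules_of_interest=["Hiv", "Tb"],            # module names (see PATCH 48)
        events_to_ignore=["HivRegularPollingEvent"],  # event class names to skip
    )
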
src/tlo/methods/collect_event_chains.py | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index f1f36224a6..ef5f04d639 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -1,7 +1,7 @@ from tlo.notify import notifier from pathlib import Path -from typing import Optional +from typing import Optional, List from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd @@ -15,8 +15,19 @@ class CollectEventChains(Module): - def __init__(self, name=None): + def __init__( + self, + name: Optional[str] = None, + generate_event_chains: Optional[bool] = None, + modules_of_interest: Optional[List[str]] = None, + events_to_ignore: Optional[List[str]] = None + + ): super().__init__(name) + + self.generate_event_chains = generate_event_chains + self.modules_of_interest = modules_of_interest + self.events_to_ignore = events_to_ignore # This is how I am passing data from fnc taking place before event to the one after # It doesn't seem very elegant but not sure how else to go about it @@ -57,6 +68,28 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): def initialise_population(self, population): pass + def initialise_simulation(self, sim): + # Use parameter file values by default, if not overwritten + self.generate_event_chains = self.parameters['generate_event_chains'] \ + if self.generate_event_chains is None \ + else self.generate_event_chains + + self.modules_of_interest = self.parameters['modules_of_interest'] \ + if self.modules_of_interest is None \ + else self.modules_of_interest + + self.events_to_ignore = self.parameters['events_to_ignore'] \ + if self.events_to_ignore is None \ + else self.events_to_ignore + + def get_generate_event_chains(self) -> bool: + """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but + overwrite with what was provided in argument if an argument was specified -- provided for backward + compatibility/debugging.)""" + return self.parameters['generate_event_chains'] \ + if self.arg_generate_event_chains is None \ + else self.arg_generate_event_chains + def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass From d8e6922a94eb5ecd398d20c886d9678dad3fb72f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:40:42 +0000 Subject: [PATCH 52/97] Correct use of parameters --- src/tlo/methods/collect_event_chains.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ef5f04d639..0dca32dec1 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -127,13 +127,11 @@ def on_notification_of_birth(self, data): def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. 
""" - p = self.parameters - # Only log event if # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): # Initialise these variables self.print_chains = False @@ -175,10 +173,8 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - p = self.parameters - if p['generate_event_chains'] and self.print_chains: + if self.print_chains: chain_links = {} From fd761f77511513e18875bedaf3b6ab3100eb8170 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:45:45 +0000 Subject: [PATCH 53/97] Exit as soon as condition is not met --- src/tlo/methods/collect_event_chains.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 0dca32dec1..b3eb42a8b3 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -131,7 +131,9 @@ def on_notification_event_about_to_run(self, data): # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): + if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): + return + else: # Initialise these variables self.print_chains = False @@ -174,7 +176,9 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. 
""" - if self.print_chains: + if not self.print_chains: + return + else: chain_links = {} From edd9e0b8a3599b28ed91b87aa568180971aa9643 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 18:07:30 +0000 Subject: [PATCH 54/97] Allow option to overwrite parameter file --- .../parameter_values.csv | 4 +- .../scenario_generate_chains.py | 81 ++----------------- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 13 +-- 4 files changed, 12 insertions(+), 89 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index a84c77ab60..ebf20c5f79 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a -size 419 +oid sha256:172a0c24c859aaafbad29f6016433cac7a7324efc582e6c4b19c74b6b97436e7 +size 420 diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6cfbd040fa..0f53a1461b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -48,7 +48,7 @@ wasting, ) -class GenerateDataChains(BaseScenario): +class GenerateEventChains(BaseScenario): def __init__(self): super().__init__() self.seed = 42 @@ -71,101 +71,30 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, + 'tlo.methods.collect_event_chains': logging.INFO } } def modules(self): - # MODIFY - # Here instead of running full module - """ - return [demography.Demography(resourcefilepath=self.resources), - enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), - healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, - #rti.RTI(resourcefilepath=self.resources), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), - labour.Labour(resourcefilepath=self.resources), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), - contraception.Contraception(resourcefilepath=self.resources), - newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), - hiv.Hiv(resourcefilepath=self.resources), - tb.Tb(resourcefilepath=self.resources), - epi.Epi(resourcefilepath=self.resources), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=self.resources, - mode_appt_constraints=1, - cons_availability='all')] - """ return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + fullmodel() ) - """ - def draw_parameters(self, draw_number, rng): - return mix_scenarios( - get_parameters_for_status_quo(), - { - 'HealthSystem': { - 'Service_Availability': list(self._scenarios.values())[draw_number], - }, - } - ) - - def _get_scenarios(self) -> Dict[str, list[str]]: - Return the Dict with values 
for the parameter `Service_Availability` keyed by a name for the scenario. - The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. - # Generate list of TREATMENT_IDs and filter to the resolution needed - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*", "Nothing": []}) - #service_availability.update( - # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - #) - - return service_availability - - """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] else: return - # case 1: gfHE = -0.030, factor = 1.01074 - # case 2: gfHE = -0.020, factor = 1.02116 - # case 3: gfHE = -0.015, factor = 1.02637 - # case 4: gfHE = 0.015, factor = 1.05763 - # case 5: gfHE = 0.020, factor = 1.06284 - # case 6: gfHE = 0.030, factor = 1.07326 - def _get_scenarios(self) -> Dict[str, Dict]: - #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*"], "Nothing": []}) - service_availability.update( - {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - ) - print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { - "HealthSystem": { - "Service_Availability": service_availability["No Rti_ShockTreatment*"], + "CollectEventChains": { + "generate_event_chains": True, }, } ), diff --git a/src/tlo/events.py b/src/tlo/events.py index 299fffa6ed..56acb82f43 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -60,11 +60,10 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - def run(self): """Make the event happen.""" - + # Dispatch notification that event is about to run notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index b3eb42a8b3..712d8c045e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -58,17 +58,9 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - print("resource file path", resourcefilepath) self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - - # If modules of interest is '*', set by default to all modules included in the simulation - if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): - pass - - def initialise_simulation(self, sim): # Use parameter file values by default, if not overwritten self.generate_event_chains = 
self.parameters['generate_event_chains'] \ if self.generate_event_chains is None \ @@ -81,6 +73,10 @@ def initialise_simulation(self, sim): self.events_to_ignore = self.parameters['events_to_ignore'] \ if self.events_to_ignore is None \ else self.events_to_ignore + + # If modules of interest is '*', set by default to all modules included in the simulation + if self.modules_of_interest == ['*']: + self.modules_of_interest = list(self.sim.modules.keys()) def get_generate_event_chains(self) -> bool: """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but @@ -134,7 +130,6 @@ def on_notification_event_about_to_run(self, data): if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): return else: - # Initialise these variables self.print_chains = False self.df_before = [] From 24eacdbecb046a43cafa104eac46908e9e0d6380 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 08:41:09 +0000 Subject: [PATCH 55/97] No need for else if exiting function --- src/tlo/methods/collect_event_chains.py | 213 ++++++++++++------------ 1 file changed, 106 insertions(+), 107 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 712d8c045e..4406a77345 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -129,41 +129,41 @@ def on_notification_event_about_to_run(self, data): # 3) the event is not in the list of events to ignore if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): return - else: - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + self.print_chains = True + + # Target is single individual + if not isinstance(data['target'], Population): + + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - self.print_chains = True + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None - # Target is single individual - if not isinstance(data['target'], Population): + else: - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - + # This will be a population-wide event. 
In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - self.entire_mni_before = None + self.entire_mni_before = None return @@ -173,86 +173,85 @@ def on_notification_event_has_just_ran(self, data): if not self.print_chains: return + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = data['link_info'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + else: - - chain_links = {} - - # Target is single individual - if not isinstance(data["target"], Population): - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None - - 
# Create and store event for this individual, regardless of whether any property change occurred - link_info = data['link_info'] - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[data['target']] = link_info - + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} return From 017c6d239cab8f51fe49f56733f3cbabc42a660a Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 26 Nov 2025 11:30:29 +0000 Subject: [PATCH 56/97] Turn off ruff warnings. Far too frequent changes. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7c9e891fd..cf19215833 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,8 +125,8 @@ line-length = 120 exclude = ['src/tlo/_version.py'] [tool.ruff.lint] -select = ["E", "F", "I", "W"] -per-file-ignores = {"src/scripts/**" = ["E501", "W"]} +select = ["E", "F", "I"] +per-file-ignores = {"src/scripts/**" = ["E501"]} [tool.setuptools.packages.find] where = ["src"] From 2942701e9247ec221943ecf9580a42144c3ae2cb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 11:52:12 +0000 Subject: [PATCH 57/97] Include test for event chains collection --- src/tlo/analysis/utils.py | 33 ++++++++++++++----------- src/tlo/methods/collect_event_chains.py | 27 +++++++++----------- src/tlo/simulation.py | 11 ++++++--- src/tlo/util.py | 2 +- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 94bc541d30..0469dca438 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -413,6 +413,23 @@ def unpack_dict_rows(df, non_dict_cols=None): return out.reset_index(drop=True) +def reconstruct_event_chains(df): + + recon = unpack_dict_rows(df, ['date']) + + # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. + recon['V'] = recon['V'].apply(str) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. 
{a1:v1, a2:v2, a3:v3, ...} ) + df_collapsed = ( + recon.groupby(['E', 'date', 'EventName']) + .apply(lambda g: dict(zip(g['A'], g['V']))) + .reset_index(name='Info') + ) + df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) + #birth_count = (df_final['EventName'] == 'Birth').sum() + + return df_final + def print_filtered_df(df): """ @@ -460,21 +477,9 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - recon = unpack_dict_rows(df, ['date']) - print(recon) - #del recon['EventDate'] - # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. - recon['V'] = recon['V'].apply(str) - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) - df_collapsed = ( - recon.groupby(['E', 'date', 'EventName']) - .apply(lambda g: dict(zip(g['A'], g['V']))) - .reset_index(name='Info') - ) - df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) - birth_count = (df_final['EventName'] == 'Birth').sum() + df_final = reconstruct_event_chains(df) - print("Birth count for run ", run, "is ", birth_count) + # Offset person ID to account for the fact that we are collecting chains across runs df_final['E'] = df_final['E'] + ID_offset # Calculate ID offset for next run diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 4406a77345..289bd055dd 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -78,22 +78,15 @@ def initialise_population(self, population): if self.modules_of_interest == ['*']: self.modules_of_interest = list(self.sim.modules.keys()) - def get_generate_event_chains(self) -> bool: - """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but - overwrite with what was provided in argument if an argument was specified -- provided for backward - compatibility/debugging.)""" - return self.parameters['generate_event_chains'] \ - if self.arg_generate_event_chains is None \ - else self.arg_generate_event_chains - def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass def on_notification_pop_has_been_initialised(self, data): + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. - if self.parameters['generate_event_chains']: + if self.generate_event_chains: # EDNAV structure to capture status of individuals at the start of the simulation ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') @@ -105,7 +98,7 @@ def on_notification_pop_has_been_initialised(self, data): def on_notification_of_birth(self, data): - if self.parameters['generate_event_chains']: + if self.generate_event_chains: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. 
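For illustration, a minimal, self-contained sketch (toy data, not part of this series) of the collapse performed in reconstruct_event_chains: long-format E/A/V rows are grouped per person, date and event, and each group's attribute/value pairs are zipped into a single 'Info' dict.

import pandas as pd

# Toy EAV rows of the kind logged under key='event_chains'.
eav = pd.DataFrame({
    'E':         [0, 0, 0, 1],                                  # person ID
    'date':      pd.to_datetime(['2010-01-01', '2010-01-01',
                                 '2010-01-01', '2010-03-15']),
    'EventName': ['StartOfSimulation', 'StartOfSimulation',
                  'StartOfSimulation', 'Birth'],
    'A':         ['is_alive', 'sex', 'li_wealth', 'is_alive'],  # attribute
    'V':         [True, 'F', 3, True],                          # value
})

# Stringify values (as reconstruct_event_chains does), then collapse each
# (person, date, event) group into one row holding a dict of its A/V pairs.
eav['V'] = eav['V'].apply(str)
collapsed = (
    eav.groupby(['E', 'date', 'EventName'])
       .apply(lambda g: dict(zip(g['A'], g['V'])))
       .reset_index(name='Info')
       .sort_values(['E', 'date'])
       .reset_index(drop=True)
)
# One row per person/date/event, e.g. for person 0:
# Info == {'is_alive': 'True', 'sex': 'F', 'li_wealth': '3'}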
link_info = data['link_info'] @@ -237,13 +230,15 @@ def on_notification_event_has_just_ran(self, data): # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) + # Log chains + if chain_links: + + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') # Reset variables self.print_chains = False diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ded5960e6e..e1da725c53 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -266,7 +266,7 @@ def make_initial_population(self, *, n: int) -> None: a keyword parameter for clarity. """ start = time.time() - + # Collect information from all modules, that is required the population dataframe for module in self.modules.values(): module.pre_initialise_population() @@ -285,9 +285,6 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.pop_has_been_initialised", data={}) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -307,6 +304,12 @@ def initialise(self, *, end_date: Date) -> None: for module in self.modules.values(): module.initialise_simulation(self) self._initialised = True + + # Since CollectEventChains listeners are added to notified upon module initialisation, this can only be dispatched here. + # Otherwise, would have to add listener outside of CollectEventChains initialisation + + # Dispatch notification that pop has been initialised + notifier.dispatch("simulation.pop_has_been_initialised", data={"EventName" : "StartOfSimulation"}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
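The convert_chain_links_into_EAV helper called in the hunk above is imported from tlo.util, but its body is not shown in this part of the series; the sketch below is a hypothetical stand-in (names and behaviour assumed, not the project's implementation) for turning a chain_links dict of the form {person: {'EventName': ..., property: new_value, ...}} into the E/EventName/A/V rows that the analysis utilities expect.

import pandas as pd

def chain_links_to_eav_sketch(chain_links):
    # Hypothetical stand-in: one output row per changed property,
    # with the event name repeated on every row for that person.
    rows = []
    for person, link_info in chain_links.items():
        event_name = link_info['EventName']
        for attribute, value in link_info.items():
            if attribute == 'EventName':
                continue
            rows.append({'E': person, 'EventName': event_name,
                         'A': attribute, 'V': value})
    return pd.DataFrame(rows, columns=['E', 'EventName', 'A', 'V'])

# A single individual whose event changed two properties.
ednav = chain_links_to_eav_sketch(
    {7: {'EventName': 'ExampleTreatmentEvent',        # made-up event name
         'hv_art': 'on_VL_suppressed',
         'hv_date_last_ART': pd.Timestamp('2010-06-01')}}
)
# This is the shape then passed to the logger, e.g.
# logger.info(key='event_chains', data=ednav.to_dict(), description=...)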
diff --git a/src/tlo/util.py b/src/tlo/util.py index 189f994353..98b13e45fd 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -96,7 +96,7 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" - eav = df.stack().reset_index() + eav = df.stack(dropna=False).reset_index() eav.columns = ['E', 'A', 'V'] eav['EventName'] = event_name eav = eav[["E", "EventName", "A", "V"]] From 68b19615d91d3a00b48d6cbdc828cdaaecb5e747 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:44:38 +0000 Subject: [PATCH 58/97] Scenario file --- .../scenario_collect_event_chains.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 src/scripts/collect_event_chains/scenario_collect_event_chains.py diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/collect_event_chains/scenario_collect_event_chains.py new file mode 100644 index 0000000000..aec12f9c62 --- /dev/null +++ b/src/scripts/collect_event_chains/scenario_collect_event_chains.py @@ -0,0 +1,118 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) + +class GenerateEventChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 42 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 1000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 3 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + 'tlo.methods.collect_event_chains': logging.INFO + } + } + + def modules(self): + return ( + fullmodel() + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + def _get_scenarios(self) -> Dict[str, Dict]: + + return { + "Baseline": + mix_scenarios( + self._baseline(), + { + "CollectEventChains": { + "generate_event_chains": True, + }, + } + ), 
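A simplified variant of the df_to_EAV helper amended in the util.py hunk above (its date argument is dropped here for brevity), to show what stack(dropna=False) buys: with the default dropna=True, properties whose value happens to be missing (date_of_death for people who are alive, for instance) would silently vanish from the long-format output.

import pandas as pd

def df_to_eav_sketch(df, event_name):
    # Simplified sketch of the helper: keep NaN cells so that every
    # attribute of every individual appears in the output.
    eav = df.stack(dropna=False).reset_index()
    eav.columns = ['E', 'A', 'V']
    eav['EventName'] = event_name
    return eav[['E', 'EventName', 'A', 'V']]

props = pd.DataFrame(
    {'is_alive': [True, True], 'date_of_death': [pd.NaT, pd.NaT]},
    index=[0, 1],
)
eav = df_to_eav_sketch(props, 'StartOfSimulation')
assert len(eav) == 4   # 2 people x 2 properties; dropna=True would keep only 2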
+ + } + + def _baseline(self) -> Dict: + #Return the Dict with values for the parameter changes that define the baseline scenario. + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) From ba78a018b8e9f8623294821abbee4d6f0e4bc1b8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:33:00 +0000 Subject: [PATCH 59/97] Style fixes --- .../analysis_extract_data.py | 8 +-- .../postprocess_events_chain.py | 1 + .../scenario_generate_chains.py | 29 +------- .../scenario_collect_event_chains.py | 29 +------- src/tlo/analysis/utils.py | 10 ++- src/tlo/events.py | 8 ++- src/tlo/methods/collect_event_chains.py | 69 ++++++++++++------- src/tlo/methods/hsi_event.py | 11 +-- src/tlo/simulation.py | 5 +- 9 files changed, 75 insertions(+), 95 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 9ee37cabef..3063b6b425 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -4,17 +4,15 @@ # short tclose -> ideal case # long tclose -> status quo import argparse +from collections import Counter +from datetime import datetime from pathlib import Path from typing import Tuple import pandas as pd -import matplotlib.pyplot as plt from tlo import Date -from tlo.analysis.utils import extract_results, extract_event_chains -from datetime import datetime -from collections import Counter -import ast +from tlo.analysis.utils import extract_event_chains, extract_results # Time simulated to collect data start_date = Date(2010, 1, 1) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py index 96c27a04b1..3b4a00e110 100644 --- a/src/scripts/analysis_data_generation/postprocess_events_chain.py +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -1,6 +1,7 @@ import pandas as pd from dateutil.relativedelta import relativedelta + # Remove from every individual's event chain all events that were fired after death def cut_off_events_after_death(df): diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 0f53a1461b..90d0801e2f 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -18,35 +18,10 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.methods import ( - alri, - cardio_metabolic_disorders, - care_of_women_during_pregnancy, - contraception, - demography, - depression, - diarrhoea, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - rti, - labour, - malaria, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - stunting, - 
symptommanager, - tb, - wasting, -) + class GenerateEventChains(BaseScenario): def __init__(self): diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/collect_event_chains/scenario_collect_event_chains.py index aec12f9c62..f85e987487 100644 --- a/src/scripts/collect_event_chains/scenario_collect_event_chains.py +++ b/src/scripts/collect_event_chains/scenario_collect_event_chains.py @@ -18,35 +18,10 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.methods import ( - alri, - cardio_metabolic_disorders, - care_of_women_during_pregnancy, - contraception, - demography, - depression, - diarrhoea, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - rti, - labour, - malaria, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - stunting, - symptommanager, - tb, - wasting, -) + class GenerateEventChains(BaseScenario): def __init__(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 0469dca438..9d8b1d5696 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -419,7 +419,8 @@ def reconstruct_event_chains(df): # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. recon['V'] = recon['V'].apply(str) - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes + # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( recon.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) @@ -454,8 +455,11 @@ def truncate_dict(d): def extract_event_chains(results_folder: Path, ) -> dict: - """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. - Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. + """Utility function to collect chains of events. Individuals across runs of the same draw + will be combined into unique df. + Returns dictionary where keys are draws, and each draw is associated with a dataframe of + format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + A&Vs for a particular individual + date + event name combination. 
""" module = 'tlo.collect_event_chains' key = 'event_chains' diff --git a/src/tlo/events.py b/src/tlo/events.py index 56acb82f43..74c28a1ded 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,7 @@ from tlo.notify import notifier + class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" START_OF_DAY = 0 @@ -65,13 +66,16 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, + "module" : self.module.name, + "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran - notifier.dispatch("event.has_just_ran", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.has_just_ran", data={"target": self.target, + "link_info" : {"EventName": type(self).__name__}}) class RegularEvent(Event): diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 289bd055dd..6a31e868fc 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -1,14 +1,13 @@ -from tlo.notify import notifier - +import copy from pathlib import Path -from typing import Optional, List -from tlo import Module, Parameter, Types, logging, population -from tlo.population import Population -import pandas as pd +from typing import List, Optional -from tlo.util import df_to_EAV, convert_chain_links_into_EAV, read_csv_files +import pandas as pd -import copy +from tlo import Module, Parameter, Types, logging +from tlo.notify import notifier +from tlo.population import Population +from tlo.util import convert_chain_links_into_EAV, df_to_EAV logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -84,8 +83,11 @@ def on_birth(self, mother, child): def on_notification_pop_has_been_initialised(self, data): - # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + # When logging events for each individual to reconstruct chains, + # only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, + # we therefore want to store all of their properties + # at the start. if self.generate_event_chains: # EDNAV structure to capture status of individuals at the start of the simulation @@ -99,8 +101,9 @@ def on_notification_pop_has_been_initialised(self, data): def on_notification_of_birth(self, data): if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the chain of property - # changes that this individual will undergo as a result of events taking place. + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. 
link_info = data['link_info'] link_info.update(self.sim.population.props.loc[data['target']].to_dict()) chain_links = {} @@ -114,13 +117,19 @@ def on_notification_of_birth(self, data): def on_notification_event_about_to_run(self, data): - """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - + """Do this when notified that an event is about to run. + This function checks whether this event should be logged as part of the event chains, a + nd if so stored required information before the event has occurred. + """ + # Only log event if # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): + if (not self.generate_event_chains + or (data['module'] not in self.modules_of_interest) + or (data['link_info']['EventName'] in self.events_to_ignore) + ): return # Initialise these variables @@ -154,7 +163,8 @@ def on_notification_event_about_to_run(self, data): # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. self.df_before = self.sim.population.props.copy() if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + self.entire_mni_before = copy.deepcopy( + self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: self.entire_mni_before = None @@ -162,7 +172,9 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
+ """ if not self.print_chains: return @@ -228,7 +240,10 @@ def on_notification_event_has_just_ran(self, data): entire_mni_after = None # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + chain_links = self.compare_population_dataframe_and_mni(self.df_before, + df_after, + self.entire_mni_before, + entire_mni_after) # Log chains if chain_links: @@ -267,14 +282,16 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] @@ -290,8 +307,12 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + """ + This function compares the population dataframe and mni dictionary before/after a population-wide e + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. 
+ """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) @@ -300,9 +321,8 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be else: diff_mni = [] - # Create an empty list to store changes for each of the individuals + # Create an empty dict to store changes for each of the individuals chain_links = {} - len_of_diff = len(diff_mask) # Loop through each row of the mask persons_changed = [] @@ -344,7 +364,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = link_info - return chain_links diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 32620f6c28..085d80683c 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -9,7 +9,6 @@ from tlo.events import Event from tlo.notify import notifier - if TYPE_CHECKING: from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem @@ -201,7 +200,10 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", + data={"target": self.target, + "module" : self.module.name, + "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -212,9 +214,10 @@ def run(self, squeeze_factor): footprint = updated_appt_footprint else: footprint = self.EXPECTED_APPT_FOOTPRINT - try: + + if self.facility_info: level = self.facility_info.level - except: + else: level = "N/A" notifier.dispatch("event.has_just_ran", diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index e1da725c53..bde1c72b76 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -9,6 +9,7 @@ from collections import Counter, OrderedDict from pathlib import Path from typing import TYPE_CHECKING, Optional + import numpy as np from tlo.notify import notifier @@ -27,7 +28,6 @@ topologically_sort_modules, ) from tlo.events import Event, IndividualScopeEventMixin -from tlo.notify import notifier from tlo.progressbar import ProgressBar if TYPE_CHECKING: @@ -305,7 +305,8 @@ def initialise(self, *, end_date: Date) -> None: module.initialise_simulation(self) self._initialised = True - # Since CollectEventChains listeners are added to notified upon module initialisation, this can only be dispatched here. + # Since CollectEventChains listeners are added to notified upon module initialisation, + # this can only be dispatched here. 
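A toy illustration of the change detection in compare_population_dataframe_and_mni above: the mask flags cells that differ between the before and after frames, and the ~(isna & isna) term stops NaN-to-NaN cells from being reported (NaN != NaN evaluates to True in pandas). The two-person frame and its columns are just example data.

import pandas as pd

df_before = pd.DataFrame(
    {'is_alive': [True, True], 'hv_art': ['not', 'not'],
     'date_of_death': [pd.NaT, pd.NaT]},
    index=[0, 1],
)
df_after = df_before.copy()
df_after.loc[1, 'hv_art'] = 'on_VL_suppressed'   # person 1 changed by the event

# Cells that differ, ignoring cells that are missing on both sides.
diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna())

# Collect {person: {property: new_value}} for the people actually affected.
changed = {}
for person in diff_mask.index[diff_mask.any(axis=1)]:
    cols = diff_mask.columns[diff_mask.loc[person]]
    changed[person] = {col: df_after.at[person, col] for col in cols}

assert changed == {1: {'hv_art': 'on_VL_suppressed'}}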
# Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised From 44253a2c49e82211bf0483cb2b2d7e989acb923c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:47:19 +0000 Subject: [PATCH 60/97] Final style fixes --- .../analysis_extract_data.py | 555 ------------------ .../postprocess_events_chain.py | 157 ----- .../scenario_generate_chains.py | 94 --- .../analysis_extract_data.py | 83 +++ 4 files changed, 83 insertions(+), 806 deletions(-) delete mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py delete mode 100644 src/scripts/analysis_data_generation/postprocess_events_chain.py delete mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py create mode 100644 src/scripts/collect_event_chains/analysis_extract_data.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py deleted file mode 100644 index 3063b6b425..0000000000 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ /dev/null @@ -1,555 +0,0 @@ -"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when -running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" - -# short tclose -> ideal case -# long tclose -> status quo -import argparse -from collections import Counter -from datetime import datetime -from pathlib import Path -from typing import Tuple - -import pandas as pd - -from tlo import Date -from tlo.analysis.utils import extract_event_chains, extract_results - -# Time simulated to collect data -start_date = Date(2010, 1, 1) -end_date = start_date + pd.DateOffset(months=13) - -# Range of years considered -min_year = 2010 -max_year = 2040 - - -def all_columns(_df): - return pd.Series(_df.all()) - -def check_if_beyond_time_range_considered(progression_properties): - matching_keys = [key for key in progression_properties.keys() if "rt_date_to_remove_daly" in key] - if matching_keys: - for key in matching_keys: - if progression_properties[key] > end_date: - print("Beyond time range considered, need at least ",progression_properties[key]) - -def print_filtered_df(df): - """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. - """ - pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - - dict_cols = ["Info"] - max_items = 2 - # Step 2: Truncate dictionary columns for display - if dict_cols is not None: - for col in dict_cols: - def truncate_dict(d): - if isinstance(d, dict): - items = list(d.items())[:max_items] # keep only first `max_items` - return dict(items) - return d - filtered[col] = filtered[col].apply(truncate_dict) - print(filtered) - - -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): - """Produce standard set of plots describing the effect of each TREATMENT_ID. - - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. - - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. 
- """ - pd.set_option('display.max_rows', None) - pd.set_option('display.max_colwidth', None) - - individual_event_chains = extract_event_chains(results_folder) - print_filtered_df(individual_event_chains[0]) - exit(-1) - - eval_env = { - 'datetime': datetime, # Add the datetime class to the eval environment - 'pd': pd, # Add pandas to handle Timestamp - 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval - 'NaT': pd.NaT, - 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) - } - - initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] - - # Will be added through computation: age at time of RTI - # Will be added through computation: total duration of event - - initial_rt_event_properties = set() - - num_individuals = 1000 - num_runs = 1 - record = [] - # Include results folder in output file name - name_tag = str(results_folder).replace("outputs/", "") - - - - for p in range(0,num_individuals): - - print("At person = ", p, " out of ", num_individuals) - - individual_event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column=str(p), - do_scaling=False - ) - - for r in range(0,num_runs): - initial_properties = {} - key_first_event = {} - key_last_event = {} - first_event = {} - last_event = {} - properties = {} - average_disability = 0 - total_dt_included = 0 - dt_in_prev_disability = 0 - prev_disability_incurred = 0 - ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} - # Count total appts - - list_for_individual = [] - for item,row in individual_event_chains.iterrows(): - value = individual_event_chains.loc[item,(0, r)] - if value !='' and isinstance(value, str): - evaluated = eval(value, eval_env) - list_for_individual.append(evaluated) - - for i in list_for_individual: - print(i) - - """ - # These are the properties of the individual before the start of the chain of events - initial_properties = list_for_individual[0] - - # Initialise first event by gathering parameters of interest from initial_properties - first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} - - # The changing or adding of properties from the first_event will be stored in progression_properties - progression_properties = {} - - for i in list_for_individual: - # Skip the initial_properties, or in other words only consider these if they are 'proper' events - if 'event' in i: - #print(i) - if 'RTIPolling' in i['event']: - - # Keep track of which properties are changed during polling events - for key,value in i.items(): - if 'rt_' in key: - initial_rt_event_properties.add(key) - - # Retain a copy of Polling event - polling_event = i.copy() - - # Update parameters of interest following RTI - key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} - - # Calculate age of individual at time of event - key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days - - # Keep track of evolution in individual's properties - progression_properties = initial_properties.copy() - progression_properties.update(i) - - # Initialise chain of Dalys incurred - if 'rt_disability' in i: - prev_disability_incurred = i['rt_disability'] - 
prev_date = i['event_date'] - - else: - # Progress properties of individual, even if this event is a death - progression_properties.update(i) - - # If disability has changed as a result of this, recalculate and add previous to rolling average - if 'rt_disability' in i: - - dt_in_prev_disability = (i['event_date'] - prev_date).days - #print("Detected change in disability", i['rt_disability'], "after dt=", dt_in_prev_disability) - #print("Adding the following to the average", prev_disability_incurred, " x ", dt_in_prev_disability ) - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - # Update variables - prev_disability_incurred = i['rt_disability'] - prev_date = i['event_date'] - - # Update running footprint - if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': - footprint = i['appt_footprint'] - if 'Counter' in footprint: - footprint = footprint[len("Counter("):-1] - apply = eval(footprint, eval_env) - ind_Counter[i['level']].update(Counter(apply)) - - # If the individual has died, ensure chain of event is interrupted here and update rolling average of DALYs - if 'is_alive' in i and i['is_alive'] is False: - if ((i['event_date'] - polling_event['rt_date_inj']).days) > total_dt_included: - dt_in_prev_disability = (i['event_date'] - prev_date).days - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - break - - # check_if_beyond_time_range_considered(progression_properties) - - # Compute final properties of individual - key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] - key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - - # If individual didn't die and the key_last_event didn't result in a final change in DALYs, ensure that the last change is recorded here - if not key_first_event['rt_imm_death'] and (total_dt_included < key_last_event['duration_days']): - #print("Number of events", len(list_for_individual)) - #for i in list_for_individual: - # if 'event' in i: - # print(i) - dt_in_prev_disability = (progression_properties['event_date'] - prev_date).days - average_disability += prev_disability_incurred*dt_in_prev_disability - total_dt_included += dt_in_prev_disability - - # Now calculate the average disability incurred, and store any permanent disability and total footprint - if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0: - key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] - else: - key_last_event['rt_disability_average'] = 0.0 - - key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] - key_last_event.update({'total_footprint': ind_Counter}) - - if key_last_event['duration_days']!=total_dt_included: - print("The duration of event and total_dt_included don't match", key_last_event['duration_days'], total_dt_included) - exit(-1) - - properties = key_first_event | key_last_event - - record.append(properties) - """ - - df = pd.DataFrame(record) - df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) - - print(df) - print(initial_rt_event_properties) - exit(-1) - #print(i) - - #dict = {} - #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: - # dict[i] = [] - - #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: - # event_chains = extract_results( - # results_folder, - # module='tlo.simulation'#, - # key='event_chains', - # column = str(i), - # 
#custom_generate_series=get_num_dalys_by_year, - # do_scaling=False - # ) - # print(event_chains) - # print(event_chains.index) - # print(event_chains.columns.levels) - - # for index, row in event_chains.iterrows(): - # if event_chains.iloc[index,0] is not None: - # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): - # dict[i].append(event_chains.iloc[index,0]) - #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): - #print(event_chains.iloc[index,0]['de_depr']) - # exit(-1) - #for item in dict[0]: - # print(item) - - #exit(-1) - - TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) - - # Definitions of general helper functions - lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - - def target_period() -> str: - """Returns the target period as a string of the form YYYY-YYYY""" - return "-".join(str(t.year) for t in TARGET_PERIOD) - - def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" - from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( - ImpactOfHealthSystemMode, - ) - e = ImpactOfHealthSystemMode() - return tuple(e._scenarios.keys()) - - def get_num_deaths(_df): - """Return total number of Deaths (total within the TARGET_PERIOD) - """ - return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) - - def get_num_dalys(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - def get_num_dalys_by_cause(_df): - """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum() - ) - - def set_param_names_as_column_index_level_0(_df): - """Set the columns index (level 0) as the param_names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) - return _df - - def find_difference_relative_to_comparison(_ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1), relative to where draw = `comparison`. 
- The comparison is `X - COMPARISON`.""" - return _ser \ - .unstack(level=0) \ - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ - .drop(columns=([comparison] if drop_comparison else [])) \ - .stack() - - - def get_counts_of_hsi_by_treatment_id(_df): - """Get the counts of the short TREATMENT_IDs occurring""" - _counts_by_treatment_id = _df \ - .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ - .apply(pd.Series) \ - .sum() \ - .astype(int) - return _counts_by_treatment_id.groupby(level=0).sum() - - year_target = 2023 - def get_counts_of_hsi_by_treatment_id_by_year(_df): - """Get the counts of the short TREATMENT_IDs occurring""" - _counts_by_treatment_id = _df \ - .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ - .apply(pd.Series) \ - .sum() \ - .astype(int) - return _counts_by_treatment_id.groupby(level=0).sum() - - def get_counts_of_hsi_by_short_treatment_id(_df): - """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" - _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() - - def get_counts_of_hsi_by_short_treatment_id_by_year(_df): - """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" - _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) - _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") - return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() - - - # Obtain parameter names for this scenario file - param_names = get_parameter_names_from_scenario_file() - print(param_names) - - # ================================================================================================ - # TIME EVOLUTION OF TOTAL DALYs - # Plot DALYs averted compared to the ``No Policy'' policy - - year_target = 2023 # This global variable will be passed to custom function - def get_num_dalys_by_year(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year == year_target] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - this_min_year = 2010 - for year in range(this_min_year, max_year+1): - year_target = year - num_dalys_by_year = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = num_dalys_by_year - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) - concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) - dalys_by_year = concatenated_df - print(dalys_by_year) - dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) - - # ================================================================================================ - # Print population under each scenario - pop_model = extract_results(results_folder, - module="tlo.methods.demography", - key="population", - 
column="total", - index="date", - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - pop_model.index = pop_model.index.year - pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] - print(pop_model) - assert dalys_by_year.index.equals(pop_model.index) - assert all(dalys_by_year.columns == pop_model.columns) - pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) - - # ================================================================================================ - # DALYs BROKEN DOWN BY CAUSES AND YEAR - # DALYs by cause per year - # %% Quantify the health losses associated with all interventions combined. - - year_target = 2023 # This global variable will be passed to custom function - def get_num_dalys_by_year_and_cause(_df): - """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" - return pd.Series( - data=_df - .loc[_df.year == year_target] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum() - ) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - this_min_year = 2010 - for year in range(this_min_year, max_year+1): - year_target = year - num_dalys_by_year = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys_by_year_and_cause, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - - df_total = concatenated_df - df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - for year in range(min_year, max_year+1): - year_target = year - - hsi_delivered_by_year = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='HSI_Event', - custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = hsi_delivered_by_year - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - HSI_ran_by_year = concatenated_df - - del ALL - - ALL = {} - # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred - # are consistent across different policies - for year in range(min_year, max_year+1): - year_target = year - - hsi_not_delivered_by_year = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Never_ran_HSI_Event', - custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - ALL[year_target] = hsi_not_delivered_by_year - - # Concatenate the DataFrames into a single DataFrame - concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) - concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - HSI_never_ran_by_year = concatenated_df - - HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( 
- HSI_ran_by_year = HSI_ran_by_year.fillna(0) - HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) - HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) - HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) - print(HSI_ran_by_year) - print(HSI_never_ran_by_year) - print(HSI_total_by_year) - -if __name__ == "__main__": - rfp = Path('resources') - - parser = argparse.ArgumentParser( - description="Produce plots to show the impact each set of treatments", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--output-path", - help=( - "Directory to write outputs to. If not specified (set to None) outputs " - "will be written to value of --results-path argument." - ), - type=Path, - default=None, - required=False, - ) - parser.add_argument( - "--resources-path", - help="Directory containing resource files", - type=Path, - default=Path('resources'), - required=False, - ) - parser.add_argument( - "--results-path", - type=Path, - help=( - "Directory containing results from running " - "src/scripts/analysis_data_generation/scenario_generate_chains.py " - ), - default=None, - required=False - ) - args = parser.parse_args() - assert args.results_path is not None - results_path = args.results_path - - output_path = results_path if args.output_path is None else args.output_path - - apply( - results_folder=results_path, - output_folder=output_path, - resourcefilepath=args.resources_path - ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py deleted file mode 100644 index 3b4a00e110..0000000000 --- a/src/scripts/analysis_data_generation/postprocess_events_chain.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -from dateutil.relativedelta import relativedelta - - -# Remove from every individual's event chain all events that were fired after death -def cut_off_events_after_death(df): - - events_chain = df.groupby('person_ID') - - filtered_data = pd.DataFrame() - - for name, group in events_chain: - - # Find the first non-NaN 'date_of_death' and its index - first_non_nan_index = group['date_of_death'].first_valid_index() - - if first_non_nan_index is not None: - # Filter out all rows after the first non-NaN index - filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index - filtered_data = pd.concat([filtered_data, filtered_group]) - else: - # If there are no non-NaN values, keep the original group - filtered_data = pd.concat([filtered_data, group]) - - return filtered_data - -# Load into DataFrame -def load_csv_to_dataframe(file_path): - try: - # Load raw chains into df - df = pd.read_csv(file_path) - print("Raw event chains loaded successfully!") - return df - except FileNotFoundError: - print(f"Error: The file '{file_path}' was not found.") - except Exception as e: - print(f"An error occurred: {e}") - -file_path = 'output.csv' # Replace with the path to your CSV file - -output = load_csv_to_dataframe(file_path) - -# Some of the dates appeared not to be in datetime format. Correct here. 
-output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') -output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') -if 'hv_date_inf' in output.columns: - output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') - - -date_start = pd.to_datetime('2010-01-01') -if 'Other' in output['cause_of_death'].values: - print("ERROR: 'Other' was included in sim as possible cause of death") - exit(-1) - -# Choose which columns in individual properties to visualise -columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] -#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] - -# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison -columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] - -# If considering epidemiology consistent with sim, add check here. -check_ages_of_those_HIV_inf = False -if check_ages_of_those_HIV_inf: - for index, row in output.iterrows(): - if pd.isna(row['hv_date_inf']): - continue # Skip this iteration - diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) - if diff.years > 1 and diff.years<15: - print("Person contracted HIV infection at age younger than 15", diff) - -# Remove events after death -filtered_data = cut_off_events_after_death(output) - -print_raw_events = True # Print raw chain of events for each individual -print_selected_changes = False -print_all_changes = True -person_ID_of_interest = 494 - -pd.set_option('display.max_rows', None) - -for name, group in filtered_data.groupby('person_ID'): - list_of_dob = group['date_of_birth'] - - # Select individuals based on when they were born - if list_of_dob.iloc[0].year<2010: - - # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: - all_identical_dob = group['date_of_birth'].nunique() == 1 - all_identical_sex = group['sex'].nunique() == 1 - if all_identical_dob is False or all_identical_sex is False: - print("Immutable properties are changing! 
This is not chain for single individual") - print(group) - exit(-1) - - print("----------------------------------------------------------------------") - print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) - print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event - number_of_events =len(group)/2 - number_of_changes=0 - if print_raw_events: - print(group) - - if print_all_changes: - # Check each row - comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) - - # Iterate over rows where any column has changed - for idx, row_changed in comparison.iloc[1:].iterrows(): - if row_changed.any(): # Check if any column changed in this row - number_of_changes+=1 - changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred - print(f"Row {idx} - Changes detected in columns: {changed_columns}") - columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns - print(group.loc[idx, columns_output]) # Print only the changed columns - if group.loc[idx, 'when'] == 'Before': - print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') - #print(group.loc[idx,columns_to_print]) - print() # For better readability - print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") - - if print_selected_changes: - tb_inf_condition = ( - ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | - ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | - ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | - ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | - ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | - ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | - ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) - ) - - alive_condition = ( - (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) - ) - # Combine conditions for rows of interest - transition_condition = tb_inf_condition | alive_condition - - if list_of_dob.iloc[0].year >= 2010: - print("DETECTED OF INTEREST") - print(group[group['event'] == 'Birth'][columns_to_print]) - - # Filter the DataFrame based on the condition - filtered_transitions = group[transition_condition] - if not filtered_transitions.empty: - if list_of_dob.iloc[0].year < 2010: - print("DETECTED OF INTEREST") - print(filtered_transitions[columns_to_print]) - - -print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) - - - diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py deleted file mode 100644 index 90d0801e2f..0000000000 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ /dev/null @@ -1,94 +0,0 @@ -"""This Scenario file run the model to generate event chans - -Run on the batch system using: -``` -tlo batch-submit - 
src/scripts/analysis_data_generation/scenario_generate_chains.py -``` - -or locally using: -``` - tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py -``` - -""" -from pathlib import Path -from typing import Dict - -import pandas as pd - -from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods.fullmodel import fullmodel -from tlo.scenario import BaseScenario - - -class GenerateEventChains(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 42 - self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 1000 - self._scenarios = self._get_scenarios() - self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 - self.generate_event_chains = True - - def log_configuration(self): - return { - 'filename': 'generate_event_chains', - 'directory': Path('./outputs'), # <- (specified only for local running) - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.events': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.collect_event_chains': logging.INFO - } - } - - def modules(self): - return ( - fullmodel() - ) - - def draw_parameters(self, draw_number, rng): - if draw_number < self.number_of_draws: - return list(self._scenarios.values())[draw_number] - else: - return - - def _get_scenarios(self) -> Dict[str, Dict]: - - return { - "Baseline": - mix_scenarios( - self._baseline(), - { - "CollectEventChains": { - "generate_event_chains": True, - }, - } - ), - - } - - def _baseline(self) -> Dict: - #Return the Dict with values for the parameter changes that define the baseline scenario. - return mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "cons_availability": "all", - } - }, - ) - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) diff --git a/src/scripts/collect_event_chains/analysis_extract_data.py b/src/scripts/collect_event_chains/analysis_extract_data.py new file mode 100644 index 0000000000..bef6540934 --- /dev/null +++ b/src/scripts/collect_event_chains/analysis_extract_data.py @@ -0,0 +1,83 @@ +import argparse +from pathlib import Path + +import pandas as pd + +from tlo.analysis.utils import extract_event_chains + + +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. 
+ """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Extract event chains + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + + individual_event_chains = extract_event_chains(results_folder) + print_filtered_df(individual_event_chains[0]) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_collect_event_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) From add05e9f91fedc4ee5fcd97868f0fa0090712e10 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:01:33 +0000 Subject: [PATCH 61/97] Track PR specific test file --- tests/test_collect_event_chains.py | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/test_collect_event_chains.py diff --git a/tests/test_collect_event_chains.py b/tests/test_collect_event_chains.py new file mode 100644 index 0000000000..d77bec85d9 --- /dev/null +++ b/tests/test_collect_event_chains.py @@ -0,0 +1,91 @@ +import os +from pathlib import Path + +import pytest + +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file, reconstruct_event_chains +from tlo.methods import ( + chronicsyndrome, + collect_event_chains, + demography, + enhanced_lifestyle, + healthseekingbehaviour, + healthsystem, + mockitis, + simplified_births, + symptommanager, +) + +resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + +start_date = Date(2010, 1, 1) +end_date = Date(2012, 1, 1) +popsize = 200 + +def check_dtypes(simulation): + # check types of columns + df = simulation.population.props + orig = simulation.population.new_row + assert (df.dtypes == orig.dtypes).all() + + +@pytest.mark.slow +def test_collection_of_event_chains(tmpdir, seed): + + # Establish the simulation object + sim = Simulation( + start_date=start_date, + seed=seed, + log_config={ + "filename": "log", + "directory": tmpdir, + "custom_levels": { + "tlo.methods.healthsystem": 
logging.DEBUG, + "tlo.methods.collect_event_chains": logging.INFO + } + }, resourcefilepath=resourcefilepath + ) + + # Register the core modules + sim.register(demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(), + collect_event_chains.CollectEventChains(generate_event_chains=True), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + mockitis.Mockitis(), + chronicsyndrome.ChronicSyndrome() + ) + + # Run the simulation + sim.make_initial_population(n=popsize) + sim.simulate(end_date=end_date) + check_dtypes(sim) + + # read the results + output = parse_log_file(sim.log_filepath, level=logging.DEBUG) + output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) + event_chains = reconstruct_event_chains(output_chains['tlo.methods.collect_event_chains']['event_chains']) + + # Check that we have a "StartOfSimulation" event for every individual in the initial population, + # and that this was logged at the start date + assert (event_chains['EventName'] == 'StartOfSimulation').sum() == popsize + assert (event_chains.loc[event_chains['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + + # Check that in the case of birth or start of simulation, all properties were logged + num_properties = len(sim.population.props.columns) + mask = event_chains["EventName"].isin(["Birth", "StartOfSimulation"]) + assert event_chains.loc[mask, "Info"].apply(len).eq(num_properties).all() + + # Assert that all HSI events that occurred were also collected in the event chains + HSIs_in_event_chains = event_chains["EventName"].str.contains('HSI', na=False).sum() + assert HSIs_in_event_chains == len(output['tlo.methods.healthsystem']['HSI_Event']) + + # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too + mask = (~event_chains["EventName"].isin(["StartOfSimulation", "Birth"])) & \ + (~event_chains["EventName"].str.contains("HSI", na=False)) + count = mask.sum() + assert count > 0 + From 0fe1d803fadb6a587a934d76cd421e462e98c98f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:38:49 +0000 Subject: [PATCH 62/97] Rename module and keys, and assume that if module is included then will want to track individual histories, i.e. 
remove option as module parameter --- .../parameter_values.csv | 3 - .../analysis_extract_data.py | 2 +- .../scenario_track_individual_histories.py} | 13 ++-- src/tlo/analysis/utils.py | 10 +-- src/tlo/methods/fullmodel.py | 4 +- ...hains.py => individual_history_tracker.py} | 61 ++++++++----------- ....py => test_individual_history_tracker.py} | 28 ++++----- 7 files changed, 52 insertions(+), 69 deletions(-) delete mode 100644 resources/ResourceFile_GenerateEventChains/parameter_values.csv rename src/scripts/{collect_event_chains => track_individual_histories}/analysis_extract_data.py (96%) rename src/scripts/{collect_event_chains/scenario_collect_event_chains.py => track_individual_histories/scenario_track_individual_histories.py} (85%) rename src/tlo/methods/{collect_event_chains.py => individual_history_tracker.py} (87%) rename tests/{test_collect_event_chains.py => test_individual_history_tracker.py} (65%) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv deleted file mode 100644 index ebf20c5f79..0000000000 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:172a0c24c859aaafbad29f6016433cac7a7324efc582e6c4b19c74b6b97436e7 -size 420 diff --git a/src/scripts/collect_event_chains/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py similarity index 96% rename from src/scripts/collect_event_chains/analysis_extract_data.py rename to src/scripts/track_individual_histories/analysis_extract_data.py index bef6540934..291a430ad1 100644 --- a/src/scripts/collect_event_chains/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -65,7 +65,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No type=Path, help=( "Directory containing results from running " - "src/scripts/analysis_data_generation/scenario_collect_event_chains.py " + "src/scripts/analysis_data_generation/scenario_track_individual_histories.py " ), default=None, required=False diff --git a/src/scripts/collect_event_chains/scenario_collect_event_chains.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py similarity index 85% rename from src/scripts/collect_event_chains/scenario_collect_event_chains.py rename to src/scripts/track_individual_histories/scenario_track_individual_histories.py index f85e987487..2df7f28c44 100644 --- a/src/scripts/collect_event_chains/scenario_collect_event_chains.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -1,14 +1,14 @@ -"""This Scenario file run the model to generate event chans +"""This Scenario file run the model to track individual histories Run on the batch system using: ``` tlo batch-submit - src/scripts/analysis_data_generation/scenario_generate_chains.py + src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` or locally using: ``` - tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py + tlo scenario-run src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` """ @@ -37,7 +37,7 @@ def __init__(self): def log_configuration(self): return { - 'filename': 'generate_event_chains', + 'filename': 'track_individual_histories', 'directory': Path('./outputs'), # <- (specified only for local running) 'custom_levels': { '*': logging.WARNING, @@ -46,7 
+46,7 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.collect_event_chains': logging.INFO + 'tlo.methods.individual_history_tracker': logging.INFO } } @@ -68,9 +68,6 @@ def _get_scenarios(self) -> Dict[str, Dict]: mix_scenarios( self._baseline(), { - "CollectEventChains": { - "generate_event_chains": True, - }, } ), diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 9d8b1d5696..d862a4e359 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -413,7 +413,7 @@ def unpack_dict_rows(df, non_dict_cols=None): return out.reset_index(drop=True) -def reconstruct_event_chains(df): +def reconstruct_individual_histories(df): recon = unpack_dict_rows(df, ['date']) @@ -453,7 +453,7 @@ def truncate_dict(d): print(filtered) -def extract_event_chains(results_folder: Path, +def extract_individual_histories(results_folder: Path, ) -> dict: """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. @@ -461,8 +461,8 @@ def extract_event_chains(results_folder: Path, format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.collect_event_chains' - key = 'event_chains' + module = 'tlo.individual_history_tracker' + key = 'individual_histories' # get number of draws and numbers of runs info = get_scenario_info(results_folder) @@ -481,7 +481,7 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - df_final = reconstruct_event_chains(df) + df_final = reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs df_final['E'] = df_final['E'] + ID_offset diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 3c710c7dd2..83b30266c5 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,7 +8,7 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, - collect_event_chains, + individual_history_tracker, contraception, copd, demography, @@ -117,7 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, - collect_event_chains.CollectEventChains, + individual_history_tracker.IndividualHistoryTracker, ] return [ module_class( diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/individual_history_tracker.py similarity index 87% rename from src/tlo/methods/collect_event_chains.py rename to src/tlo/methods/individual_history_tracker.py index 6a31e868fc..e36a844fd8 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -12,19 +12,17 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -class CollectEventChains(Module): +class IndividualHistoryTracker(Module): def __init__( self, name: Optional[str] = None, - generate_event_chains: Optional[bool] = None, modules_of_interest: Optional[List[str]] = None, events_to_ignore: Optional[List[str]] = None ): super().__init__(name) - self.generate_event_chains = generate_event_chains self.modules_of_interest = modules_of_interest self.events_to_ignore = events_to_ignore @@ -39,9 +37,6 @@ def __init__( PARAMETERS = { # Options within module - "generate_event_chains": Parameter( - Types.BOOL, "Whether or 
not we want to collect chains of events for individuals" - ), "modules_of_interest": Parameter( Types.LIST, "Restrict the events collected to specific modules. If *, print for all modules" ), @@ -57,13 +52,10 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) def initialise_population(self, population): # Use parameter file values by default, if not overwritten - self.generate_event_chains = self.parameters['generate_event_chains'] \ - if self.generate_event_chains is None \ - else self.generate_event_chains self.modules_of_interest = self.parameters['modules_of_interest'] \ if self.modules_of_interest is None \ @@ -88,32 +80,30 @@ def on_notification_pop_has_been_initialised(self, data): # At the start of the simulation + when a new individual is born, # we therefore want to store all of their properties # at the start. - if self.generate_event_chains: - - # EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + logger.info(key='individual_histories', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') def on_notification_of_birth(self, data): - if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the - # chain of property changes that this individual will undergo - # as a result of events taking place. - link_info = data['link_info'] - link_info.update(self.sim.population.props.loc[data['target']].to_dict()) - chain_links = {} - chain_links[data['target']] = link_info + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. 
+ link_info = data['link_info'] + link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + chain_links = {} + chain_links[data['target']] = link_info - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='individual_histories', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') def on_notification_event_about_to_run(self, data): @@ -123,11 +113,10 @@ def on_notification_event_about_to_run(self, data): """ # Only log event if - # 1) generate_event_chains is set to True - # 2) the event belongs to modules of interest and - # 3) the event is not in the list of events to ignore - if (not self.generate_event_chains - or (data['module'] not in self.modules_of_interest) + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if ( + (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore) ): return @@ -251,7 +240,7 @@ def on_notification_event_has_just_ran(self, data): # Convert chain_links into EAV ednav = convert_chain_links_into_EAV(chain_links) - logger.info(key='event_chains', + logger.info(key='individual_histories', data= ednav.to_dict(), description='Links forming chains of events for simulated individuals') diff --git a/tests/test_collect_event_chains.py b/tests/test_individual_history_tracker.py similarity index 65% rename from tests/test_collect_event_chains.py rename to tests/test_individual_history_tracker.py index d77bec85d9..110b3ffa93 100644 --- a/tests/test_collect_event_chains.py +++ b/tests/test_individual_history_tracker.py @@ -4,10 +4,10 @@ import pytest from tlo import Date, Simulation, logging -from tlo.analysis.utils import parse_log_file, reconstruct_event_chains +from tlo.analysis.utils import parse_log_file, reconstruct_individual_histories from tlo.methods import ( chronicsyndrome, - collect_event_chains, + individual_history_tracker, demography, enhanced_lifestyle, healthseekingbehaviour, @@ -31,7 +31,7 @@ def check_dtypes(simulation): @pytest.mark.slow -def test_collection_of_event_chains(tmpdir, seed): +def test_individual_history_tracker(tmpdir, seed): # Establish the simulation object sim = Simulation( @@ -42,7 +42,7 @@ def test_collection_of_event_chains(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.collect_event_chains": logging.INFO + "tlo.methods.individual_history_tracker": logging.INFO } }, resourcefilepath=resourcefilepath ) @@ -52,7 +52,7 @@ def test_collection_of_event_chains(tmpdir, seed): simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - collect_event_chains.CollectEventChains(generate_event_chains=True), + individual_history_tracker.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), mockitis.Mockitis(), @@ -67,25 +67,25 @@ def test_collection_of_event_chains(tmpdir, seed): # read the results output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) - event_chains = reconstruct_event_chains(output_chains['tlo.methods.collect_event_chains']['event_chains']) + individual_histories = 
reconstruct_individual_histories(output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date - assert (event_chains['EventName'] == 'StartOfSimulation').sum() == popsize - assert (event_chains.loc[event_chains['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize + assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) - mask = event_chains["EventName"].isin(["Birth", "StartOfSimulation"]) - assert event_chains.loc[mask, "Info"].apply(len).eq(num_properties).all() + mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) + assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_event_chains = event_chains["EventName"].str.contains('HSI', na=False).sum() - assert HSIs_in_event_chains == len(output['tlo.methods.healthsystem']['HSI_Event']) + HSIs_in_individual_histories = individual_histories["EventName"].str.contains('HSI', na=False).sum() + assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too - mask = (~event_chains["EventName"].isin(["StartOfSimulation", "Birth"])) & \ - (~event_chains["EventName"].str.contains("HSI", na=False)) + mask = (~individual_histories["EventName"].isin(["StartOfSimulation", "Birth"])) & \ + (~individual_histories["EventName"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 From 65522a1d4f13eb2a84d1d338fec022ac78173305 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:41:24 +0000 Subject: [PATCH 63/97] Revert changes in rti module --- src/tlo/methods/rti.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 92f79f7538..a5f31e71b0 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event + super().__init__(module, frequency=DateOffset(months=1)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) From c10e6aab8b333ea38d01dcbd1888c7d118870d75 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:43:40 +0000 Subject: [PATCH 64/97] Revert changes in rti module --- src/tlo/methods/rti.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index a5f31e71b0..4ec4fe18a5 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2864,12 +2864,8 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - #if self.sim.generate_event_chains is 
True and self.sim.generate_event_chains_overwrite_epi is True: - #pred = 1.0 - #else: + pred = eq.predict(df.loc[rt_current_non_ind]) - - random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] From ab975c2ad2e6b72635a37586a7f8f076f18a6818 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 09:59:33 +0000 Subject: [PATCH 65/97] change function names on listener's end --- src/tlo/events.py | 4 ++-- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 16 ++++++++-------- src/tlo/simulation.py | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 74c28a1ded..0fff320c3c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -66,7 +66,7 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, + notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) @@ -74,7 +74,7 @@ def run(self): self.post_apply_hook() # Dispatch notification that event has just ran - notifier.dispatch("event.has_just_ran", data={"target": self.target, + notifier.dispatch("event.post-run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 085d80683c..f3ee3c7a46 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -200,7 +200,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", + notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) @@ -220,7 +220,7 @@ def run(self, squeeze_factor): else: level = "N/A" - notifier.dispatch("event.has_just_ran", + notifier.dispatch("event.post-run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__, "footprint": footprint, diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index e36a844fd8..c69a4b16fe 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -46,10 +46,10 @@ def __init__( } def initialise_simulation(self, sim): - notifier.add_listener("simulation.pop_has_been_initialised", self.on_notification_pop_has_been_initialised) - notifier.add_listener("simulation.on_birth", self.on_notification_of_birth) - notifier.add_listener("event.about_to_run", self.on_notification_event_about_to_run) - notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) + notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) + notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) + notifier.add_listener("event.pre-run", self.on_event_pre_run) + notifier.add_listener("event.post-run", self.on_event_post_run) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) @@ -73,7 +73,7 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place 
here? pass - def on_notification_pop_has_been_initialised(self, data): + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, # only the changes in individual properties will be logged. @@ -89,7 +89,7 @@ def on_notification_pop_has_been_initialised(self, data): description='Links forming chains of events for simulated individuals') - def on_notification_of_birth(self, data): + def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo @@ -106,7 +106,7 @@ def on_notification_of_birth(self, data): description='Links forming chains of events for simulated individuals') - def on_notification_event_about_to_run(self, data): + def on_event_pre_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, a nd if so stored required information before the event has occurred. @@ -160,7 +160,7 @@ def on_notification_event_about_to_run(self, data): return - def on_notification_event_has_just_ran(self, data): + def on_event_post_run(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index bde1c72b76..4fec9ed36e 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.pop_has_been_initialised", data={"EventName" : "StartOfSimulation"}) + notifier.dispatch("simulation.post-initialise", data={"EventName" : "StartOfSimulation"}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
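For orientation, the hooks renamed above follow a simple publish/subscribe pattern: listeners are registered against a topic string with notifier.add_listener, and the simulation or event code fires them with notifier.dispatch. Below is a minimal, self-contained sketch of that pattern; the MiniNotifier class and the example payload values are illustrative assumptions, and only the topic names and the add_listener/dispatch call shapes come from the diffs above.

from collections import defaultdict

class MiniNotifier:
    """Toy stand-in for the tlo notifier; not the real implementation."""
    def __init__(self):
        self._listeners = defaultdict(list)

    def add_listener(self, topic, callback):
        # register a callback to run whenever `topic` is dispatched
        self._listeners[topic].append(callback)

    def dispatch(self, topic, data):
        # pass the payload dict to every callback registered for this topic
        for callback in self._listeners[topic]:
            callback(data)

notifier = MiniNotifier()

def on_event_pre_run(data):
    print(f"{data['EventName']} (module {data['module']}) about to run on {data['target']}")

notifier.add_listener("event.pre-run", on_event_pre_run)
notifier.dispatch("event.pre-run",
                  data={"target": 3, "module": "Mockitis", "EventName": "MockitisEvent"})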
@@ -449,7 +449,7 @@ def do_birth(self, mother_id: int) -> int: module.on_birth(mother_id, child_id) # Dispatch notification that birth is about to occur - notifier.dispatch("simulation.on_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) + notifier.dispatch("simulation.post-do_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) return child_id From 450f06cd5670c7ac2c2ee05cdc68ef57e1d7be89 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:14:19 +0000 Subject: [PATCH 66/97] Restructure data passed by dispatcher --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 10 +++++----- src/tlo/methods/individual_history_tracker.py | 13 ++++++++----- src/tlo/simulation.py | 4 ++-- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 0fff320c3c..8aba0069bc 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -67,15 +67,15 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, - "module" : self.module.name, - "link_info" : {"EventName": type(self).__name__}}) + "module" : self.module.name, + "EventName": type(self).__name__}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "link_info" : {"EventName": type(self).__name__}}) + "EventName": type(self).__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f3ee3c7a46..780b9afff6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -203,7 +203,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "link_info" : {"EventName": type(self).__name__}}) + "EventName": type(self).__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -222,10 +222,10 @@ def run(self, squeeze_factor): notifier.dispatch("event.post-run", data={"target": self.target, - "link_info" : {"EventName": type(self).__name__, - "footprint": footprint, - "level": level - }}) + "EventName": type(self).__name__, + "footprint": footprint, + "level": level + }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index c69a4b16fe..e6317bf942 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -94,10 +94,10 @@ def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. 
- link_info = data['link_info'] - link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + link_info = {'EventName': 'Birth'} + link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} - chain_links[data['target']] = link_info + chain_links[data['child_id']] = link_info ednav = convert_chain_links_into_EAV(chain_links) @@ -117,7 +117,7 @@ def on_event_pre_run(self, data): # 2) the event is not in the list of events to ignore if ( (data['module'] not in self.modules_of_interest) - or (data['link_info']['EventName'] in self.events_to_ignore) + or (data['EventName'] in self.events_to_ignore) ): return @@ -186,7 +186,10 @@ def on_event_post_run(self, data): mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred - link_info = data['link_info'] + link_info = {'EventName' : data['EventName']} + if 'footprint' in data.keys(): + link_info['footprint'] = data['footprint'] + link_info['level'] = data['level'] # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4fec9ed36e..71a90b04ff 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.post-initialise", data={"EventName" : "StartOfSimulation"}) + notifier.dispatch("simulation.post-initialise", data={}) def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. 
@@ -449,7 +449,7 @@ def do_birth(self, mother_id: int) -> int: module.on_birth(mother_id, child_id) # Dispatch notification that birth is about to occur - notifier.dispatch("simulation.post-do_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) + notifier.dispatch("simulation.post-do_birth", data={'child_id': child_id}) return child_id From 9e36395e0c4f3615c626c555fc323d7b20f7a69f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:20:47 +0000 Subject: [PATCH 67/97] Style fixes --- src/tlo/methods/fullmodel.py | 3 ++- tests/test_individual_history_tracker.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 83b30266c5..b0c7abeef1 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,7 +8,6 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, - individual_history_tracker, contraception, copd, demography, @@ -21,6 +20,7 @@ healthseekingbehaviour, healthsystem, hiv, + individual_history_tracker, labour, malaria, measles, @@ -117,6 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, + # - Track Individual histories individual_history_tracker.IndividualHistoryTracker, ] return [ diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 110b3ffa93..db460187d8 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -7,11 +7,11 @@ from tlo.analysis.utils import parse_log_file, reconstruct_individual_histories from tlo.methods import ( chronicsyndrome, - individual_history_tracker, demography, enhanced_lifestyle, healthseekingbehaviour, healthsystem, + individual_history_tracker, mockitis, simplified_births, symptommanager, @@ -67,12 +67,14 @@ def test_individual_history_tracker(tmpdir, seed): # read the results output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) - individual_histories = reconstruct_individual_histories(output_chains['tlo.methods.individual_history_tracker']['individual_histories']) + individual_histories = reconstruct_individual_histories( + output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize - assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() + assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', + 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) From 07beb70bab0570f9b74a55330dcf94bf2551928e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 10:33:26 +0000 Subject: [PATCH 68/97] Remove individual history tracker from the fullmodule --- .../scenario_track_individual_histories.py | 3 ++- src/tlo/methods/fullmodel.py | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py 
b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 2df7f28c44..69cd0438d9 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -20,6 +20,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel +from tlo.methods import individual_history_tracker from tlo.scenario import BaseScenario @@ -52,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel() + fullmodel()# + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index b0c7abeef1..3f0c79434e 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -20,7 +20,6 @@ healthseekingbehaviour, healthsystem, hiv, - individual_history_tracker, labour, malaria, measles, @@ -117,8 +116,6 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, - # - Track Individual histories - individual_history_tracker.IndividualHistoryTracker, ] return [ module_class( From 3644668eeb930b040f98f6c91f0240ebc1902099 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 12:02:32 +0000 Subject: [PATCH 69/97] Fix analysis scripts --- .../track_individual_histories/analysis_extract_data.py | 6 +++--- .../scenario_track_individual_histories.py | 6 +++--- src/tlo/analysis/utils.py | 7 +++++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index 291a430ad1..db733ab69f 100644 --- a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -3,7 +3,7 @@ import pandas as pd -from tlo.analysis.utils import extract_event_chains +from tlo.analysis.utils import extract_individual_histories def print_filtered_df(df): @@ -33,8 +33,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - individual_event_chains = extract_event_chains(results_folder) - print_filtered_df(individual_event_chains[0]) + individual_individual_histories = extract_individual_histories(results_folder) + print_filtered_df(individual_individual_histories[0]) if __name__ == "__main__": rfp = Path('resources') diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 69cd0438d9..5cc4d2eeeb 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,12 +19,12 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +#from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel -from tlo.methods import individual_history_tracker from tlo.scenario import BaseScenario -class GenerateEventChains(BaseScenario): +class TrackIndividualHistories(BaseScenario): def __init__(self): super().__init__() self.seed = 42 @@ -47,7 +47,7 @@ def log_configuration(self): 
'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.individual_history_tracker': logging.INFO + #'tlo.methods.individual_history_tracker': logging.INFO } } diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index d862a4e359..4f56fd9b37 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -461,7 +461,7 @@ def extract_individual_histories(results_folder: Path, format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.individual_history_tracker' + module = 'tlo.methods.individual_history_tracker' key = 'individual_histories' # get number of draws and numbers of runs @@ -478,9 +478,12 @@ def extract_individual_histories(results_folder: Path, for run in range(info['runs_per_draw']): + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + print(df) + try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - + print(df) df_final = reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs From 23f45e13acd48ab53d6d6af369bfba44335912ec Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:13:55 +0000 Subject: [PATCH 70/97] Fix retreival of class name --- src/tlo/events.py | 4 ++-- src/tlo/methods/hsi_event.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 8aba0069bc..1ceb30a576 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -68,14 +68,14 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) self.apply(self.target) self.post_apply_hook() # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 780b9afff6..ad1f92eedd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -203,7 +203,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": type(self).__name__}) + "EventName": self.__class__.__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -222,7 +222,7 @@ def run(self, squeeze_factor): notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": type(self).__name__, + "EventName": self.__class__.__name__, "footprint": footprint, "level": level }) From e6f35cc42dd360cb87aee589eaa3dc8ddd50b001 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:56:09 +0000 Subject: [PATCH 71/97] Add resource file --- .../parameter_values.csv | 3 +++ .../scenario_track_individual_histories.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv diff --git 
a/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv b/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv new file mode 100644 index 0000000000..87a6ed9e99 --- /dev/null +++ b/resources/ResourceFile_IndividualHistoryTracker/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56ab1e989bf1133f8e52f81552cb55945d6bf14e1758ae1baa62b6e12b37ce2 +size 365 diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 5cc4d2eeeb..696612352f 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -#from tlo.methods import individual_history_tracker +from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel from tlo.scenario import BaseScenario @@ -53,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel()# + [individual_history_tracker.IndividualHistoryTracker()] + fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): From b4301cebdf89fa8d23e56452b318fb91b1ae1386 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:20:11 +0000 Subject: [PATCH 72/97] Fix EventName error and logging of EAV dataframe --- src/tlo/methods/individual_history_tracker.py | 44 ++++++++++--------- src/tlo/util.py | 1 + tests/test_individual_history_tracker.py | 1 - 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index e6317bf942..26a0ed8708 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -73,6 +73,14 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass + def log_EAV_dataframe_to_individual_histories(self, df): + + for idx, row in df.iterrows(): + print({"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}) + logger.info(key='individual_histories', + data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, + description='Links forming chains of events for simulated individuals') + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, @@ -82,12 +90,10 @@ def on_simulation_post_initialise(self, data): # at the start. 
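As a rough illustration of the record shape being logged here: df_to_EAV (defined in tlo/util.py and not shown in full in this patch) is expected to return one row per person-and-property in entity-attribute-value form, which log_EAV_dataframe_to_individual_histories then writes out row by row. A toy sketch of that shape, assuming a pandas melt-style conversion and using made-up property names:

import pandas as pd

# toy stand-in for sim.population.props; real property names differ
props = pd.DataFrame({"is_alive": [True, True], "age_years": [30, 2]})

eav = (
    props.reset_index()
         .rename(columns={"index": "E"})                    # E = entity (person id)
         .melt(id_vars="E", var_name="A", value_name="V")   # A = attribute, V = value
)
eav["EventName"] = "StartOfSimulation"
print(eav)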
# EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - - logger.info(key='individual_histories', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - + eav_plus_EventName = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + + return def on_simulation_post_do_birth(self, data): @@ -99,12 +105,10 @@ def on_simulation_post_do_birth(self, data): chain_links = {} chain_links[data['child_id']] = link_info - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='individual_histories', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + return def on_event_pre_run(self, data): """Do this when notified that an event is about to run. @@ -235,17 +239,15 @@ def on_event_post_run(self, data): chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, - entire_mni_after) + entire_mni_after, + data['EventName']) # Log chains if chain_links: - - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='individual_histories', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + # Convert chain_links into EAV-type dataframe + eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + # log it + self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) # Reset variables self.print_chains = False @@ -298,7 +300,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): """ This function compares the population dataframe and mni dictionary before/after a population-wide e vent has occurred. 
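The comparison in compare_population_dataframe_and_mni boils down to diffing each affected person's row before and after the event and keeping only the properties whose value changed. A small self-contained sketch of that idea, with toy property names and event name, and without the NaN handling that the module applies separately:

import pandas as pd

row_before = pd.Series({"is_alive": True, "hv_inf": False, "age_years": 30})
row_after = pd.Series({"is_alive": True, "hv_inf": True, "age_years": 30})

# keep only the properties whose value changed as a result of the event
changed = row_after[row_after.ne(row_before)]

link_info = {"EventName": "SomeInfectionEvent", **changed.to_dict()}
print(link_info)  # {'EventName': 'SomeInfectionEvent', 'hv_inf': True}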
@@ -327,7 +329,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventName': type(self).__name__, + 'EventName': EventName, } # Store the new values from df_after for the changed columns diff --git a/src/tlo/util.py b/src/tlo/util.py index 98b13e45fd..e34a887e42 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -105,6 +105,7 @@ def df_to_EAV(df, date, event_name): def convert_chain_links_into_EAV(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") id_cols = ["EventName"] diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index db460187d8..619c062925 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -90,4 +90,3 @@ def test_individual_history_tracker(tmpdir, seed): (~individual_histories["EventName"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 - From 8177340a3892b492dcb8514fc80cfe242adf79e2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:12:17 +0000 Subject: [PATCH 73/97] Adjust utily functions based on new logging --- .../scenario_track_individual_histories.py | 2 +- src/tlo/analysis/utils.py | 109 ++++-------------- src/tlo/methods/individual_history_tracker.py | 1 - 3 files changed, 24 insertions(+), 88 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 696612352f..ded9e4d5ed 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -47,7 +47,7 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - #'tlo.methods.individual_history_tracker': logging.INFO + 'tlo.methods.individual_history_tracker': logging.INFO } } diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 4f56fd9b37..73e31ee944 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -366,91 +366,35 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: return _concat -def unpack_dict_rows(df, non_dict_cols=None): - """ - Reconstruct a full DataFrame from rows where most columns are dictionaries. - Non-dict columns (e.g., 'date') are propagated to all reconstructed rows. 
- - Parameters: - df: pd.DataFrame - non_dict_cols: list of columns that are NOT dictionaries - """ - if non_dict_cols is None: - non_dict_cols = [] - - original_cols = ['E', 'date', 'EventName', 'A', 'V'] - - reconstructed_rows = [] - - for _, row in df.iterrows(): - # Determine dict columns for this row - dict_cols = [col for col in original_cols if col not in non_dict_cols] - - if not dict_cols: - # No dict columns, just append row - reconstructed_rows.append(row.to_dict()) - continue - - # Use the first dict column to get the block length - first_dict_col = dict_cols[0] - block_length = len(row[first_dict_col]) - - # Build each expanded row - for i in range(block_length): - new_row = {} - for col in original_cols: - cell = row[col] - if col in dict_cols: - # Access the dict using string or integer keys - new_row[col] = cell.get(str(i), cell.get(i)) - else: - # Propagate non-dict value - new_row[col] = cell - reconstructed_rows.append(new_row) - - # Build DataFrame in original column order - out = pd.DataFrame(reconstructed_rows)[original_cols] - - return out.reset_index(drop=True) - def reconstruct_individual_histories(df): - recon = unpack_dict_rows(df, ['date']) - - # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. - recon['V'] = recon['V'].apply(str) # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - recon.groupby(['E', 'date', 'EventName']) + df.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) - #birth_count = (df_final['EventName'] == 'Birth').sum() + + first_events = ["StartOfSimulation", "Birth"] - return df_final + # Ensure that if E and date are the same, StartOfSimulation or Birth come first + df_collapsed["EventName"] = pd.Categorical( + df_collapsed["EventName"], + categories=first_events + sorted( + x for x in df_collapsed["EventName"].unique() + if x not in first_events + ), + ordered=True, + ) - -def print_filtered_df(df): - """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. 
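The groupby above turns the long entity-attribute-value log back into one row per (person, date, event) with an Info dict holding all attribute/value pairs recorded for that event. A toy, self-contained example of that collapse step; the data values are made up:

import pandas as pd

df = pd.DataFrame({
    "E": [0, 0, 0],
    "date": pd.to_datetime(["2010-01-01"] * 3),
    "EventName": ["StartOfSimulation", "StartOfSimulation", "HSI_SomeTreatment"],
    "A": ["is_alive", "age_years", "footprint"],
    "V": [True, 30, "{'Over5OPD': 1}"],
})

collapsed = (
    df.groupby(["E", "date", "EventName"])
      .apply(lambda g: dict(zip(g["A"], g["V"])))
      .reset_index(name="Info")
)
print(collapsed)
# yields one StartOfSimulation row with Info == {'is_alive': True, 'age_years': 30}
# and one HSI_SomeTreatment row with Info == {'footprint': "{'Over5OPD': 1}"}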
- """ - pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - - dict_cols = ["Info"] - max_items = 2 - # Step 2: Truncate dictionary columns for display - if dict_cols is not None: - for col in dict_cols: - def truncate_dict(d): - if isinstance(d, dict): - items = list(d.items())[:max_items] # keep only first `max_items` - return dict(items) - return d - filtered[col] = filtered[col].apply(truncate_dict) - print(filtered) + df_final = ( + df_collapsed + .sort_values(by=['E', 'date', 'EventName']) + .reset_index(drop=True) + ) + + return df_final def extract_individual_histories(results_folder: Path, @@ -478,22 +422,18 @@ def extract_individual_histories(results_folder: Path, for run in range(info['runs_per_draw']): - df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - print(df) - try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - print(df) - df_final = reconstruct_individual_histories(df) + df_single_run= reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs - df_final['E'] = df_final['E'] + ID_offset + df_single_run['E'] = df_single_run['E'] + ID_offset # Calculate ID offset for next run - ID_offset = (max(df_final['E']) + 1) + ID_offset = (max(df_single_run['E']) + 1) # Append these chains to list - dfs_from_runs.append(df_final) + dfs_from_runs.append(df_single_run) except KeyError: # Some logs could not be found - probably because this run failed. @@ -503,9 +443,6 @@ def extract_individual_histories(results_folder: Path, # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) - # Optionally, sort by 'E' and 'EventDate' after combining - res[draw] = res[draw].sort_values(by=['E', 'date']).reset_index(drop=True) - return res diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 26a0ed8708..972223694a 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -76,7 +76,6 @@ def on_birth(self, mother, child): def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): - print({"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}) logger.info(key='individual_histories', data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, description='Links forming chains of events for simulated individuals') From 2215645f1de2611d2e800d56758886ef85dd1b7f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:23:43 +0000 Subject: [PATCH 74/97] Rename E column --- .../track_individual_histories/analysis_extract_data.py | 1 - src/tlo/analysis/utils.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index db733ab69f..e88f68bfe9 100644 --- a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -34,7 +34,6 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_colwidth', None) individual_individual_histories = extract_individual_histories(results_folder) - print_filtered_df(individual_individual_histories[0]) if __name__ == 
"__main__": rfp = Path('resources') diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 73e31ee944..bf76625bef 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -393,7 +393,7 @@ def reconstruct_individual_histories(df): .sort_values(by=['E', 'date', 'EventName']) .reset_index(drop=True) ) - + return df_final @@ -432,6 +432,9 @@ def extract_individual_histories(results_folder: Path, # Calculate ID offset for next run ID_offset = (max(df_single_run['E']) + 1) + # The E has now become an ID for the individual in the draw overall, so rename column as such + df_single_run = df_single_run.rename(columns={"E": "person ID in draw"}) + # Append these chains to list dfs_from_runs.append(df_single_run) From d869f17008f169bb24bf03e6cd8fd46e57faf35a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 16:13:55 +0000 Subject: [PATCH 75/97] Rename column name --- src/tlo/analysis/utils.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index bf76625bef..2ce404e821 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -433,8 +433,8 @@ def extract_individual_histories(results_folder: Path, ID_offset = (max(df_single_run['E']) + 1) # The E has now become an ID for the individual in the draw overall, so rename column as such - df_single_run = df_single_run.rename(columns={"E": "person ID in draw"}) - + df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) + # Append these chains to list dfs_from_runs.append(df_single_run) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 972223694a..189b8f2052 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -77,7 +77,7 @@ def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', - data = {"E": row.E, "A": row.A, "V": row.V, "EventName": row.EventName}, + data = {"E": row.E, "A": row.A, "V": str(row.V), "EventName": row.EventName}, description='Links forming chains of events for simulated individuals') def on_simulation_post_initialise(self, data): From 3d1154539b513807d3c226fce9243b6db32f7564 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 16:58:41 +0000 Subject: [PATCH 76/97] Check changes in df --- src/tlo/analysis/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 2ce404e821..96c4337f4a 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -365,6 +365,40 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat +def check_info_value_changes(df): + # Ensure rows are sorted within each person + problems = [] # store violations + + # iterate group-by-group + for E, g in df.groupby("E"): + prev_info = {} + + for _, row in g.iterrows(): + current_info = row["Info"] + + for key, value in current_info.items(): + if key in prev_info and key != 'footprint' and key != 'level': + # compare with previous value + if prev_info[key] == value: + problems.append({ + "key": key, + "value": value, + "message": "Value 
repeated but should differ" + }) + + + # update latest value + if len(problems)>0: + print(prev_info) + print(current_info) + print(problems) + problems = [] + print() + prev_info = row["Info"] + exit(-1) + + return pd.DataFrame(problems) + def reconstruct_individual_histories(df): @@ -394,6 +428,10 @@ def reconstruct_individual_histories(df): .reset_index(drop=True) ) + problems = check_info_value_changes(df_final) + print(problems) + exit(-1) + return df_final From c1463265a13b5d47a38e2479474e9c7d6e8b8bac Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 28 Nov 2025 19:16:44 +0000 Subject: [PATCH 77/97] Ensure order of events on same date is preserved --- .../scenario_track_individual_histories.py | 6 +-- src/tlo/analysis/utils.py | 43 +++++-------------- 2 files changed, 13 insertions(+), 36 deletions(-) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index ded9e4d5ed..e8ef3fb929 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -29,11 +29,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 1000 + self.end_date = self.start_date + pd.DateOffset(years=5) + self.pop_size = 100 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 + self.runs_per_draw = 1 self.generate_event_chains = True def log_configuration(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 96c4337f4a..9aba6111d0 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -366,8 +366,8 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: return _concat def check_info_value_changes(df): - # Ensure rows are sorted within each person - problems = [] # store violations + + problems = {} # store issues # iterate group-by-group for E, g in df.groupby("E"): @@ -379,25 +379,13 @@ def check_info_value_changes(df): for key, value in current_info.items(): if key in prev_info and key != 'footprint' and key != 'level': # compare with previous value - if prev_info[key] == value: - problems.append({ - "key": key, - "value": value, - "message": "Value repeated but should differ" - }) - - + if prev_info[key] == value and key not in problems.keys(): + problems[key] = value + # update latest value - if len(problems)>0: - print(prev_info) - print(current_info) - print(problems) - problems = [] - print() prev_info = row["Info"] - exit(-1) - return pd.DataFrame(problems) + return problems def reconstruct_individual_histories(df): @@ -405,32 +393,19 @@ def reconstruct_individual_histories(df): # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. 
{a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - df.groupby(['E', 'date', 'EventName']) + df.groupby(['E', 'date', 'EventName'], sort=False) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - first_events = ["StartOfSimulation", "Birth"] - - # Ensure that if E and date are the same, StartOfSimulation or Birth come first - df_collapsed["EventName"] = pd.Categorical( - df_collapsed["EventName"], - categories=first_events + sorted( - x for x in df_collapsed["EventName"].unique() - if x not in first_events - ), - ordered=True, - ) - df_final = ( df_collapsed - .sort_values(by=['E', 'date', 'EventName']) + .sort_values(by=['E', 'date']) .reset_index(drop=True) ) problems = check_info_value_changes(df_final) print(problems) - exit(-1) return df_final @@ -483,6 +458,8 @@ def extract_individual_histories(results_folder: Path, # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) + + res[0].to_csv('individual_histories.csv') return res From d48376fdc54674520fc51346c3ec9ece4a1014b4 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 10:20:36 +0000 Subject: [PATCH 78/97] Rename module to allow bundling other useful things in there --- .../scenario_track_individual_histories.py | 10 ++--- src/tlo/analysis/utils.py | 40 +++++++++---------- ...story_tracker.py => individual_history.py} | 0 tests/test_individual_history_tracker.py | 14 +++---- 4 files changed, 32 insertions(+), 32 deletions(-) rename src/tlo/methods/{individual_history_tracker.py => individual_history.py} (100%) diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index e8ef3fb929..0c6a43b127 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -2,7 +2,7 @@ Run on the batch system using: ``` -tlo batch-submit +tlo batch-submit src/scripts/analysis_data_generation/scenario_track_individual_histories.py ``` @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods import individual_history_tracker +from tlo.methods import individual_history from tlo.methods.fullmodel import fullmodel from tlo.scenario import BaseScenario @@ -47,13 +47,13 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.individual_history_tracker': logging.INFO + 'tlo.methods.individual_history': logging.INFO } } def modules(self): return ( - fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] + fullmodel() + [individual_history.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): @@ -73,7 +73,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: ), } - + def _baseline(self) -> Dict: #Return the Dict with values for the parameter changes that define the baseline scenario. 
return mix_scenarios( diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 9aba6111d0..66e4487467 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -372,7 +372,7 @@ def check_info_value_changes(df): # iterate group-by-group for E, g in df.groupby("E"): prev_info = {} - + for _, row in g.iterrows(): current_info = row["Info"] @@ -381,15 +381,15 @@ def check_info_value_changes(df): # compare with previous value if prev_info[key] == value and key not in problems.keys(): problems[key] = value - + # update latest value prev_info = row["Info"] - + return problems def reconstruct_individual_histories(df): - + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( @@ -397,28 +397,28 @@ def reconstruct_individual_histories(df): .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - + df_final = ( df_collapsed .sort_values(by=['E', 'date']) .reset_index(drop=True) ) - + problems = check_info_value_changes(df_final) print(problems) - + return df_final - - + + def extract_individual_histories(results_folder: Path, ) -> dict: """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. - Returns dictionary where keys are draws, and each draw is associated with a dataframe of - format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + Returns dictionary where keys are draws, and each draw is associated with a dataframe of + format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.methods.individual_history_tracker' + module = 'tlo.methods.individual_history' key = 'individual_histories' # get number of draws and numbers of runs @@ -426,13 +426,13 @@ def extract_individual_histories(results_folder: Path, # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df. res = dict() - + for draw in range(info['number_of_draws']): - + # All individuals in same draw will be combined across runs, so their ID will be offset. dfs_from_runs = [] ID_offset = 0 - + for run in range(info['runs_per_draw']): try: @@ -441,24 +441,24 @@ def extract_individual_histories(results_folder: Path, # Offset person ID to account for the fact that we are collecting chains across runs df_single_run['E'] = df_single_run['E'] + ID_offset - + # Calculate ID offset for next run ID_offset = (max(df_single_run['E']) + 1) - + # The E has now become an ID for the individual in the draw overall, so rename column as such df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) # Append these chains to list dfs_from_runs.append(df_single_run) - + except KeyError: # Some logs could not be found - probably because this run failed. # Simply to not append anything to the df collecting chains. 
print("Run failed") - + # Combine all dfs into a single DataFrame res[draw] = pd.concat(dfs_from_runs, ignore_index=True) - + res[0].to_csv('individual_histories.csv') return res diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history.py similarity index 100% rename from src/tlo/methods/individual_history_tracker.py rename to src/tlo/methods/individual_history.py diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 619c062925..e065e20c84 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -11,7 +11,7 @@ enhanced_lifestyle, healthseekingbehaviour, healthsystem, - individual_history_tracker, + individual_history, mockitis, simplified_births, symptommanager, @@ -42,7 +42,7 @@ def test_individual_history_tracker(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.individual_history_tracker": logging.INFO + "tlo.methods.individual_history": logging.INFO } }, resourcefilepath=resourcefilepath ) @@ -52,7 +52,7 @@ def test_individual_history_tracker(tmpdir, seed): simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - individual_history_tracker.IndividualHistoryTracker(), + individual_history.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), mockitis.Mockitis(), @@ -68,19 +68,19 @@ def test_individual_history_tracker(tmpdir, seed): output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) individual_histories = reconstruct_individual_histories( - output_chains['tlo.methods.individual_history_tracker']['individual_histories']) - + output_chains['tlo.methods.individual_history']['individual_histories']) + # Check that we have a "StartOfSimulation" event for every individual in the initial population, # and that this was logged at the start date assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', 'date'] == start_date).all() - + # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() - + # Assert that all HSI events that occurred were also collected in the event chains HSIs_in_individual_histories = individual_histories["EventName"].str.contains('HSI', na=False).sum() assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) From af4ace9291ad60ef8d50cbf1999410e7379cba37 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 22:08:23 +0000 Subject: [PATCH 79/97] Clean up variable names --- .../analysis_extract_data.py | 10 ++--- src/tlo/analysis/utils.py | 18 ++++----- src/tlo/events.py | 8 ++-- src/tlo/methods/hsi_event.py | 16 ++++---- src/tlo/methods/individual_history.py | 39 ++++++++++++------- src/tlo/util.py | 26 ++++++------- tests/test_individual_history_tracker.py | 14 +++---- 7 files changed, 70 insertions(+), 61 deletions(-) diff --git a/src/scripts/track_individual_histories/analysis_extract_data.py b/src/scripts/track_individual_histories/analysis_extract_data.py index e88f68bfe9..fc67df6879 100644 --- 
a/src/scripts/track_individual_histories/analysis_extract_data.py +++ b/src/scripts/track_individual_histories/analysis_extract_data.py @@ -8,11 +8,11 @@ def print_filtered_df(df): """ - Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + Prints rows of the DataFrame excluding event_name 'Initialise' and 'Birth'. """ pd.set_option('display.max_colwidth', None) - filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] - + filtered = df # [~df['event_name'].isin(['StartOfSimulation', 'Birth'])] + dict_cols = ["Info"] max_items = 2 # Step 2: Truncate dictionary columns for display @@ -32,9 +32,9 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - + individual_individual_histories = extract_individual_histories(results_folder) - + if __name__ == "__main__": rfp = Path('resources') diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 66e4487467..360fc36416 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -370,7 +370,7 @@ def check_info_value_changes(df): problems = {} # store issues # iterate group-by-group - for E, g in df.groupby("E"): + for E, g in df.groupby("entity"): prev_info = {} for _, row in g.iterrows(): @@ -390,17 +390,17 @@ def check_info_value_changes(df): def reconstruct_individual_histories(df): - # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes + # Collapse into 'entity', 'date', 'event_name', 'Info' format where 'Info' is dict listing attributes # (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - df.groupby(['E', 'date', 'EventName'], sort=False) - .apply(lambda g: dict(zip(g['A'], g['V']))) + df.groupby(['entity', 'date', 'event_name'], sort=False) + .apply(lambda g: dict(zip(g['attribute'], g['value']))) .reset_index(name='Info') ) df_final = ( df_collapsed - .sort_values(by=['E', 'date']) + .sort_values(by=['entity', 'date']) .reset_index(drop=True) ) @@ -415,7 +415,7 @@ def extract_individual_histories(results_folder: Path, """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. Returns dictionary where keys are draws, and each draw is associated with a dataframe of - format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines + format 'entity', 'date', 'event_name', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. 
""" module = 'tlo.methods.individual_history' @@ -440,13 +440,13 @@ def extract_individual_histories(results_folder: Path, df_single_run= reconstruct_individual_histories(df) # Offset person ID to account for the fact that we are collecting chains across runs - df_single_run['E'] = df_single_run['E'] + ID_offset + df_single_run['entity'] = df_single_run['entity'] + ID_offset # Calculate ID offset for next run - ID_offset = (max(df_single_run['E']) + 1) + ID_offset = (max(df_single_run['entity']) + 1) # The E has now become an ID for the individual in the draw overall, so rename column as such - df_single_run = df_single_run.rename(columns={'E': 'person_ID_in_draw'}) + df_single_run = df_single_run.rename(columns={'entity': 'person_ID_in_draw'}) # Append these chains to list dfs_from_runs.append(df_single_run) diff --git a/src/tlo/events.py b/src/tlo/events.py index 1ceb30a576..afed91afea 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -68,14 +68,14 @@ def run(self): # Dispatch notification that event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": self.__class__.__name__}) - + "event_name": self.__class__.__name__}) + self.apply(self.target) self.post_apply_hook() - + # Dispatch notification that event has just ran notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": self.__class__.__name__}) + "event_name": self.__class__.__name__}) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ad1f92eedd..a7b6a440e9 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -194,16 +194,16 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - + def run(self, squeeze_factor): """Make the event happen.""" - + # Dispatch notification that HSI event is about to run notifier.dispatch("event.pre-run", data={"target": self.target, "module" : self.module.name, - "EventName": self.__class__.__name__}) + "event_name": self.__class__.__name__}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() @@ -214,21 +214,21 @@ def run(self, squeeze_factor): footprint = updated_appt_footprint else: footprint = self.EXPECTED_APPT_FOOTPRINT - + if self.facility_info: level = self.facility_info.level else: level = "N/A" - + notifier.dispatch("event.post-run", data={"target": self.target, - "EventName": self.__class__.__name__, + "event_name": self.__class__.__name__, "footprint": footprint, "level": level }) - + return updated_appt_footprint - + def get_consumables( self, diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 189b8f2052..0a04d211d3 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -77,7 +77,12 @@ def log_EAV_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', - data = {"E": row.E, "A": row.A, "V": str(row.V), "EventName": row.EventName}, + data = { + "entity": row.entity, + "attribute": row.attribute, + "value": str(row.value), + "event_name": row.event_name + }, description='Links forming chains of events for simulated individuals') def on_simulation_post_initialise(self, data): @@ -89,9 +94,9 @@ def on_simulation_post_initialise(self, data): # at the start. 
# EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_EventName = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - + eav_plus_event = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + return def on_simulation_post_do_birth(self, data): @@ -99,7 +104,7 @@ def on_simulation_post_do_birth(self, data): # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. - link_info = {'EventName': 'Birth'} + link_info = {'event_name': 'Birth'} link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} chain_links[data['child_id']] = link_info @@ -107,6 +112,9 @@ def on_simulation_post_do_birth(self, data): eav_plus_EventName = convert_chain_links_into_EAV(chain_links) self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + eav_plus_event = convert_chain_links_into_EAV(chain_links) + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + return def on_event_pre_run(self, data): @@ -120,7 +128,7 @@ def on_event_pre_run(self, data): # 2) the event is not in the list of events to ignore if ( (data['module'] not in self.modules_of_interest) - or (data['EventName'] in self.events_to_ignore) + or (data['event_name'] in self.events_to_ignore) ): return @@ -189,7 +197,7 @@ def on_event_post_run(self, data): mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred - link_info = {'EventName' : data['EventName']} + link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): link_info['footprint'] = data['footprint'] link_info['level'] = data['level'] @@ -244,10 +252,12 @@ def on_event_post_run(self, data): # Log chains if chain_links: # Convert chain_links into EAV-type dataframe - eav_plus_EventName = convert_chain_links_into_EAV(chain_links) + eav_plus_event = convert_chain_links_into_EAV(chain_links) # log it self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) + self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + # Reset variables self.print_chains = False self.df_before = [] @@ -301,10 +311,13 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): """ + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): + """ This function compares the population dataframe and mni dictionary before/after a population-wide e - vent has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, - and to store the properties which have changed as a result of it. + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. 
""" # Create a mask of where values are different @@ -328,7 +341,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventName': EventName, + 'event_name': event_name, } # Store the new values from df_after for the changed columns @@ -350,7 +363,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'EventName': type(self).__name__, + 'event_name': self.__class__.__name__, } for key_prop in diff_mni[key]: diff --git a/src/tlo/util.py b/src/tlo/util.py index e34a887e42..6ab3f67b20 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -97,30 +97,26 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack(dropna=False).reset_index() - eav.columns = ['E', 'A', 'V'] - eav['EventName'] = event_name - eav = eav[["E", "EventName", "A", "V"]] - + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] return eav - - -def convert_chain_links_into_EAV(chain_links): + +def convert_chain_links_into_EAV(chain_links): df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["EventName"] + id_cols = ["event_name"] eav = df.reset_index().melt( id_vars=["index"] + id_cols, # index = person ID - var_name="A", - value_name="V" + var_name="attribute", + value_name="value" ) - eav.rename(columns={"index": "E"}, inplace=True) - - eav = eav[["E", "EventName", "A", "V"]] - + eav.rename(columns={"index": "entity"}, inplace=True) + eav = eav[["entity", "event_name", "attribute", "value"]] return eav - + def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index e065e20c84..20ce42eb44 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -71,22 +71,22 @@ def test_individual_history_tracker(tmpdir, seed): output_chains['tlo.methods.individual_history']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, - # and that this was logged at the start date - assert (individual_histories['EventName'] == 'StartOfSimulation').sum() == popsize - assert (individual_histories.loc[individual_histories['EventName'] == 'StartOfSimulation', + #   and that this was logged at the start date + assert (individual_histories['event_name'] == 'StartOfSimulation').sum() == popsize + assert (individual_histories.loc[individual_histories['event_name'] == 'StartOfSimulation', 'date'] == start_date).all() # Check that in the case of birth or start of simulation, all properties were logged num_properties = len(sim.population.props.columns) - mask = individual_histories["EventName"].isin(["Birth", "StartOfSimulation"]) + mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_individual_histories = 
individual_histories["EventName"].str.contains('HSI', na=False).sum() + HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too - mask = (~individual_histories["EventName"].isin(["StartOfSimulation", "Birth"])) & \ - (~individual_histories["EventName"].str.contains("HSI", na=False)) + mask = (~individual_histories["event_name"].isin(["StartOfSimulation", "Birth"])) & \ + (~individual_histories["event_name"].str.contains("HSI", na=False)) count = mask.sum() assert count > 0 From fcf803a5032330fd2810c421368febcc4ee2045b Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 22:17:35 +0000 Subject: [PATCH 80/97] Fix variable names --- src/tlo/methods/individual_history.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 0a04d211d3..48b8cbf1c7 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -108,10 +108,7 @@ def on_simulation_post_do_birth(self, data): link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) chain_links = {} chain_links[data['child_id']] = link_info - - eav_plus_EventName = convert_chain_links_into_EAV(chain_links) - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - + eav_plus_event = convert_chain_links_into_EAV(chain_links) self.log_EAV_dataframe_to_individual_histories(eav_plus_event) @@ -247,17 +244,15 @@ def on_event_post_run(self, data): df_after, self.entire_mni_before, entire_mni_after, - data['EventName']) + data['event_name']) # Log chains if chain_links: # Convert chain_links into EAV-type dataframe eav_plus_event = convert_chain_links_into_EAV(chain_links) # log it - self.log_EAV_dataframe_to_individual_histories(eav_plus_EventName) - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) - + # Reset variables self.print_chains = False self.df_before = [] @@ -309,9 +304,6 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): return diffs - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, EventName): - """ - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): """ This function compares the population dataframe and mni dictionary before/after a population-wide e From f615f140657fd585c9e4f2f831c28c1a9045c385 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 2 Dec 2025 23:24:52 +0000 Subject: [PATCH 81/97] Move individual-history-specific utils into its module --- src/tlo/methods/individual_history.py | 166 +++++++++++++------------- src/tlo/util.py | 24 ---- 2 files changed, 84 insertions(+), 106 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 48b8cbf1c7..b97d5e8e09 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -7,7 +7,6 @@ from tlo import Module, Parameter, Types, logging from tlo.notify import notifier from tlo.population import Population -from tlo.util import convert_chain_links_into_EAV, df_to_EAV logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -19,13 +18,12 @@ def __init__( name: Optional[str] = None, modules_of_interest: Optional[List[str]] 
= None, events_to_ignore: Optional[List[str]] = None - ): super().__init__(name) - + self.modules_of_interest = modules_of_interest self.events_to_ignore = events_to_ignore - + # This is how I am passing data from fnc taking place before event to the one after # It doesn't seem very elegant but not sure how else to go about it self.print_chains = False @@ -34,7 +32,7 @@ def __init__( self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} - + PARAMETERS = { # Options within module "modules_of_interest": Parameter( @@ -43,28 +41,27 @@ def __init__( "events_to_ignore": Parameter( Types.LIST, "Events to be ignored when collecting chains" ), - } - + } + def initialise_simulation(self, sim): notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) notifier.add_listener("event.pre-run", self.on_event_pre_run) notifier.add_listener("event.post-run", self.on_event_post_run) - + def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv")) - + self.load_parameters_from_dataframe( + pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") + ) + def initialise_population(self, population): # Use parameter file values by default, if not overwritten - - self.modules_of_interest = self.parameters['modules_of_interest'] \ - if self.modules_of_interest is None \ - else self.modules_of_interest - - self.events_to_ignore = self.parameters['events_to_ignore'] \ - if self.events_to_ignore is None \ - else self.events_to_ignore - + if self.modules_of_interest is None: + self.modules_of_interest = self.parameters['modules_of_interest'] + + if self.events_to_ignore is None: + self.events_to_ignore = self.parameters['events_to_ignore'] + # If modules of interest is '*', set by default to all modules included in the simulation if self.modules_of_interest == ['*']: self.modules_of_interest = list(self.sim.modules.keys()) @@ -72,9 +69,8 @@ def initialise_population(self, population): def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass - - def log_EAV_dataframe_to_individual_histories(self, df): - + + def log_eav_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', data = { @@ -84,51 +80,41 @@ def log_EAV_dataframe_to_individual_histories(self, df): "event_name": row.event_name }, description='Links forming chains of events for simulated individuals') - - def on_simulation_post_initialise(self, data): + def on_simulation_post_initialise(self, data): # When logging events for each individual to reconstruct chains, # only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, # we therefore want to store all of their properties # at the start. 
- + # EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_event = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_eav_dataframe_to_individual_histories(eav_plus_event) - return - def on_simulation_post_do_birth(self, data): - # When individual is born, store their initial properties to provide a starting point to the # chain of property changes that this individual will undergo # as a result of events taking place. link_info = {'event_name': 'Birth'} link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) - chain_links = {} - chain_links[data['child_id']] = link_info - - eav_plus_event = convert_chain_links_into_EAV(chain_links) - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) + chain_links = {data['child_id']: link_info} + + eav_plus_event = convert_chain_links_into_eav(chain_links) + self.log_eav_dataframe_to_individual_histories(eav_plus_event) - return - def on_event_pre_run(self, data): - """Do this when notified that an event is about to run. + """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, a nd if so stored required information before the event has occurred. """ - + # Only log event if # 1) the event belongs to modules of interest and # 2) the event is not in the list of events to ignore - if ( - (data['module'] not in self.modules_of_interest) - or (data['event_name'] in self.events_to_ignore) - ): + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): return - + # Initialise these variables self.print_chains = False self.df_before = [] @@ -136,15 +122,15 @@ def on_event_pre_run(self, data): self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} - + self.print_chains = True - + # Target is single individual if not isinstance(data['target'], Population): # Save row for comparison after event has occurred self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - + # Check if individual is already in mni dictionary, if so copy her original status if 'PregnancySupervisor' in self.sim.modules: mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info @@ -153,7 +139,7 @@ def on_event_pre_run(self, data): self.mni_row_before = mni[data['target']].copy() else: self.mni_row_before = None - + else: # This will be a population-wide event. In order to find individuals for which this led to @@ -165,25 +151,22 @@ def on_event_pre_run(self, data): else: self.entire_mni_before = None - return - - def on_event_post_run(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property - changes that have occured to one or multiple individuals as a result of the event taking place. + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
""" - + if not self.print_chains: return - + chain_links = {} - + # Target is single individual if not isinstance(data["target"], Population): - + # Copy full new status for individual row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - + # Check if individual is in mni after the event mni_instances_after = False if 'PregnancySupervisor' in self.sim.modules: @@ -192,18 +175,18 @@ def on_event_post_run(self, data): mni_instances_after = True else: mni_instances_after = None - + # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): link_info['footprint'] = data['footprint'] link_info['level'] = data['level'] - + # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - + if 'PregnancySupervisor' in self.sim.modules: # Now check and store changes in the mni dictionary, accounting for following cases: # Individual is in mni dictionary before and after @@ -224,21 +207,21 @@ def on_event_post_run(self, data): if self.mni_values_differ(default[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] # Else, no need to do anything - + # Add individual to the chain links chain_links[data['target']] = link_info - + else: # Target is entire population. Identify individuals for which properties have changed # and store their changes. - + # Population frame after event df_after = self.sim.population.props if 'PregnancySupervisor' in self.sim.modules: entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: entire_mni_after = None - + # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, @@ -249,10 +232,10 @@ def on_event_post_run(self, data): # Log chains if chain_links: # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_EAV(chain_links) + eav_plus_event = convert_chain_links_into_eav(chain_links) # log it - self.log_EAV_dataframe_to_individual_histories(eav_plus_event) - + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + # Reset variables self.print_chains = False self.df_before = [] @@ -261,8 +244,6 @@ def on_event_post_run(self, data): self.mni_row_before = {} self.entire_mni_before = {} - return - def mni_values_differ(self, v1, v2): if isinstance(v1, list) and isinstance(v2, list): @@ -271,12 +252,12 @@ def mni_values_differ(self, v1, v2): if pd.isna(v1) and pd.isna(v2): return False # treat both NaT/NaN as equal return v1 != v2 - + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs = {} all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - + for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: @@ -285,7 +266,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] - + elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: if self.mni_values_differ(entire_mni_before[person][key], @@ -303,7 +284,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): 
diffs[person][key] = entire_mni_after[person][key] return diffs - + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): """ This function compares the population dataframe and mni dictionary before/after a population-wide e @@ -311,20 +292,20 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ - + # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) if 'PregnancySupervisor' in self.sim.modules: diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) else: diff_mni = [] - + # Create an empty dict to store changes for each of the individuals chain_links = {} # Loop through each row of the mask persons_changed = [] - + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() @@ -335,7 +316,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info = { 'event_name': event_name, } - + # Store the new values from df_after for the changed columns for col in changed_cols: link_info[col] = df_after.at[idx, col] @@ -347,7 +328,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - + if 'PregnancySupervisor' in self.sim.modules: # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -357,12 +338,33 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info = { 'event_name': self.__class__.__name__, } - + for key_prop in diff_mni[key]: link_info[key_prop] = diff_mni[key][key_prop] - + chain_links[key] = link_info return chain_links - +def df_to_eav(df, date, event_name): + """Function to convert dataframe into EAV""" + eav = df.stack(dropna=False).reset_index() + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav + + +def convert_chain_links_into_eav(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") + id_cols = ["event_name"] + + eav = df.reset_index().melt( + id_vars=["index"] + id_cols, # index = person ID + var_name="attribute", + value_name="value" + ) + + eav.rename(columns={"index": "entity"}, inplace=True) + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav diff --git a/src/tlo/util.py b/src/tlo/util.py index 6ab3f67b20..efe17a9920 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -94,30 +94,6 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states -def df_to_EAV(df, date, event_name): - """Function to convert dataframe into EAV""" - eav = df.stack(dropna=False).reset_index() - eav.columns = ['entity', 'attribute', 'value'] - eav['event_name'] = event_name - eav = eav[["entity", "event_name", "attribute", "value"]] - return eav - - -def convert_chain_links_into_EAV(chain_links): - df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["event_name"] - - eav = df.reset_index().melt( - id_vars=["index"] + id_cols, # index = person ID - var_name="attribute", - value_name="value" - ) - - eav.rename(columns={"index": "entity"}, inplace=True) - eav = eav[["entity", "event_name", 
"attribute", "value"]] - return eav - - def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities that are specific to each individual. From 21482a6999bb891f31cc579d6d0196985f4f6eb3 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 3 Dec 2025 01:49:18 +0000 Subject: [PATCH 82/97] Don't send data if there isn't any - default is `None` --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 71a90b04ff..4d0c5d4cee 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -310,7 +310,7 @@ def initialise(self, *, end_date: Date) -> None: # Otherwise, would have to add listener outside of CollectEventChains initialisation # Dispatch notification that pop has been initialised - notifier.dispatch("simulation.post-initialise", data={}) + notifier.dispatch("simulation.post-initialise") def finalise(self, wall_clock_time: Optional[float] = None) -> None: """Finalise all modules in simulation and close logging file if open. From 87f4710228778387d8edb675da95b0323ac4f2e7 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Wed, 3 Dec 2025 01:55:42 +0000 Subject: [PATCH 83/97] Rename HSI event notifications - should only come from one place - same listener can listen for different notifications --- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index a7b6a440e9..db5d599493 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -200,7 +200,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.pre-run", + notifier.dispatch("hsi_event.pre-run", data={"target": self.target, "module" : self.module.name, "event_name": self.__class__.__name__}) @@ -220,7 +220,7 @@ def run(self, squeeze_factor): else: level = "N/A" - notifier.dispatch("event.post-run", + notifier.dispatch("hsi_event.post-run", data={"target": self.target, "event_name": self.__class__.__name__, "footprint": footprint, diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index b97d5e8e09..6265c7b928 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -48,6 +48,8 @@ def initialise_simulation(self, sim): notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) notifier.add_listener("event.pre-run", self.on_event_pre_run) notifier.add_listener("event.post-run", self.on_event_post_run) + notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) + notifier.add_listener("hsi_event.post-run", self.on_event_post_run) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe( From d44285e95bde37305d5e2f64fde1bbb3727f8f75 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:28:26 +0000 Subject: [PATCH 84/97] Simplify handling of mni differences for individual --- src/tlo/methods/individual_history.py | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 6265c7b928..420710a9d9 100644 --- 
a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -166,17 +166,10 @@ def on_event_post_run(self, data): # Target is single individual if not isinstance(data["target"], Population): - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + pop = self.sim.population.props - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None + # Copy full new status for individual + row_after = pop.loc[data['target']].fillna(-99999) # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -189,26 +182,37 @@ def on_event_post_run(self, data): if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - if 'PregnancySupervisor' in self.sim.modules: + if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + # Check if individual is in mni after the event + mni_instances_after = False + if data['target'] in mni: + mni_instances_after = True + # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after + + # 1. Individual is not in mni neither before nor after event, can pass + if not self.mni_instances_before and not self.mni_instances_after: + pass + # 2. Individual is in mni dictionary before and after if self.mni_instances_before and mni_instances_after: for key in self.mni_row_before: if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event + # 3. Individual is only in mni dictionary before event elif self.mni_instances_before and not mni_instances_after: default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in self.mni_row_before: if self.mni_values_differ(self.mni_row_before[key], default[key]): link_info[key] = default[key] - # Individual is only in mni dictionary after event + # 4. Individual is only in mni dictionary after event elif mni_instances_after and not self.mni_instances_before: default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.mni_values_differ(default[key], mni[data['target']][key]): link_info[key] = mni[data['target']][key] - # Else, no need to do anything # Add individual to the chain links chain_links[data['target']] = link_info From d01fb3e9f5bf4b6224d57197145a3190ac344fb1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 5 Dec 2025 11:34:18 +0000 Subject: [PATCH 85/97] Fix issue with nan changes being saved. 
This was down to EAV conversion, not df comparison --- src/tlo/methods/individual_history.py | 50 +++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 420710a9d9..0411ee016d 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -194,7 +194,7 @@ def on_event_post_run(self, data): # Now check and store changes in the mni dictionary, accounting for following cases: # 1. Individual is not in mni neither before nor after event, can pass - if not self.mni_instances_before and not self.mni_instances_after: + if not self.mni_instances_before and not mni_instances_after: pass # 2. Individual is in mni dictionary before and after if self.mni_instances_before and mni_instances_after: @@ -223,6 +223,7 @@ def on_event_post_run(self, data): # Population frame after event df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) else: @@ -298,20 +299,22 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + # Create an empty dict to store changes for each of the individuals + chain_links = {} + + # Individuals undergoing changes in the generap pop dataframe + persons_changed = [] - # Create a mask of where values are different - diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + # Collect changes in the pop dataframe before/after the event + same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) + diff_mask = ~same + + # Collect changes in the mni dictionary if 'PregnancySupervisor' in self.sim.modules: diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) else: diff_mni = [] - # Create an empty dict to store changes for each of the individuals - chain_links = {} - - # Loop through each row of the mask - persons_changed = [] - for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() @@ -334,7 +337,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - + if 'PregnancySupervisor' in self.sim.modules: # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -349,6 +352,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = link_info + return chain_links @@ -362,15 +366,23 @@ def df_to_eav(df, date, event_name): def convert_chain_links_into_eav(chain_links): - df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["event_name"] - eav = df.reset_index().melt( - id_vars=["index"] + id_cols, # index = person ID - var_name="attribute", - value_name="value" - ) + rows = [] - eav.rename(columns={"index": "entity"}, inplace=True) - eav = eav[["entity", "event_name", "attribute", "value"]] + for e, data in chain_links.items(): + event_name = data.get("event_name") + + for attr, val in data.items(): + if attr == "event_name": + continue + + rows.append({ + "entity": e, + "event_name": event_name, + "attribute": attr, + "value": val + }) + + eav = pd.DataFrame(rows) + return 
eav From 1e0c55ae4e8785c262a8f6167665180d93b572e3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 5 Dec 2025 16:39:56 +0000 Subject: [PATCH 86/97] Unify approach taken to copy pop dataframe and mni, and remove all events logged following death in postprocessing --- src/tlo/analysis/utils.py | 28 +++++++++++++- src/tlo/methods/individual_history.py | 53 +++++++++++++++++++++------ 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 360fc36416..c0cafa0f7c 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -386,7 +386,27 @@ def check_info_value_changes(df): prev_info = row["Info"] return problems + +def remove_events_for_individual_after_death(df): + rows_to_drop = [] + # Group by entity + for entity, g in df.groupby("entity"): + died = False + + for idx, row in g.iterrows(): + current_info = row["Info"] + + if not died: + # Check if this row marks death + if isinstance(current_info, dict) and current_info.get("is_alive") is False: + died = True + else: + # Already dead → mark this row for removal + rows_to_drop.append(idx) + + # Drop all marked rows + return df.drop(index=rows_to_drop) def reconstruct_individual_histories(df): @@ -404,8 +424,14 @@ def reconstruct_individual_histories(df): .reset_index(drop=True) ) + df_final = remove_events_for_individual_after_death(df_final) + problems = check_info_value_changes(df_final) - print(problems) + if len(problems)>0: + print("Values didn't change but were still detected") + print(problems) + + return df_final diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py index 0411ee016d..23af6d94ff 100644 --- a/src/tlo/methods/individual_history.py +++ b/src/tlo/methods/individual_history.py @@ -72,6 +72,34 @@ def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass + def copy_of_pop_dataframe(self): + df_copy = self.sim.population.props.copy() + for col in df_copy.columns: + df_copy[col] = df_copy[col].apply( + lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x + ) + return df_copy + + def copy_of_pop_dataframe_row(self, person_ID): + copy_of_row = self.sim.population.props.loc[person_ID].copy() + for col,val in copy_of_row.items(): + if isinstance(val, (list, dict, pd.Series)): + copy_of_row[col] = copy.deepcopy(val) + copy_of_row = copy_of_row.fillna(-99999) + return copy_of_row + + def copy_of_mni(self): + """Function to safely copy entire mni dictionary, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + def copy_of_mni_row(self, person_ID): + """Function to safely copy mni entry for single individual, ensuring that series attributes + are safely copied too. 
+ """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) + def log_eav_dataframe_to_individual_histories(self, df): for idx, row in df.iterrows(): logger.info(key='individual_histories', @@ -131,14 +159,14 @@ def on_event_pre_run(self, data): if not isinstance(data['target'], Population): # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + self.row_before = self.copy_of_pop_dataframe_row(data['target']) # Check if individual is already in mni dictionary, if so copy her original status if 'PregnancySupervisor' in self.sim.modules: mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info if data['target'] in mni: self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() + self.mni_row_before = self.copy_of_mni_row(data['target']) else: self.mni_row_before = None @@ -146,10 +174,9 @@ def on_event_pre_run(self, data): # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() + self.df_before = self.copy_of_pop_dataframe() if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy( - self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + self.entire_mni_before = self.copy_of_mni() else: self.entire_mni_before = None @@ -164,12 +191,12 @@ def on_event_post_run(self, data): chain_links = {} # Target is single individual - if not isinstance(data["target"], Population): + if not isinstance(data['target'], Population): pop = self.sim.population.props # Copy full new status for individual - row_after = pop.loc[data['target']].fillna(-99999) + row_after = self.copy_of_pop_dataframe_row(data['target']) # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -221,11 +248,11 @@ def on_event_post_run(self, data): # Target is entire population. Identify individuals for which properties have changed # and store their changes. 
- # Population frame after event - df_after = self.sim.population.props + # Population dataframe after event + df_after = self.copy_of_pop_dataframe() if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + entire_mni_after = self.copy_of_mni() else: entire_mni_after = None @@ -357,7 +384,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be def df_to_eav(df, date, event_name): - """Function to convert dataframe into EAV""" + """Function to convert entire population dataframe into custom EAV""" eav = df.stack(dropna=False).reset_index() eav.columns = ['entity', 'attribute', 'value'] eav['event_name'] = event_name @@ -366,7 +393,7 @@ def df_to_eav(df, date, event_name): def convert_chain_links_into_eav(chain_links): - + """Function to convert chain links into custom EAV""" rows = [] for e, data in chain_links.items(): @@ -386,3 +413,5 @@ def convert_chain_links_into_eav(chain_links): eav = pd.DataFrame(rows) return eav + + From 1269bda421b6a7d774c8b1692b3d3e07bde822a4 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 18 Dec 2025 12:50:05 +0000 Subject: [PATCH 87/97] Add tracking property for individual --- src/tlo/methods/individual_history.py | 417 ----------------------- tests/test_individual_history_tracker.py | 36 +- 2 files changed, 25 insertions(+), 428 deletions(-) delete mode 100644 src/tlo/methods/individual_history.py diff --git a/src/tlo/methods/individual_history.py b/src/tlo/methods/individual_history.py deleted file mode 100644 index 23af6d94ff..0000000000 --- a/src/tlo/methods/individual_history.py +++ /dev/null @@ -1,417 +0,0 @@ -import copy -from pathlib import Path -from typing import List, Optional - -import pandas as pd - -from tlo import Module, Parameter, Types, logging -from tlo.notify import notifier -from tlo.population import Population - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -class IndividualHistoryTracker(Module): - - def __init__( - self, - name: Optional[str] = None, - modules_of_interest: Optional[List[str]] = None, - events_to_ignore: Optional[List[str]] = None - ): - super().__init__(name) - - self.modules_of_interest = modules_of_interest - self.events_to_ignore = events_to_ignore - - # This is how I am passing data from fnc taking place before event to the one after - # It doesn't seem very elegant but not sure how else to go about it - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - PARAMETERS = { - # Options within module - "modules_of_interest": Parameter( - Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" - ), - "events_to_ignore": Parameter( - Types.LIST, "Events to be ignored when collecting chains" - ), - } - - def initialise_simulation(self, sim): - notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) - notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) - notifier.add_listener("event.pre-run", self.on_event_pre_run) - notifier.add_listener("event.post-run", self.on_event_post_run) - notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) - notifier.add_listener("hsi_event.post-run", self.on_event_post_run) - - def read_parameters(self, resourcefilepath: Optional[Path] = None): - self.load_parameters_from_dataframe( - pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") - ) - - def initialise_population(self, population): - # Use parameter file values by default, if not overwritten - if self.modules_of_interest is None: - self.modules_of_interest = self.parameters['modules_of_interest'] - - if self.events_to_ignore is None: - self.events_to_ignore = self.parameters['events_to_ignore'] - - # If modules of interest is '*', set by default to all modules included in the simulation - if self.modules_of_interest == ['*']: - self.modules_of_interest = list(self.sim.modules.keys()) - - def on_birth(self, mother, child): - # Could the notification of birth simply take place here? - pass - - def copy_of_pop_dataframe(self): - df_copy = self.sim.population.props.copy() - for col in df_copy.columns: - df_copy[col] = df_copy[col].apply( - lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x - ) - return df_copy - - def copy_of_pop_dataframe_row(self, person_ID): - copy_of_row = self.sim.population.props.loc[person_ID].copy() - for col,val in copy_of_row.items(): - if isinstance(val, (list, dict, pd.Series)): - copy_of_row[col] = copy.deepcopy(val) - copy_of_row = copy_of_row.fillna(-99999) - return copy_of_row - - def copy_of_mni(self): - """Function to safely copy entire mni dictionary, ensuring that series attributes - are safely copied too. - """ - return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - - def copy_of_mni_row(self, person_ID): - """Function to safely copy mni entry for single individual, ensuring that series attributes - are safely copied too. - """ - return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) - - def log_eav_dataframe_to_individual_histories(self, df): - for idx, row in df.iterrows(): - logger.info(key='individual_histories', - data = { - "entity": row.entity, - "attribute": row.attribute, - "value": str(row.value), - "event_name": row.event_name - }, - description='Links forming chains of events for simulated individuals') - - def on_simulation_post_initialise(self, data): - # When logging events for each individual to reconstruct chains, - # only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, - # we therefore want to store all of their properties - # at the start. 
- - # EDNAV structure to capture status of individuals at the start of the simulation - eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - def on_simulation_post_do_birth(self, data): - # When individual is born, store their initial properties to provide a starting point to the - # chain of property changes that this individual will undergo - # as a result of events taking place. - link_info = {'event_name': 'Birth'} - link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) - chain_links = {data['child_id']: link_info} - - eav_plus_event = convert_chain_links_into_eav(chain_links) - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - def on_event_pre_run(self, data): - """Do this when notified that an event is about to run. - This function checks whether this event should be logged as part of the event chains, a - nd if so stored required information before the event has occurred. - """ - - # Only log event if - # 1) the event belongs to modules of interest and - # 2) the event is not in the list of events to ignore - if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): - return - - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - self.print_chains = True - - # Target is single individual - if not isinstance(data['target'], Population): - - # Save row for comparison after event has occurred - self.row_before = self.copy_of_pop_dataframe_row(data['target']) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = self.copy_of_mni_row(data['target']) - else: - self.mni_row_before = None - - else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.copy_of_pop_dataframe() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = self.copy_of_mni() - else: - self.entire_mni_before = None - - def on_event_post_run(self, data): - """ If print_chains=True, this function logs the event and identifies and logs the any property - changes that have occured to one or multiple individuals as a result of the event taking place. 
- """ - - if not self.print_chains: - return - - chain_links = {} - - # Target is single individual - if not isinstance(data['target'], Population): - - pop = self.sim.population.props - - # Copy full new status for individual - row_after = self.copy_of_pop_dataframe_row(data['target']) - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = {'event_name' : data['event_name']} - if 'footprint' in data.keys(): - link_info['footprint'] = data['footprint'] - link_info['level'] = data['level'] - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - # Check if individual is in mni after the event - mni_instances_after = False - if data['target'] in mni: - mni_instances_after = True - - # Now check and store changes in the mni dictionary, accounting for following cases: - - # 1. Individual is not in mni neither before nor after event, can pass - if not self.mni_instances_before and not mni_instances_after: - pass - # 2. Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # 3. Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(self.mni_row_before[key], default[key]): - link_info[key] = default[key] - # 4. Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - - # Add individual to the chain links - chain_links[data['target']] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. 
- - # Population dataframe after event - df_after = self.copy_of_pop_dataframe() - - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = self.copy_of_mni() - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, - df_after, - self.entire_mni_before, - entire_mni_after, - data['event_name']) - - # Log chains - if chain_links: - # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_eav(chain_links) - # log it - self.log_eav_dataframe_to_individual_histories(eav_plus_event) - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} - - def mni_values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): - diffs = {} - - all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - - for person in all_individuals: - if person not in entire_mni_before: # but is afterward - for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key], - self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - elif person not in entire_mni_after: # but is beforehand - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key], - self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] - - else: # person is in both - # Compare properties - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - return diffs - - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): - """ - This function compares the population dataframe and mni dictionary before/after a population-wide e - vent has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, - and to store the properties which have changed as a result of it. 
- """ - # Create an empty dict to store changes for each of the individuals - chain_links = {} - - # Individuals undergoing changes in the generap pop dataframe - persons_changed = [] - - # Collect changes in the pop dataframe before/after the event - same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) - diff_mask = ~same - - # Collect changes in the mni dictionary - if 'PregnancySupervisor' in self.sim.modules: - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) - else: - diff_mni = [] - - for idx, row in diff_mask.iterrows(): - changed_cols = row.index[row].tolist() - - if changed_cols: # Proceed only if there are changes in the row - persons_changed.append(idx) - # Create a dictionary for this person - # First add event info - link_info = { - 'event_name': event_name, - } - - # Store the new values from df_after for the changed columns - for col in changed_cols: - link_info[col] = df_after.at[idx, col] - - if idx in diff_mni: - # This person has also undergone changes in the mni dictionary, so add these here - for key in diff_mni[idx]: - link_info[col] = diff_mni[idx][key] - - # Append the event and changes to the individual key - chain_links[idx] = link_info - - if 'PregnancySupervisor' in self.sim.modules: - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'event_name': self.__class__.__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] - - chain_links[key] = link_info - - return chain_links - - -def df_to_eav(df, date, event_name): - """Function to convert entire population dataframe into custom EAV""" - eav = df.stack(dropna=False).reset_index() - eav.columns = ['entity', 'attribute', 'value'] - eav['event_name'] = event_name - eav = eav[["entity", "event_name", "attribute", "value"]] - return eav - - -def convert_chain_links_into_eav(chain_links): - """Function to convert chain links into custom EAV""" - rows = [] - - for e, data in chain_links.items(): - event_name = data.get("event_name") - - for attr, val in data.items(): - if attr == "event_name": - continue - - rows.append({ - "entity": e, - "event_name": event_name, - "attribute": attr, - "value": val - }) - - eav = pd.DataFrame(rows) - - return eav - - diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py index 20ce42eb44..7d4a34a591 100644 --- a/tests/test_individual_history_tracker.py +++ b/tests/test_individual_history_tracker.py @@ -11,10 +11,16 @@ enhanced_lifestyle, healthseekingbehaviour, healthsystem, - individual_history, + contraception, + individual_history_tracker, mockitis, - simplified_births, + newborn_outcomes, + pregnancy_supervisor, + care_of_women_during_pregnancy, + labour, + postnatal_supervisor, symptommanager, + hiv, ) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' @@ -42,21 +48,26 @@ def test_individual_history_tracker(tmpdir, seed): "directory": tmpdir, "custom_levels": { "tlo.methods.healthsystem": logging.DEBUG, - "tlo.methods.individual_history": logging.INFO + "tlo.methods.individual_history_tracker": logging.INFO } }, resourcefilepath=resourcefilepath ) # Register the core modules sim.register(demography.Demography(), - simplified_births.SimplifiedBirths(), enhanced_lifestyle.Lifestyle(), healthsystem.HealthSystem(), - 
individual_history.IndividualHistoryTracker(), + individual_history_tracker.IndividualHistoryTracker(), symptommanager.SymptomManager(), healthseekingbehaviour.HealthSeekingBehaviour(), - mockitis.Mockitis(), - chronicsyndrome.ChronicSyndrome() + chronicsyndrome.ChronicSyndrome(), + contraception.Contraception(), + newborn_outcomes.NewbornOutcomes(), + pregnancy_supervisor.PregnancySupervisor(), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(), + labour.Labour(), + postnatal_supervisor.PostnatalSupervisor(), + hiv.DummyHivModule(), ) # Run the simulation @@ -68,7 +79,7 @@ def test_individual_history_tracker(tmpdir, seed): output = parse_log_file(sim.log_filepath, level=logging.DEBUG) output_chains = parse_log_file(sim.log_filepath, level=logging.INFO) individual_histories = reconstruct_individual_histories( - output_chains['tlo.methods.individual_history']['individual_histories']) + output_chains['tlo.methods.individual_history_tracker']['individual_histories']) # Check that we have a "StartOfSimulation" event for every individual in the initial population, #   and that this was logged at the start date @@ -81,9 +92,12 @@ def test_individual_history_tracker(tmpdir, seed): mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"]) assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all() - # Assert that all HSI events that occurred were also collected in the event chains - HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() - assert HSIs_in_individual_histories == len(output['tlo.methods.healthsystem']['HSI_Event']) + # Assert that all HSI events that occurred were also collected in the event chains. Do not include Inpatient_Care HSIs, as these + # are not currently treated as being individual-specific + Num_of_HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum() + Num_of_HSIs_in_hs_log = len(output['tlo.methods.healthsystem']['HSI_Event'].loc[ + output['tlo.methods.healthsystem']['HSI_Event']['Event_Name'] != 'Inpatient_Care']) + assert Num_of_HSIs_in_individual_histories == Num_of_HSIs_in_hs_log # Check that aside from HSIs, StartOfSimulation, and Birth, other events were collected too mask = (~individual_histories["event_name"].isin(["StartOfSimulation", "Birth"])) & \ From b8b858ecc2c24f7306358e50c342a3d6474ea9b7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 12:10:16 +0000 Subject: [PATCH 88/97] Track consumable access --- .../scenario_track_individual_histories.py | 4 +- src/tlo/methods/consumables.py | 41 +- src/tlo/methods/hsi_event.py | 8 +- src/tlo/methods/individual_history_tracker.py | 468 ++++++++++++++++++ 4 files changed, 502 insertions(+), 19 deletions(-) create mode 100644 src/tlo/methods/individual_history_tracker.py diff --git a/src/scripts/track_individual_histories/scenario_track_individual_histories.py b/src/scripts/track_individual_histories/scenario_track_individual_histories.py index 0c6a43b127..0b9eaf9263 100644 --- a/src/scripts/track_individual_histories/scenario_track_individual_histories.py +++ b/src/scripts/track_individual_histories/scenario_track_individual_histories.py @@ -19,7 +19,7 @@ from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios -from tlo.methods import individual_history +from tlo.methods import individual_history_tracker from tlo.methods.fullmodel import fullmodel 
from tlo.scenario import BaseScenario @@ -53,7 +53,7 @@ def log_configuration(self): def modules(self): return ( - fullmodel() + [individual_history.IndividualHistoryTracker()] + fullmodel() + [individual_history_tracker.IndividualHistoryTracker()] ) def draw_parameters(self, draw_number, rng): diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index c16b22e6cd..46a2dd94ae 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -8,6 +8,8 @@ import pandas as pd from tlo import logging +from tlo.notify import notifier + logger = logging.getLogger('tlo.methods.healthsystem') logger_summary = logging.getLogger('tlo.methods.healthsystem.summary') @@ -249,7 +251,11 @@ def _request_consumables(self, essential_item_codes: dict, optional_item_codes: Optional[dict] = None, to_log: bool = True, + to_broadcast: bool = True, treatment_id: Optional[str] = None, + target: Optional[int] = None, + event_name: Optional[str] = None, + module: Optional[str] = None ) -> dict: """This is a private function called by 'get_consumables` in the `HSI_Event` base class. It queries whether item_codes are currently available at a particular Facility_ID and logs the request. @@ -282,28 +288,31 @@ def _request_consumables(self, override_probability=override_probability) # Log the request and the outcome: - if to_log: + if to_log or to_broadcast: items_available = {k: v for k, v in _all_item_codes.items() if available[k]} items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} # Log items used if all essential items are available items_used = items_available if all(available.get(k, False) for k in essential_item_codes) else {} - logger.info( - key='Consumables', - data={ - 'TREATMENT_ID': treatment_id or "", - 'Item_Available': str(items_available), - 'Item_NotAvailable': str(items_not_available), - 'Item_Used': str(items_used), - }, - description="Record of requested and used consumable items." - ) - self._summary_counter.record_availability( - items_available=items_available, - items_not_available=items_not_available, - items_used=items_used, - ) + if to_log: + logger.info( + key='Consumables', + data={ + 'TREATMENT_ID': treatment_id or "", + 'Item_Available': str(items_available), + 'Item_NotAvailable': str(items_not_available), + 'Item_Used': str(items_used), + }, + description="Record of requested and used consumable items." + ) + self._summary_counter.record_availability( + items_available=items_available, + items_not_available=items_not_available, + items_used=items_used, + ) + + notifier.dispatch("consumables._request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) # Return the result of the check on availability return available diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index e0cc1adda9..c48d1a4ca0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -227,7 +227,8 @@ def run(self, squeeze_factor): data={"target": self.target, "event_name": self.__class__.__name__, "footprint": footprint, - "level": level + "level": level, + "treatment_ID": self.TREATMENT_ID }) return updated_appt_footprint @@ -263,14 +264,19 @@ def get_consumables( # Determine if the request should be logged (over-ride argument provided if HealthSystem is disabled). 
_to_log = to_log if not self.healthcare_system.disable else False + _to_broadcast = True if 'IndividualHistoryTracker' in self.module.sim.modules else False # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( essential_item_codes=_item_codes, optional_item_codes=_optional_item_codes, to_log=_to_log, + to_broadcast = _to_broadcast, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, + target=self.target, + event_name=self.__class__.__name__, + module = self.module ) # Return result in expected format: diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py new file mode 100644 index 0000000000..175198948c --- /dev/null +++ b/src/tlo/methods/individual_history_tracker.py @@ -0,0 +1,468 @@ +import copy +from pathlib import Path +from typing import List, Optional + +import pandas as pd + +from tlo import Module, Parameter, Property, Types, logging +from tlo.notify import notifier +from tlo.population import Population + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class IndividualHistoryTracker(Module): + + def __init__( + self, + name: Optional[str] = None, + modules_of_interest: Optional[List[str]] = None, + events_to_ignore: Optional[List[str]] = None + ): + super().__init__(name) + + self.modules_of_interest = modules_of_interest + self.events_to_ignore = events_to_ignore + + # This is how I am passing data from fnc taking place before event to the one after + # It doesn't seem very elegant but not sure how else to go about it + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + PARAMETERS = { + # Options within module + "modules_of_interest": Parameter( + Types.LIST, "Restrict the events collected to specific modules. 
If *, print for all modules" + ), + "events_to_ignore": Parameter( + Types.LIST, "Events to be ignored when collecting chains" + ), + } + + PROPERTIES = { + "track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") + } + + def initialise_simulation(self, sim): + notifier.add_listener("simulation.post-initialise", self.on_simulation_post_initialise) + notifier.add_listener("simulation.post-do_birth", self.on_simulation_post_do_birth) + notifier.add_listener("event.pre-run", self.on_event_pre_run) + notifier.add_listener("event.post-run", self.on_event_post_run) + notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) + notifier.add_listener("hsi_event.post-run", self.on_event_post_run) + notifier.add_listener("consumables._request-consumables", self.on_consumable_request) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + self.load_parameters_from_dataframe( + pd.read_csv(resourcefilepath/"ResourceFile_IndividualHistoryTracker/parameter_values.csv") + ) + + def initialise_population(self, population): + # Use parameter file values by default, if not overwritten + if self.modules_of_interest is None: + self.modules_of_interest = self.parameters['modules_of_interest'] + + if self.events_to_ignore is None: + self.events_to_ignore = self.parameters['events_to_ignore'] + + # If modules of interest is '*', set by default to all modules included in the simulation + if self.modules_of_interest == ['*']: + self.modules_of_interest = list(self.sim.modules.keys()) + + # Initialise all individuals as being tracked by default + pop = self.sim.population.props + pop.loc[pop.is_alive, "track_history"] = True + + def on_birth(self, mother, child): + self.sim.population.props.at[child, "track_history"] = True + return + + def copy_of_pop_dataframe(self): + df_copy = self.sim.population.props.copy() + for col in df_copy.columns: + df_copy[col] = df_copy[col].apply( + lambda x: copy.deepcopy(x) if isinstance(x, (list, dict, pd.Series)) else x + ) + return df_copy + + def copy_of_pop_dataframe_row(self, person_ID): + copy_of_row = self.sim.population.props.loc[person_ID].copy() + for col,val in copy_of_row.items(): + if isinstance(val, (list, dict, pd.Series)): + copy_of_row[col] = copy.deepcopy(val) + copy_of_row = copy_of_row.fillna(-99999) + return copy_of_row + + def copy_of_mni(self): + """Function to safely copy entire mni dictionary, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + def copy_of_mni_row(self, person_ID): + """Function to safely copy mni entry for single individual, ensuring that series attributes + are safely copied too. + """ + return copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info[person_ID]) + + def log_eav_dataframe_to_individual_histories(self, df): + for idx, row in df.iterrows(): + logger.info(key='individual_histories', + data = { + "entity": row.entity, + "attribute": row.attribute, + "value": str(row.value), + "event_name": row.event_name + }, + description='Links forming chains of events for simulated individuals') + + def on_simulation_post_initialise(self, data): + # When logging events for each individual to reconstruct chains, + # only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, + # we therefore want to store all of their properties + # at the start. 
+ + # EDNAV structure to capture status of individuals at the start of the simulation + eav_plus_event = df_to_eav(self.sim.population.props, self.sim.date, 'StartOfSimulation') + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + def on_simulation_post_do_birth(self, data): + # When individual is born, store their initial properties to provide a starting point to the + # chain of property changes that this individual will undergo + # as a result of events taking place. + link_info = {'event_name': 'Birth'} + link_info.update(self.sim.population.props.loc[data['child_id']].to_dict()) + chain_links = {data['child_id']: link_info} + + eav_plus_event = convert_chain_links_into_eav(chain_links) + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + def on_consumable_request(self,data): + """Do this when notified that an individual has accessed consumables""" + # Only log event if + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): + return + + # Copy this info for individual + chain_links = {} + chain_links[data['target']] = {k: v for k, v in data.items() if k != 'target'} + + # Convert chain_links into EAV-type dataframe + eav_plus_event = convert_chain_links_into_eav(chain_links) + # log it + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + return + + + def on_event_pre_run(self, data): + """Do this when notified that an event is about to run. + This function checks whether this event should be logged as part of the event chains, a + nd if so stored required information before the event has occurred. + """ + + # Only log event if + # 1) the event belongs to modules of interest and + # 2) the event is not in the list of events to ignore + if (data['module'] not in self.modules_of_interest) or (data['event_name'] in self.events_to_ignore): + self.print_chains = False + return + + # Initialise these variables + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + self.print_chains = True + + # Target is single individual + if not isinstance(data['target'], Population): + + # Save pop dataframe row for comparison after event has occurred + self.row_before = self.copy_of_pop_dataframe_row(data['target']) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules and (self.sim.population.props.loc[data['target'],'sex'] == 'F'): + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = self.copy_of_mni_row(data['target']) + else: + self.mni_row_before = None + + else: + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.copy_of_pop_dataframe() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = self.copy_of_mni() + else: + self.entire_mni_before = None + + def on_event_post_run(self, data): + """ If print_chains=True, this function logs the event and identifies and logs the any property + changes that have occured to one or multiple individuals as a result of the event taking place. 
+ """ + + if not self.print_chains: + return + + chain_links = {} + + # Target is single individual + if not isinstance(data['target'], Population): + + pop = self.sim.population.props + + # Copy full new status for individual + row_after = self.copy_of_pop_dataframe_row(data['target']) + + # If individual qualified for the 'tracked' category either before OR after the event occurred, the event will be logged: + if self.row_before['track_history'] or row_after['track_history']: + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = {'event_name' : data['event_name']} + if 'footprint' in data.keys(): + link_info['footprint'] = data['footprint'] + link_info['level'] = data['level'] + link_info['treatment_ID'] = data['treatment_ID'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + # Check for any changes in mni dictionary + if 'PregnancySupervisor' in self.sim.modules and pop.loc[data['target']].sex == 'F': + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + # Check if individual is in mni after the event + mni_instances_after = False + if data['target'] in mni: + mni_instances_after = True + + # Now check and store changes in the mni dictionary, accounting for following cases: + + # 1. Individual is not in mni neither before nor after event, can pass + if not self.mni_instances_before and not mni_instances_after: + pass + # 2. Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # 3. Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # 4. Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + + # Add individual to the chain links + chain_links[data['target']] = link_info + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. 
+ + # Population dataframe after event + df_after = self.copy_of_pop_dataframe() + + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = self.copy_of_mni() + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, + df_after, + self.entire_mni_before, + entire_mni_after, + data['event_name']) + + # Log chains + if chain_links: + # Convert chain_links into EAV-type dataframe + eav_plus_event = convert_chain_links_into_eav(chain_links) + # log it + self.log_eav_dataframe_to_individual_histories(eav_plus_event) + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + def mni_values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after, set_of_tracked_individuals): + diffs = {} + + all_individuals_in_mni = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + in_mni_and_tracked = all_individuals_in_mni.intersection(set_of_tracked_individuals) + + for person in in_mni_and_tracked: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.mni_values_differ(entire_mni_after[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key], + self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + return diffs + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after, event_name): + """ + This function compares the population dataframe and mni dictionary before/after a population-wide e + vent has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, + and to store the properties which have changed as a result of it. + """ + # Create an empty dict to store changes for each of the individuals + chain_links = {} + + # Individuals undergoing changes in the generap pop dataframe + persons_changed = [] + + # Find individuals which qualify as being tracked because they satisfied requirements either before OR after + # the event occurred. + assert df_before.index.equals(df_after.index), "Indices are not identical!" + assert df_before.columns.equals(df_after.columns), "Columns of df_before and df_after do not match!" 
+ + mask_of_tracked_individuals = df_before['track_history'] | df_after['track_history'] + set_of_tracked_individuals = set(mask_of_tracked_individuals.index[mask_of_tracked_individuals]) + + # Only keep those individuals in dataframes + df_before = df_before[mask_of_tracked_individuals] + df_after = df_after[mask_of_tracked_individuals] + + # For those individuals, collect changes in the pop dataframe before/after the event + same = df_before.eq(df_after) | (df_before.isna() & df_after.isna()) + diff_mask = ~same + + # Collect changes in the mni dictionary + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after, set_of_tracked_individuals) + else: + diff_mni = [] + + # Iterate over tracked individuals who experienced changes to properties as a result of the event + for idx, row in diff_mask.iterrows(): + + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + persons_changed.append(idx) + # Create a dictionary for this person + # First add event info + link_info = { + 'event_name': event_name, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + # This person has also undergone changes in the mni dictionary, so add these here + if idx in diff_mni: + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + + # Append the event and changes to the individual key + chain_links[idx] = link_info + + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + # If individual didn't also undergo changes in pop dataframe AND is tracked, add + if key not in persons_changed and key in set_of_tracked_individuals: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'event_name': self.__class__.__name__, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info + + return chain_links + + +def df_to_eav(df, date, event_name): + """Function to convert entire population dataframe into custom EAV""" + eav = df.stack(dropna=False).reset_index() + eav.columns = ['entity', 'attribute', 'value'] + eav['event_name'] = event_name + eav = eav[["entity", "event_name", "attribute", "value"]] + return eav + + +def convert_chain_links_into_eav(chain_links): + """Function to convert chain links into custom EAV""" + rows = [] + + for e, data in chain_links.items(): + event_name = data.get("event_name") + + for attr, val in data.items(): + if attr == "event_name": + continue + + rows.append({ + "entity": e, + "event_name": event_name, + "attribute": attr, + "value": val + }) + + eav = pd.DataFrame(rows) + + return eav + + From 71a7776394b6ac0617fd836eb3a67fa335dac357 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:25:42 +0000 Subject: [PATCH 89/97] Additionally log equipment and beddays --- src/tlo/methods/hsi_event.py | 4 +++- src/tlo/methods/individual_history_tracker.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index c48d1a4ca0..357e0b7893 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -228,7 +228,9 @@ def run(self, squeeze_factor): "event_name": self.__class__.__name__, 
"footprint": footprint, "level": level, - "treatment_ID": self.TREATMENT_ID + "treatment_ID": self.TREATMENT_ID, + "equipment", self._EQUIPMENT, + "bed_days", self.bed_days_allocated_to_this_event, }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 175198948c..856228a393 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -232,9 +232,9 @@ def on_event_post_run(self, data): # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} if 'footprint' in data.keys(): - link_info['footprint'] = data['footprint'] - link_info['level'] = data['level'] - link_info['treatment_ID'] = data['treatment_ID'] + HSI_specific_fields = {'footprint','level','treatment_ID','equipment','bed_days'} + for field in HSI_specific_fields: + link_info[field] = data[field] # Store (if any) property changes as a result of the event for this individual for key in self.row_before.index: From 2feff779ffb64f799f1555cb995394e212cd2626 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 19 Dec 2025 15:05:51 +0000 Subject: [PATCH 90/97] Log consumable access as part of HSI --- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/methods/individual_history_tracker.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 357e0b7893..e4199dd790 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -229,8 +229,8 @@ def run(self, squeeze_factor): "footprint": footprint, "level": level, "treatment_ID": self.TREATMENT_ID, - "equipment", self._EQUIPMENT, - "bed_days", self.bed_days_allocated_to_this_event, + "equipment": self._EQUIPMENT, + "bed_days": self.bed_days_allocated_to_this_event, }) return updated_appt_footprint diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 856228a393..c0aeca1e96 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -32,6 +32,7 @@ def __init__( self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} + self.consumable_access = {} PARAMETERS = { # Options within module @@ -154,14 +155,10 @@ def on_consumable_request(self,data): chain_links = {} chain_links[data['target']] = {k: v for k, v in data.items() if k != 'target'} - # Convert chain_links into EAV-type dataframe - eav_plus_event = convert_chain_links_into_eav(chain_links) - # log it - self.log_eav_dataframe_to_individual_histories(eav_plus_event) + self.consumable_access = chain_links return - def on_event_pre_run(self, data): """Do this when notified that an event is about to run. 
This function checks whether this event should be logged as part of the event chains, a @@ -177,6 +174,7 @@ def on_event_pre_run(self, data): # Initialise these variables self.df_before = [] + self.consumable_access = {} self.row_before = pd.Series() self.mni_instances_before = False self.mni_row_before = {} @@ -276,6 +274,15 @@ def on_event_post_run(self, data): # Add individual to the chain links chain_links[data['target']] = link_info + + # Update with consumable access info + # Consumable access is only at individual level, so this should either be size 0 or 1 + assert len(self.consumable_access) == 0 or len(self.consumable_access) == 1 + if len(self.consumable_access) == 1: + chain_links[data['target']].update({k: v for k, v in + self.consumable_access[data['target']].items() if k not in chain_links[data['target']]}) + self.consumable_access = {} + else: # Target is entire population. Identify individuals for which properties have changed @@ -310,6 +317,7 @@ def on_event_post_run(self, data): self.mni_instances_before = False self.mni_row_before = {} self.entire_mni_before = {} + self.consumable_access = {} def mni_values_differ(self, v1, v2): From 3b9cf72c649293dd53a36bc9e0db732c8b1617eb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:34:35 +0000 Subject: [PATCH 91/97] Add prefix on property name --- src/tlo/methods/individual_history_tracker.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index c0aeca1e96..fe4b77d80d 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -45,7 +45,7 @@ def __init__( } PROPERTIES = { - "track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") + "iht_track_history": Property(Types.BOOL, "Whether the individual should be tracked by the individual history tracker or not") } def initialise_simulation(self, sim): @@ -76,10 +76,10 @@ def initialise_population(self, population): # Initialise all individuals as being tracked by default pop = self.sim.population.props - pop.loc[pop.is_alive, "track_history"] = True + pop.loc[pop.is_alive, "iht_track_history"] = True def on_birth(self, mother, child): - self.sim.population.props.at[child, "track_history"] = True + self.sim.population.props.at[child, "iht_track_history"] = True return def copy_of_pop_dataframe(self): @@ -225,7 +225,7 @@ def on_event_post_run(self, data): row_after = self.copy_of_pop_dataframe_row(data['target']) # If individual qualified for the 'tracked' category either before OR after the event occurred, the event will be logged: - if self.row_before['track_history'] or row_after['track_history']: + if self.row_before['iht_track_history'] or row_after['iht_track_history']: # Create and store event for this individual, regardless of whether any property change occurred link_info = {'event_name' : data['event_name']} @@ -380,7 +380,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be assert df_before.index.equals(df_after.index), "Indices are not identical!" assert df_before.columns.equals(df_after.columns), "Columns of df_before and df_after do not match!"
- mask_of_tracked_individuals = df_before['track_history'] | df_after['track_history'] + mask_of_tracked_individuals = df_before['iht_track_history'] | df_after['iht_track_history'] set_of_tracked_individuals = set(mask_of_tracked_individuals.index[mask_of_tracked_individuals]) # Only keep those individuals in dataframes From 5eff1c2e9a4df0abbf48ea0e46720ec3ce86e754 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:11:30 +0000 Subject: [PATCH 92/97] Remove to_broadcast input to function --- src/tlo/methods/consumables.py | 3 +-- src/tlo/methods/hsi_event.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 46a2dd94ae..2ab2e1891d 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -251,7 +251,6 @@ def _request_consumables(self, essential_item_codes: dict, optional_item_codes: Optional[dict] = None, to_log: bool = True, - to_broadcast: bool = True, treatment_id: Optional[str] = None, target: Optional[int] = None, event_name: Optional[str] = None, @@ -288,7 +287,7 @@ def _request_consumables(self, override_probability=override_probability) # Log the request and the outcome: - if to_log or to_broadcast: + if to_log or 'IndividualHistoryTracker' in self.sim.modules: items_available = {k: v for k, v in _all_item_codes.items() if available[k]} items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index e4199dd790..56febfd12b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -266,14 +266,12 @@ def get_consumables( # Determine if the request should be logged (over-ride argument provided if HealthSystem is disabled). 
_to_log = to_log if not self.healthcare_system.disable else False - _to_broadcast = True if 'IndividualHistoryTracker' in self.module.sim.modules else False # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( essential_item_codes=_item_codes, optional_item_codes=_optional_item_codes, to_log=_to_log, - to_broadcast = _to_broadcast, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, target=self.target, From 98b2d9787668e5f035e2546b0d8175eee2a6c508 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:15:51 +0000 Subject: [PATCH 93/97] Change name of broadcasting for consumable request --- src/tlo/methods/consumables.py | 2 +- src/tlo/methods/individual_history_tracker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 2ab2e1891d..d11acccce3 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -311,7 +311,7 @@ def _request_consumables(self, items_used=items_used, ) - notifier.dispatch("consumables._request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) + notifier.dispatch("consumables.on_request-consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)}) # Return the result of the check on availability return available diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index fe4b77d80d..1185699add 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -55,7 +55,7 @@ def initialise_simulation(self, sim): notifier.add_listener("event.post-run", self.on_event_post_run) notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run) notifier.add_listener("hsi_event.post-run", self.on_event_post_run) - notifier.add_listener("consumables._request-consumables", self.on_consumable_request) + notifier.add_listener("consumables.on_request-consumables", self.on_consumable_request) def read_parameters(self, resourcefilepath: Optional[Path] = None): self.load_parameters_from_dataframe( From 17f9f13d6e6d9fdf6e3aa4f9351d70280e35ee85 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:39:52 +0000 Subject: [PATCH 94/97] Declare demography as dependency to ensure iht_track_individual can work --- src/tlo/methods/individual_history_tracker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py index 1185699add..03400f86fa 100644 --- a/src/tlo/methods/individual_history_tracker.py +++ b/src/tlo/methods/individual_history_tracker.py @@ -34,6 +34,8 @@ def __init__( self.entire_mni_before = {} self.consumable_access = {} + INIT_DEPENDENCIES = {"Demography"} + PARAMETERS = { # Options within module "modules_of_interest": Parameter( From 4bf655f75c108d28a9c0a51e4742cc20707df324 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:42:26 +0000 Subject: [PATCH 95/97] Prepare consumable data if notifier has listeners --- src/tlo/methods/consumables.py | 4 ++-- 
From 17f9f13d6e6d9fdf6e3aa4f9351d70280e35ee85 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 10:39:52 +0000
Subject: [PATCH 94/97] Declare demography as dependency to ensure iht_track_individual can work

---
 src/tlo/methods/individual_history_tracker.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 1185699add..03400f86fa 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -34,6 +34,8 @@ def __init__(
         self.entire_mni_before = {}
         self.consumable_access = {}

+    INIT_DEPENDENCIES = {"Demography"}
+
     PARAMETERS = {
         # Options within module
         "modules_of_interest": Parameter(

From 4bf655f75c108d28a9c0a51e4742cc20707df324 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 10:42:26 +0000
Subject: [PATCH 95/97] Prepare consumable data if notifier has listeners

---
 src/tlo/methods/consumables.py | 4 ++--
 src/tlo/methods/individual_history_tracker.py | 2 +-
 src/tlo/notify.py | 9 +++++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index d11acccce3..b9a597ca14 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -287,7 +287,7 @@ def _request_consumables(self,
                                              override_probability=override_probability)

         # Log the request and the outcome:
-        if to_log or 'IndividualHistoryTracker' in self.sim.modules:
+        if to_log or notifier.has_listeners('consumables.on-request_consumables'):
             items_available = {k: v for k, v in _all_item_codes.items() if available[k]}
             items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]}
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on_request-consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 03400f86fa..3bf40f98ff 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -57,7 +57,7 @@ def initialise_simulation(self, sim):
         notifier.add_listener("event.post-run", self.on_event_post_run)
         notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run)
         notifier.add_listener("hsi_event.post-run", self.on_event_post_run)
-        notifier.add_listener("consumables.on_request-consumables", self.on_consumable_request)
+        notifier.add_listener("consumables.on-request_consumables", self.on_consumable_request)

     def read_parameters(self, resourcefilepath: Optional[Path] = None):
         self.load_parameters_from_dataframe(

diff --git a/src/tlo/notify.py b/src/tlo/notify.py
index b1b4434ba9..e90166d472 100644
--- a/src/tlo/notify.py
+++ b/src/tlo/notify.py
@@ -66,6 +66,15 @@ def clear_listeners(self):
         e.g. if you are running multiple tests or simulations in the same process.
         """
         self.listeners.clear()
+
+    def has_listeners(self, notification_key):
+        """
+        Check if there are any listeners registered for a specific notification.
+
+        :param notification_key: The identifier to check.
+        :return: True if there are listeners, False otherwise.
+        """
+        return notification_key in self.listeners and len(self.listeners[notification_key]) > 0


 # Create a global notifier instance
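
The point of `has_listeners` is that the producer side can skip assembling the broadcast payload entirely when nothing is subscribed, rather than keying the decision to a specific module being loaded. A self-contained sketch of the same guard, with invented names (again, not the project's notify.py):

    listeners = {}

    def add_listener(key, callback):
        listeners.setdefault(key, []).append(callback)

    def has_listeners(key):
        # Mirrors the method added above: a key counts only if it has at least one callback.
        return key in listeners and len(listeners[key]) > 0

    def dispatch(key, data):
        for callback in listeners.get(key, []):
            callback(data)

    def request_consumables(to_log, items):
        key = "consumables.on-request_consumables"
        if to_log or has_listeners(key):
            # Payload construction only happens when someone will actually receive it.
            dispatch(key, {"Item_Available": str(items)})

    add_listener("consumables.on-request_consumables", print)
    request_consumables(to_log=False, items={2671: 1})   # prints {'Item_Available': '{2671: 1}'}
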
From 3935113306d19b22f78e6e2298805583640a9f2d Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 13:02:36 +0000
Subject: [PATCH 96/97] Fix lack of module entry for broadcasted data

---
 src/tlo/methods/consumables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index b9a597ca14..41c1bd322c 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available
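
With `module` included in the broadcast payload, a listener can attribute each consumable request to the module that made it. A hypothetical listener-side sketch (the field values below are invented for illustration):

    def on_consumable_request(data):
        # Unpack the broadcast payload, including the newly added 'module' entry.
        print(f"person {data['target']}: {data['event_name']} from {data['module']} "
              f"used {data['Item_Used']}")

    on_consumable_request({
        'target': 42,
        'module': 'Hiv',                       # invented example value
        'event_name': 'HSI_ExampleTreatment',  # invented example value
        'Item_Available': '{}',
        'Item_NotAvailable': '{}',
        'Item_Used': '{2671: 1}',
    })
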
From c3834cd052f13e7230e735e728db3366b300131f Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 8 Jan 2026 13:11:43 +0000
Subject: [PATCH 97/97] Change name of dispatcher key to post-request_consumables

---
 src/tlo/methods/consumables.py | 2 +-
 src/tlo/methods/individual_history_tracker.py | 2 +-
 tests/test_individual_history_tracker.py | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index 41c1bd322c..e6de9c27e9 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -311,7 +311,7 @@ def _request_consumables(self,
                 items_used=items_used,
             )

-        notifier.dispatch("consumables.on-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})
+        notifier.dispatch("consumables.post-request_consumables", data={'target' : target, 'module' : module, 'event_name' : event_name, 'Item_Available': str(items_available),'Item_NotAvailable': str(items_not_available), 'Item_Used': str(items_used)})

         # Return the result of the check on availability
         return available

diff --git a/src/tlo/methods/individual_history_tracker.py b/src/tlo/methods/individual_history_tracker.py
index 3bf40f98ff..5edb6e9f0a 100644
--- a/src/tlo/methods/individual_history_tracker.py
+++ b/src/tlo/methods/individual_history_tracker.py
@@ -57,7 +57,7 @@ def initialise_simulation(self, sim):
         notifier.add_listener("event.post-run", self.on_event_post_run)
         notifier.add_listener("hsi_event.pre-run", self.on_event_pre_run)
         notifier.add_listener("hsi_event.post-run", self.on_event_post_run)
-        notifier.add_listener("consumables.on-request_consumables", self.on_consumable_request)
+        notifier.add_listener("consumables.post-request_consumables", self.on_consumable_request)

     def read_parameters(self, resourcefilepath: Optional[Path] = None):
         self.load_parameters_from_dataframe(

diff --git a/tests/test_individual_history_tracker.py b/tests/test_individual_history_tracker.py
index 7d4a34a591..1ac9eac01b 100644
--- a/tests/test_individual_history_tracker.py
+++ b/tests/test_individual_history_tracker.py
@@ -92,7 +92,8 @@ def test_individual_history_tracker(tmpdir, seed):
     mask = individual_histories["event_name"].isin(["Birth", "StartOfSimulation"])
     assert individual_histories.loc[mask, "Info"].apply(len).eq(num_properties).all()

-    # Assert that all HSI events that occurred were also collected in the event chains. Do not include Inpatient_Care HSIs, as these
+    # Assert that all HSI events that occurred were also collected in the event chains.
+    # Do not include Inpatient_Care HSIs, as these
     # are not currently treated as being individual-specific
     Num_of_HSIs_in_individual_histories = individual_histories["event_name"].str.contains('HSI', na=False).sum()
     Num_of_HSIs_in_hs_log = len(output['tlo.methods.healthsystem']['HSI_Event'].loc[