Source code for minos.modules.income

"""
Module for income in Minos.
Calculation of monthly household income
Possible extension to interaction with employment/education and any spatial/interaction effects.
"""

import pandas as pd
import minos.modules.r_utils as r_utils
from minos.modules.base_module import Base
import matplotlib.pyplot as plt
from seaborn import histplot
import numpy as np
import logging


[docs] class Income(Base): # Special methods used by vivarium. @property def name(self): return 'income' def __repr__(self): return "Income()"
[docs] def setup(self, builder): """ Initialise the module during simulation.setup(). Notes ----- - Load in data from pre_setup - Register any value producers/modifiers for income - Add required columns to population data frame - Update other required items such as randomness stream. Parameters ---------- builder : vivarium.engine.Builder Vivarium's control object. Stores all simulation metadata and allows modules to use it. """ # Load in inputs from pre-setup. # self.transition_model = builder.data.load("income_transition") self.rpy2Modules = builder.data.load("rpy2_modules") # Build vivarium objects for calculating transition probabilities. # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # self.transition_coefficients = builder. # Assign randomness streams if necessary. self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # Determine which subset of the main population is used in this module. # columns_created is the columns created by this module. # view_columns is the columns from the main population used in this module. # In this case, view_columns are taken straight from the transition model view_columns = ['pidp', 'age', 'sex', 'ethnicity', 'region', 'hh_income', 'job_sec', 'S7_labour_state', 'education_state', 'SF_12', 'housing_quality', 'job_sector'] # view_columns += self.transition_model.rx2('model').names self.population_view = builder.population.get_view(columns=view_columns) # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. # builder.population.initializes_simulants(self.on_initialize_simulants) # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=self.priority) super().setup(builder)
[docs] def on_initialize_simulants(self, pop_data): """ Initiate columns for hh_income when new simulants are added. Only column needed is the diff column for rate of change model predictions. Parameters ---------- pop_data: vivarium.framework.population.SimulantData Custom vivarium class for interacting with the population data frame. It is essentially a pandas DataFrame with a few extra attributes such as the creation_time, creation_window, and current simulation state (setup/running/etc.). """ # Create frame with new 3 columns and add it to the main population frame. # This is the same for both new cohorts and newborn babies. # Neither should be dead yet. pop_update = pd.DataFrame({'hh_income_diff': 0}, index=pop_data.index) self.population_view.update(pop_update)
[docs] def on_time_step(self, event): """ Predicts the hh_income for the next timestep. Parameters ---------- event : vivarium.population.PopulationEvent The event time_step that called this function. """ logging.info("INCOME") # Get living people to update their income pop = self.population_view.get(event.index, query="alive =='alive'") self.year = event.time.year ## Predict next income value newWaveIncome = self.calculate_income(pop) newWaveIncome = pd.DataFrame(newWaveIncome, columns=['hh_income']) # newWaveIncome = newWaveIncome.rename(columns={"new_dependent": "hh_income", # "predicted": "hh_income_diff"}) # newWaveIncome = newWaveIncome.to_frame(name='hh_income') # Set index back to population of interest. newWaveIncome.index = pop.index # Draw individuals next states randomly from this distribution. # Update population with new income self.population_view.update(newWaveIncome['hh_income'])
[docs] def calculate_income(self, pop): """Calculate income transition distribution based on provided people/indices Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Series Vector of new household incomes from OLS prediction. """ # load transition model based on year. year = min(self.year, 2019) transition_model = r_utils.load_transitions(f"hh_income/ols/hh_income_{year}_{year + 1}", self.rpy2Modules, path=self.transition_dir) nextWaveIncome = r_utils.predict_next_timestep_ols(transition_model, self.rpy2Modules, pop, dependent='hh_income') return nextWaveIncome
[docs] def calculate_income_rateofchange(self, pop): """Calculate income transition with rate of change (diff) models Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Dataframe Dataframe of new predicted hh_income value and difference from previous year. """ # load transition model based on year. if self.cross_validation: # if cross-val, fix year to final year model year = 2019 else: year = min(self.year, 2019) transition_model = r_utils.load_transitions(f"hh_income/ols_diff/hh_income_{year}_{year + 1}", self.rpy2Modules, path=self.transition_dir) # The calculation relies on the R predict method and the model that has already been specified nextWaveIncome = r_utils.predict_next_timestep_ols(transition_model, self.rpy2Modules, pop, dependent='hh_income', year=self.year) return nextWaveIncome
[docs] def plot(self, pop, config): file_name = config.output_plots_dir + f"income_hist_{self.year}.pdf" f = plt.figure() histplot(pop, x="hh_income", stat='density') plt.savefig(file_name) plt.close()
class geeIncome(Base): # Special methods used by vivarium. @property def name(self): return 'geeIncome' def __repr__(self): return "geeIncome()" def setup(self, builder): """ Initialise the module during simulation.setup(). Notes ----- - Load in data from pre_setup - Register any value producers/modifiers for income - Add required columns to population data frame - Update other required items such as randomness stream. Parameters ---------- builder : vivarium.engine.Builder Vivarium's control object. Stores all simulation metadata and allows modules to use it. """ # Load in inputs from pre-setup. # self.transition_model = builder.data.load("income_transition") self.rpy2Modules = builder.data.load("rpy2_modules") # Build vivarium objects for calculating transition probabilities. # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # self.transition_coefficients = builder. # Assign randomness streams if necessary. self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # Determine which subset of the main population is used in this module. # columns_created is the columns created by this module. # view_columns is the columns from the main population used in this module. # In this case, view_columns are taken straight from the transition model view_columns = ['pidp', 'weight', 'age', 'sex', 'ethnicity', 'region', 'hh_income', 'job_sec', 'labour_state', 'education_state', 'SF_12', 'housing_quality', 'job_sector'] # view_columns += self.transition_model.rx2('model').names self.population_view = builder.population.get_view(columns=view_columns) # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. # builder.population.initializes_simulants(self.on_initialize_simulants) # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=self.priority) super().setup(builder) # just load this once. self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee/hh_income_GEE", self.rpy2Modules, path=self.transition_dir) self.min_hh_income = None def on_initialize_simulants(self, pop_data): """ Initiate columns for hh_income when new simulants are added. Only column needed is the diff column for rate of change model predictions. Parameters ---------- pop_data: vivarium.framework.population.SimulantData Custom vivarium class for interacting with the population data frame. It is essentially a pandas DataFrame with a few extra attributes such as the creation_time, creation_window, and current simulation state (setup/running/etc.). """ # Create frame with new 3 columns and add it to the main population frame. # This is the same for both new cohorts and newborn babies. # Neither should be dead yet. pop_update = pd.DataFrame({'hh_income_diff': 0}, index=pop_data.index) self.population_view.update(pop_update) def on_time_step(self, event): """ Predicts the hh_income for the next timestep. Parameters ---------- event : vivarium.population.PopulationEvent The event time_step that called this function. """ # Get living people to update their income pop = self.population_view.get(event.index, query="alive =='alive'") self.year = event.time.year if self.min_hh_income == None: self.min_hh_income = np.min(pop["hh_income"]) self.income_std = np.std(pop["hh_income"]) ## Predict next income value newWaveIncome = self.calculate_income(pop) newWaveIncome = pd.DataFrame(newWaveIncome, columns=['hh_income']) # newWaveIncome = newWaveIncome.rename(columns={"new_dependent": "hh_income", # "predicted": "hh_income_diff"}) # newWaveIncome = newWaveIncome.to_frame(name='hh_income') # Set index back to population of interest. newWaveIncome.index = pop.index newWaveIncome['hh_income'] += self.generate_gaussian_noise(pop.index, 0, 1000) # Draw individuals next states randomly from this distribution. # Update population with new income self.population_view.update(newWaveIncome['hh_income']) def calculate_income(self, pop): """Calculate income transition distribution based on provided people/indices Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Series Vector of new household incomes from OLS prediction. """ # load transition model based on year. nextWaveIncome = r_utils.predict_next_timestep_gee(self.gee_transition_model, self.rpy2Modules, pop, dependent='hh_income') return nextWaveIncome + self.min_hh_income def calculate_income_rateofchange(self, pop): """Calculate income transition with rate of change (diff) models Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Dataframe Dataframe of new predicted hh_income value and difference from previous year. """ # load transition model based on year. year = min(self.year, 2019) transition_model = r_utils.load_transitions(f"hh_income/ols/hh_income_{year}_{year + 1}", self.rpy2Modules, path=self.transition_dir) # The calculation relies on the R predict method and the model that has already been specified nextWaveIncome = r_utils.predict_next_timestep_ols(transition_model, self.rpy2Modules, pop, dependent='hh_income', year=self.year) return nextWaveIncome def plot(self, pop, config): file_name = config.output_plots_dir + f"income_hist_{self.year}.pdf" f = plt.figure() histplot(pop, x="hh_income", stat='density') plt.savefig(file_name) plt.close() class geeYJIncome(Base): # Special methods used by vivarium. @property def name(self): return 'geeYJIncome' def __repr__(self): return "geeYJIncome()" def setup(self, builder): """ Initialise the module during simulation.setup(). Notes ----- - Load in data from pre_setup - Register any value producers/modifiers for income - Add required columns to population data frame - Update other required items such as randomness stream. Parameters ---------- builder : vivarium.engine.Builder Vivarium's control object. Stores all simulation metadata and allows modules to use it. """ # Load in inputs from pre-setup. # self.transition_model = builder.data.load("income_transition") self.rpy2Modules = builder.data.load("rpy2_modules") # Build vivarium objects for calculating transition probabilities. # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # self.transition_coefficients = builder. # Assign randomness streams if necessary. self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # Determine which subset of the main population is used in this module. # columns_created is the columns created by this module. # view_columns is the columns from the main population used in this module. # In this case, view_columns are taken straight from the transition model #view_columns = ['pidp', # 'age', # 'sex', # 'ethnicity', # 'region', # 'hh_income', # 'job_sec', # #'labour_state', # 'education_state', # 'SF_12', # 'weight', # #'housing_quality', # 'job_sector'] view_columns = [ 'hh_income', 'age', 'sex', 'ethnicity', 'region', 'education_state', 'job_sec', 'job_sector', 'time', 'pidp', 'weight' ] # view_columns += self.transition_model.rx2('model').names self.population_view = builder.population.get_view(columns=view_columns) # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. # builder.population.initializes_simulants(self.on_initialize_simulants) # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=self.priority) super().setup(builder) # just load this once. #self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_yj/hh_income_GEE_YJ", self.rpy2Modules, # path=self.transition_dir) self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_yj_gamma/hh_income_GEE_YJ_GAMMA", self.rpy2Modules, path=self.transition_dir) self.history_data = self.generate_history_dataframe("final_US", [2018, 2019, 2020], view_columns) def on_initialize_simulants(self, pop_data): """ Initiate columns for hh_income when new simulants are added. Only column needed is the diff column for rate of change model predictions. Parameters ---------- pop_data: vivarium.framework.population.SimulantData Custom vivarium class for interacting with the population data frame. It is essentially a pandas DataFrame with a few extra attributes such as the creation_time, creation_window, and current simulation state (setup/running/etc.). """ # Create frame with new 3 columns and add it to the main population frame. # This is the same for both new cohorts and newborn babies. # Neither should be dead yet. pop_update = pd.DataFrame({'hh_income_diff': 0}, index=pop_data.index) self.population_view.update(pop_update) def on_time_step(self, event): """ Predicts the hh_income for the next timestep. Parameters ---------- event : vivarium.population.PopulationEvent The event time_step that called this function. """ # Get living people to update their income pop = self.population_view.get(event.index, query="alive =='alive'") pop = pop.sort_values('pidp') self.year = event.time.year ## Predict next income value newWaveIncome = self.calculate_income(pop) newWaveIncome = pd.DataFrame(newWaveIncome, columns=['hh_income']) # newWaveIncome = newWaveIncome.rename(columns={"new_dependent": "hh_income", # "predicted": "hh_income_diff"}) # newWaveIncome = newWaveIncome.to_frame(name='hh_income') # Set index back to population of interest. newWaveIncome.index = pop.index #newWaveIncome['hh_income'] += self.generate_gaussian_noise(pop.index, 0, 1000) # Draw individuals next states randomly from this distribution. # Update population with new income self.population_view.update(newWaveIncome['hh_income']) # def calculate_income(self, pop): # """Calculate income transition distribution based on provided people/indices # # Parameters # ---------- # pop: PopulationView # Population from MINOS to calculate next income for. # Returns # ------- # nextWaveIncome: pd.Series # Vector of new household incomes from OLS prediction. # """ # # load transition model based on year. # #if self.year != 2020: # self.update_history_dataframe(pop, self.year) # nextWaveIncome = r_utils.predict_next_timestep_yj_gaussian_gee(self.gee_transition_model, # self.rpy2Modules, # self.history_data, # dependent='hh_income', # noise_std=1)#2 # # return nextWaveIncome.iloc[self.history_data.loc[self.history_data['time']==self.year].index] def calculate_income(self, pop): """Calculate income transition distribution based on provided people/indices Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Series Vector of new household incomes from OLS prediction. """ # load transition model based on year. #if self.year != 2020: self.update_history_dataframe(pop, self.year) nextWaveIncome = r_utils.predict_next_timestep_yj_gamma_gee(self.gee_transition_model, self.rpy2Modules, self.history_data, dependent='hh_income', reflect=False, noise_std=0.5)#2 return nextWaveIncome.iloc[self.history_data.loc[self.history_data['time']==self.year].index] def plot(self, pop, config): file_name = config.output_plots_dir + f"income_hist_{self.year}.pdf" f = plt.figure() histplot(pop, x="hh_income", stat='density') plt.savefig(file_name) plt.close() class lmmYJIncome(Base): # Special methods used by vivarium. @property def name(self): return 'lmmYJIncome' def __repr__(self): return "lmmYJIncome()" def setup(self, builder): """ Initialise the module during simulation.setup(). Notes ----- - Load in data from pre_setup - Register any value producers/modifiers for income - Add required columns to population data frame - Update other required items such as randomness stream. Parameters ---------- builder : vivarium.engine.Builder Vivarium's control object. Stores all simulation metadata and allows modules to use it. """ # Load in inputs from pre-setup. # self.transition_model = builder.data.load("income_transition") self.rpy2Modules = builder.data.load("rpy2_modules") # Build vivarium objects for calculating transition probabilities. # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # self.transition_coefficients = builder. # Assign randomness streams if necessary. self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # Determine which subset of the main population is used in this module. # columns_created is the columns created by this module. # view_columns is the columns from the main population used in this module. # In this case, view_columns are taken straight from the transition model #view_columns = ['pidp', # 'age', # 'sex', # 'ethnicity', # 'region', # 'hh_income', # 'job_sec', # #'labour_state', # 'education_state', # 'SF_12', # 'weight', # #'housing_quality', # 'job_sector'] view_columns = ["age", "sex", "ethnicity", "region", "education_state", 'job_sec', 'SF_12', 'pidp', 'hh_income', 'hh_income_diff', 'S7_labour_state' ] #columns_created = ['hh_income_diff'] # view_columns += self.transition_model.rx2('model').names self.population_view = builder.population.get_view(columns=view_columns)# + columns_created) # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. #builder.population.initializes_simulants(self.on_initialize_simulants, creates_columns=columns_created) # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=self.priority) super().setup(builder) # just load this once. #self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_yj/hh_income_GEE_YJ", self.rpy2Modules, # path=self.transition_dir) #self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_diff/hh_income_GEE_DIFF", self.rpy2Modules, # path=self.transition_dir) self.transition_model = r_utils.load_transitions(f"hh_income/glmm/hh_income_new_GLMM", self.rpy2Modules, path=self.transition_dir) self.transition_model = r_utils.randomise_fixed_effects(self.transition_model, self.rpy2Modules, "glmm") #self.history_data = self.generate_history_dataframe("final_US", [2018, 2019], view_columns) #self.history_data["hh_income_diff"] = self.history_data['hh_income'] - self.history_data.groupby(['pidp'])['hh_income'].shift(1) def on_initialize_simulants(self, pop_data): """ Initiate columns for hh_income when new simulants are added. Only column needed is the diff column for rate of change model predictions. Parameters ---------- pop_data: vivarium.framework.population.SimulantData Custom vivarium class for interacting with the population data frame. It is essentially a pandas DataFrame with a few extra attributes such as the creation_time, creation_window, and current simulation state (setup/running/etc.). """ # Create frame with new 3 columns and add it to the main population frame. # This is the same for both new cohorts and newborn babies. # Neither should be dead yet. pop_update = pd.DataFrame({'hh_income_diff': 0.}, index=pop_data.index) self.population_view.update(pop_update) def on_time_step(self, event): """ Predicts the hh_income for the next timestep. Parameters ---------- event : vivarium.population.PopulationEvent The event time_step that called this function. """ # Get living people to update their income pop = self.population_view.get(event.index, query="alive =='alive'") #pop = pop.sort_values('pidp') self.year = event.time.year # dummy column to load new prediction into. pop['hh_income_new'] = pop['hh_income'] ## Predict next income values newWaveIncome = pd.DataFrame(columns=['hh_income']) newWaveIncome['hh_income'] = self.calculate_income(pop) newWaveIncome.index = pop.index # calculate household income mean income_mean = np.mean(newWaveIncome["hh_income"]) # calculate change in standard deviation between waves. std_ratio = (np.std(pop['hh_income'])/np.std(newWaveIncome["hh_income"])) # rescale income to have new mean but keep old standard deviation. newWaveIncome["hh_income"] *= std_ratio newWaveIncome["hh_income"] -= ((std_ratio-1)*income_mean) #newWaveIncome["hh_income"] -= 75 # difference in hh income newWaveIncome['hh_income_diff'] = newWaveIncome['hh_income'] - pop['hh_income'] self.population_view.update(newWaveIncome[['hh_income', 'hh_income_diff']]) def calculate_income(self, pop): """Calculate income transition distribution based on provided people/indices Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Series Vector of new household incomes from OLS prediction. """ # load transition model based on year. nextWaveIncome = r_utils.predict_next_timestep_yj_gamma_glmm(self.transition_model, self.rpy2Modules, pop, dependent='hh_income_new', yeo_johnson=True, reflect=False, noise_std=0.175) #0.45 for yj. 100? for non yj. # get new hh income diffs and update them into history_data. #self.update_history_dataframe(pop, self.year-1) #new_history_data = self.history_data.loc[self.history_data['time']==self.year].index # who in current_year #next_diffs = nextWaveIncome.iloc[new_history_data] return nextWaveIncome def plot(self, pop, config): file_name = config.output_plots_dir + f"income_hist_{self.year}.pdf" f = plt.figure() histplot(pop, x="hh_income", stat='density') plt.savefig(file_name) plt.close() class lmmDiffIncome(Base): # Special methods used by vivarium. @property def name(self): return 'lmmDiffIncome' def __repr__(self): return "lmmDiffIncome()" def setup(self, builder): """ Initialise the module during simulation.setup(). Notes ----- - Load in data from pre_setup - Register any value producers/modifiers for income - Add required columns to population data frame - Update other required items such as randomness stream. Parameters ---------- builder : vivarium.engine.Builder Vivarium's control object. Stores all simulation metadata and allows modules to use it. """ # Load in inputs from pre-setup. # self.transition_model = builder.data.load("income_transition") self.rpy2Modules = builder.data.load("rpy2_modules") # Build vivarium objects for calculating transition probabilities. # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # self.transition_coefficients = builder. # Assign randomness streams if necessary. self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # Determine which subset of the main population is used in this module. # columns_created is the columns created by this module. # view_columns is the columns from the main population used in this module. # In this case, view_columns are taken straight from the transition model #view_columns = ['pidp', # 'age', # 'sex', # 'ethnicity', # 'region', # 'hh_income', # 'job_sec', # #'labour_state', # 'education_state', # 'SF_12', # 'weight', # #'housing_quality', # 'job_sector'] view_columns = [ 'hh_income', 'age', 'sex', 'ethnicity', 'region', 'education_state', 'job_sec', #'job_sector', 'time', 'pidp', 'weight', #'hh_income_diff' ] columns_created = ['hh_income_diff'] # view_columns += self.transition_model.rx2('model').names self.population_view = builder.population.get_view(columns=view_columns + columns_created) # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. builder.population.initializes_simulants(self.on_initialize_simulants, creates_columns=columns_created) # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=self.priority) super().setup(builder) # just load this once. #self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_yj/hh_income_GEE_YJ", self.rpy2Modules, # path=self.transition_dir) #self.gee_transition_model = r_utils.load_transitions(f"hh_income/gee_diff/hh_income_GEE_DIFF", self.rpy2Modules, # path=self.transition_dir) self.gee_transition_model = r_utils.load_transitions(f"hh_income/lmm_diff/hh_income_LMM_DIFF", self.rpy2Modules, path=self.transition_dir) #self.history_data = self.generate_history_dataframe("final_US", [2018, 2019], view_columns) #self.history_data["hh_income_diff"] = self.history_data['hh_income'] - self.history_data.groupby(['pidp'])['hh_income'].shift(1) def on_initialize_simulants(self, pop_data): """ Initiate columns for hh_income when new simulants are added. Only column needed is the diff column for rate of change model predictions. Parameters ---------- pop_data: vivarium.framework.population.SimulantData Custom vivarium class for interacting with the population data frame. It is essentially a pandas DataFrame with a few extra attributes such as the creation_time, creation_window, and current simulation state (setup/running/etc.). """ # Create frame with new 3 columns and add it to the main population frame. # This is the same for both new cohorts and newborn babies. # Neither should be dead yet. pop_update = pd.DataFrame({'hh_income_diff': 0.}, index=pop_data.index) self.population_view.update(pop_update) def on_time_step(self, event): """ Predicts the hh_income for the next timestep. Parameters ---------- event : vivarium.population.PopulationEvent The event time_step that called this function. """ # Get living people to update their income pop = self.population_view.get(event.index, query="alive =='alive'") pop = pop.sort_values('pidp') self.year = event.time.year #pop['hh_income_diff_last'] = pop['hh_income_diff'] ## Predict next income value newWaveIncome = pd.DataFrame(columns=['hh_income', 'hh_income_diff']) newWaveIncome['hh_income_diff'] = self.calculate_income(pop) newWaveIncome.index = pop.index newWaveIncome['hh_income'] = pop['hh_income'] + newWaveIncome['hh_income_diff'] # newWaveIncome = newWaveIncome.rename(columns={"new_dependent": "hh_income", # "predicted": "hh_income_diff"}) # newWaveIncome = newWaveIncome.to_frame(name='hh_income') # Set index back to population of interest. #newWaveIncome['hh_income'] += self.generate_gaussian_noise(pop.index, 0, 1000) # Draw individuals next states randomly from this distribution. # Update population with new income print("income", np.mean(newWaveIncome['hh_income'])) self.population_view.update(newWaveIncome[['hh_income', 'hh_income_diff']]) # def calculate_income(self, pop): # """Calculate income transition distribution based on provided people/indices # # Parameters # ---------- # pop: PopulationView # Population from MINOS to calculate next income for. # Returns # ------- # nextWaveIncome: pd.Series # Vector of new household incomes from OLS prediction. # """ # # load transition model based on year. # #if self.year != 2020: # self.update_history_dataframe(pop, self.year) # nextWaveIncome = r_utils.predict_next_timestep_yj_gaussian_gee(self.gee_transition_model, # self.rpy2Modules, # self.history_data, # dependent='hh_income', # noise_std=1)#2 # # return nextWaveIncome.iloc[self.history_data.loc[self.history_data['time']==self.year].index] def calculate_income(self, pop): """Calculate income transition distribution based on provided people/indices Parameters ---------- pop: PopulationView Population from MINOS to calculate next income for. Returns ------- nextWaveIncome: pd.Series Vector of new household incomes from OLS prediction. """ # load transition model based on year. nextWaveIncome = r_utils.predict_next_timestep_yj_gaussian_lmm(self.gee_transition_model, self.rpy2Modules, pop, dependent='hh_income_diff', yeo_johnson = True, reflect=False, noise_std= 0.05)#0.45 # get new hh income diffs and update them into history_data. #self.update_history_dataframe(pop, self.year-1) #new_history_data = self.history_data.loc[self.history_data['time']==self.year].index # who in current_year #next_diffs = nextWaveIncome.iloc[new_history_data] return nextWaveIncome def plot(self, pop, config): file_name = config.output_plots_dir + f"income_hist_{self.year}.pdf" f = plt.figure() histplot(pop, x="hh_income", stat='density') plt.savefig(file_name) plt.close() # old net income test keeping in case I need any of it later. # # Annual average inflation based on CPI index. # # https://www.statista.com/statistics/270384/inflation-rate-in-the-united-kingdom/ # annual_cpi_rates = { # 2017: 2.68, # 2018: 2.48, # 2019: 1.79, # 2020: 0.85, # 2021: 2.59, # 2022: 9.12, # 2023: 8.99, # 2024: 3.73, # 2025: 1.82, # 2026: 2.00, # # Stabilises to 2% because they can't predict this far ahead.. All years beyond have this value. # # This is a Bank of England target and is highly optimistic.. # } # # class lmmYJNetIncome(Base): # # # Special methods used by vivarium. # @property # def name(self): # return 'net_income' # # def __repr__(self): # return "netIncome()" # # def setup(self, builder): # """ Initialise the module during simulation.setup(). # Notes # ----- # - Load in data from pre_setup # - Register any value producers/modifiers for income # - Add required columns to population data frame # - Update other required items such as randomness stream. # Parameters # ---------- # builder : vivarium.engine.Builder # Vivarium's control object. Stores all simulation metadata and allows modules to use it. # """ # # # Load in inputs from pre-setup. # # self.transition_model = builder.data.load("income_transition") # self.rpy2_modules = builder.data.load("rpy2_modules") # # # Build vivarium objects for calculating transition probabilities. # # Typically this is registering rate/lookup tables. See vivarium docs/other modules for examples. # # self.transition_coefficients = builder. # # # Assign randomness streams if necessary. # self.random = builder.randomness.get_stream(self.generate_random_crn_key()) # # # Determine which subset of the main population is used in this module. # # columns_created is the columns created by this module. # # view_columns is the columns from the main population used in this module. # # In this case, view_columns are taken straight from the transition model # view_columns = ['pidp', # 'age', # 'sex', # 'ethnicity', # 'region', # 'net_hh_income', # 'job_sec', # 'labour_state', # 'education_state', # 'SF_12', # 'housing_quality', # 'job_sector', # 'hh_income', # 'oecd_equiv', # 'outgoings', # 'tenure', # 'yearly_energy', # #'financial_situation', # 'marital_status', # 'hhsize', # #'FP10', # #'boost_amount' # ] # # view_columns += self.transition_model.rx2('model').names # self.population_view = builder.population.get_view(columns=view_columns) # # # Population initialiser. When new individuals are added to the microsimulation a constructer is called for each # # module. Declare what constructer is used. usually on_initialize_simulants method is called. Inidividuals are # # created at the start of a model "setup" or after some deterministic (add cohorts) or random (births) event. # builder.population.initializes_simulants(self.on_initialize_simulants) # # # Declare events in the module. At what times do individuals transition states from this module. E.g. when does # # individual graduate in an education module. # builder.event.register_listener("time_step", self.on_time_step, priority=2) # # self.income_transition_model = r_utils.load_transitions(f"net_hh_income/lmm/net_hh_income_LMM", self.rpy2_modules) # # def on_time_step(self, event): # """ Predicts the hh_income for the next timestep. # Parameters # ---------- # event : vivarium.population.PopulationEvent # The event time_step that called this function. # """ # # Get living people to update their income # pop = self.population_view.get(event.index, query="alive =='alive'") # self.year = event.time.year # # ## Predict next income value # newWaveNetIncome = self.calculate_income(pop) # newWaveNetIncome = pd.DataFrame(newWaveNetIncome, columns=["net_hh_income"]) # net hh income pre overheads/bills. # # Set index type to int (instead of object as previous) # newWaveNetIncome.index = pop.index # # adjust for inflation # newWaveNetIncome['net_hh_income'] = self.adjust_inflation(newWaveNetIncome['net_hh_income'], self.year) # # calculate net (disposable) income based on overheads and house size. # newWaveNetIncome['hh_income'] = self.transform_net_to_disposable_income(newWaveNetIncome['net_hh_income'], # pop['outgoings'], # pop['oecd_equiv']) # # # nextWaveFinancialSituation = self.calculate_financial_situation(pop) # # nextWaveFinancialSituation["financial_situation"] = self.random.choice(nextWaveFinancialSituation.index, list(nextWaveFinancialSituation.columns+1), # # nextWaveFinancialSituation).astype(float) # # nextWaveFinancialSituation.index = pop.index # # newWaveNetIncome['financial_situation'] = nextWaveFinancialSituation['financial_situation'] # # calculate FP10 based on energy usage and Net income # # newWaveNetIncome['FP10'] = self.calculate_fp10(newWaveNetIncome['hh_income'], pop['yearly_energy']) # # # Draw individuals next states randomly from this distribution. # # Update population with new income. # # self.population_view.update(newWaveNetIncome[['Net_hh_income', 'net_hh_income', 'FP10', 'financial_situation']]) # self.population_view.update(newWaveNetIncome[['hh_income', 'net_hh_income']]) # # def calculate_income(self, pop): # """Calculate income transition distribution based on provided people/indices # Parameters # ---------- # pop: PopulationView # Population from MINOS to calculate next income for. # Returns # ------- # nextWaveIncome: pd.Series # Vector of new household incomes from OLS prediction. # """ # # load transition model based on year. # year = min(self.year, 2018) # # The calculation relies on the R predict method and the model that has already been specified # nextWaveNetIncome = r_utils.predict_next_timestep_ols(self.income_transition_model, self.rpy2_modules, pop, dependent='Net_hh_income', transform = True) # return nextWaveNetIncome # # def calculate_financial_situation(self, pop): # year = min(self.year, 2018) # transition_model = r_utils.load_transitions(f"financial_situation/clm/financial_situation_{year}_{year + 1}", self.rpy2_modules) # return r_utils.predict_next_timestep_clm(transition_model, self.rpy2_modules, pop, dependent='financial_situation') # # # def transform_net_to_disposable_income(self, net_income, outgoings, oecd_equiv): # # convert net to disposable income # return (net_income - outgoings) / oecd_equiv # # def calculate_fp10(self, net_income, energy_use): # # Is 10% of net/disposable income spent on fuel? # return (10 * energy_use) >= net_income # # def adjust_inflation(self, net_income, year): # """ Adjust disposable income according to yearly inflation.""" # if year >= 2026: # yearly_inf_rate = 1.02 # else: # yearly_inf_rate = 1 + (annual_cpi_rates[year] / 100) # Get forecasted inflation rate due to CPI. # # # Widely assumed to be 2% after 2026 for UK. # return net_income / yearly_inf_rate