Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 72 additions & 24 deletions cps_stage2/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@ def target(target_val, pop, factor, value):
weights * factors["APOPSNR"][year],
weights * factors["ARETS"][year],
)
single_returns = np.where((data["mars"] == 1) & (data["filer"] == 1), s006, 0)
joint_returns = np.where((data["mars"] == 2) & (data["filer"] == 1), s006, 0)
single_returns = np.where(
(data["mars"] == 1) & (data["filer"] == 1), s006, 0
)
joint_returns = np.where(
(data["mars"] == 2) & (data["filer"] == 1), s006, 0
)
hh_returns = np.where((data["mars"] == 4) & (data["filer"] == 1), s006, 0)
returns_w_ss = np.where((data["e02400"] > 0) & (data["filer"] == 1), s006, 0)
returns_w_ss = np.where(
(data["e02400"] > 0) & (data["filer"] == 1), s006, 0
)
dep_exemptions = (
np.where(data["mars"] == 2, data["XTOT"] - 2, data["XTOT"] - 1) * s006
)
Expand All @@ -45,27 +51,39 @@ def target(target_val, pop, factor, value):
# wage distribution
wage1 = np.where(data["agi"] <= 10000, data["e00200"], 0) * s006
wage2 = (
np.where((data["agi"] > 10000) & (data["agi"] <= 20000), data["e00200"], 0)
np.where(
(data["agi"] > 10000) & (data["agi"] <= 20000), data["e00200"], 0
)
* s006
)
wage3 = (
np.where((data["agi"] > 20000) & (data["agi"] <= 30000), data["e00200"], 0)
np.where(
(data["agi"] > 20000) & (data["agi"] <= 30000), data["e00200"], 0
)
* s006
)
wage4 = (
np.where((data["agi"] > 30000) & (data["agi"] <= 40000), data["e00200"], 0)
np.where(
(data["agi"] > 30000) & (data["agi"] <= 40000), data["e00200"], 0
)
* s006
)
wage5 = (
np.where((data["agi"] > 40000) & (data["agi"] <= 50000), data["e00200"], 0)
np.where(
(data["agi"] > 40000) & (data["agi"] <= 50000), data["e00200"], 0
)
* s006
)
wage6 = (
np.where((data["agi"] > 50000) & (data["agi"] <= 75000), data["e00200"], 0)
np.where(
(data["agi"] > 50000) & (data["agi"] <= 75000), data["e00200"], 0
)
* s006
)
wage7 = (
np.where((data["agi"] > 75000) & (data["agi"] <= 100_000), data["e00200"], 0)
np.where(
(data["agi"] > 75000) & (data["agi"] <= 100_000), data["e00200"], 0
)
* s006
)
wage8 = np.where(data["agi"] > 100_000, data["e00200"], 0) * s006
Expand Down Expand Up @@ -118,15 +136,27 @@ def target(target_val, pop, factor, value):
target_name = "SS_return"
rhs_vars["returns_w_ss"] = targets[year][target_name] - returns_w_ss.sum()
target_name = "Dep_return"
rhs_vars["dep_exemptions"] = targets[year][target_name] - dep_exemptions.sum()
rhs_vars["interest"] = target(targets[year]["INTS"], apopn, aints, interest.sum())
rhs_vars["dividend"] = target(targets[year]["DIVS"], apopn, adivs, dividend.sum())
rhs_vars["dep_exemptions"] = (
targets[year][target_name] - dep_exemptions.sum()
)
rhs_vars["interest"] = target(
targets[year]["INTS"], apopn, aints, interest.sum()
)
rhs_vars["dividend"] = target(
targets[year]["DIVS"], apopn, adivs, dividend.sum()
)
rhs_vars["biz_income"] = target(
targets[year]["SCHCI"], apopn, aschci, biz_income.sum()
)
rhs_vars["biz_loss"] = target(targets[year]["SCHCL"], apopn, aschcl, biz_loss.sum())
rhs_vars["cap_gain"] = target(targets[year]["CGNS"], apopn, acgns, cap_gain.sum())
rhs_vars["pension"] = target(targets[year]["Pension"], apopn, atxpy, pension.sum())
rhs_vars["biz_loss"] = target(
targets[year]["SCHCL"], apopn, aschcl, biz_loss.sum()
)
rhs_vars["cap_gain"] = target(
targets[year]["CGNS"], apopn, acgns, cap_gain.sum()
)
rhs_vars["pension"] = target(
targets[year]["Pension"], apopn, atxpy, pension.sum()
)
rhs_vars["sch_e_income"] = target(
targets[year]["SCHEI"], apopn, aschei, sch_e_income.sum()
)
Expand All @@ -136,15 +166,33 @@ def target(target_val, pop, factor, value):
rhs_vars["ss_income"] = target(
targets[year]["SS"], apopsnr, asocsec, ss_income.sum()
)
rhs_vars["ucomp"] = target(targets[year]["UCOMP"], apopn, aucomp, ucomp.sum())
rhs_vars["wage1"] = target(targets[year]["wage1"], apopn, awage, wage1.sum())
rhs_vars["wage2"] = target(targets[year]["wage2"], apopn, awage, wage2.sum())
rhs_vars["wage3"] = target(targets[year]["wage3"], apopn, awage, wage3.sum())
rhs_vars["wage4"] = target(targets[year]["wage4"], apopn, awage, wage4.sum())
rhs_vars["wage5"] = target(targets[year]["wage5"], apopn, awage, wage5.sum())
rhs_vars["wage6"] = target(targets[year]["wage6"], apopn, awage, wage6.sum())
rhs_vars["wage7"] = target(targets[year]["wage7"], apopn, awage, wage7.sum())
rhs_vars["wage8"] = target(targets[year]["wage8"], apopn, awage, wage8.sum())
rhs_vars["ucomp"] = target(
targets[year]["UCOMP"], apopn, aucomp, ucomp.sum()
)
rhs_vars["wage1"] = target(
targets[year]["wage1"], apopn, awage, wage1.sum()
)
rhs_vars["wage2"] = target(
targets[year]["wage2"], apopn, awage, wage2.sum()
)
rhs_vars["wage3"] = target(
targets[year]["wage3"], apopn, awage, wage3.sum()
)
rhs_vars["wage4"] = target(
targets[year]["wage4"], apopn, awage, wage4.sum()
)
rhs_vars["wage5"] = target(
targets[year]["wage5"], apopn, awage, wage5.sum()
)
rhs_vars["wage6"] = target(
targets[year]["wage6"], apopn, awage, wage6.sum()
)
rhs_vars["wage7"] = target(
targets[year]["wage7"], apopn, awage, wage7.sum()
)
rhs_vars["wage8"] = target(
targets[year]["wage8"], apopn, awage, wage8.sum()
)

model_vars = [
"single_returns",
Expand Down
4 changes: 3 additions & 1 deletion cps_stage2/stage2.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ def main():
for year in range(START_YEAR, END_YEAR + 1):
try:
factor_match = _factors[year].equals(CUR_FACTORS[year])
target_match = stage_2_targets[f"{year}"].equals(CUR_TARGETS[f"{year}"])
target_match = stage_2_targets[f"{year}"].equals(
CUR_TARGETS[f"{year}"]
)
if files_match and factor_match and target_match:
print(f"Skipping {year}")
skipped_years.append(year)
Expand Down
29 changes: 22 additions & 7 deletions createpuf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,18 @@ def dataprep(data):
"""
# we use a slightly modified version of mars for matching.
# _mars = 1 if single, 3 if HoH, 2 any type of joint filer
data["_mars"] = np.where(data["mars"] == 1, 1, np.where(data["mars"] == 4, 3, 2))
data["_mars"] = np.where(
data["mars"] == 1, 1, np.where(data["mars"] == 4, 3, 2)
)
data["const"] = 1
data["bil"] = np.maximum(0, data["e00900"])
data["fil"] = np.maximum(0, data["e02100"])
data["tpi"] = data[INC_VARS].sum(axis=1)
data["wage_share"] = np.divide(
data["e00200"], data["tpi"], out=np.zeros(data.shape[0]), where=data["tpi"] != 0
data["e00200"],
data["tpi"],
out=np.zeros(data.shape[0]),
where=data["tpi"] != 0,
)
data["cap_inc"] = data[CAP_VARS].sum(axis=1)
data["cap_share"] = np.divide(
Expand All @@ -73,25 +78,33 @@ def dataprep(data):
),
0,
)
data["people"] = np.where(data["_mars"] == 2, data["depne"] + 2, data["depne"] + 1)
data["people"] = np.where(
data["_mars"] == 2, data["depne"] + 2, data["depne"] + 1
)
data["people"] = np.minimum(5, data["people"])
wage_flag = (data["e00200"] != 0).astype(int)
# self employment flag
se_flag = np.logical_or(data["e00900"] != 0, data["e02100"] != 0).astype(int)
se_flag = np.logical_or(data["e00900"] != 0, data["e02100"] != 0).astype(
int
)
# income source flags
data["se1"] = np.where(wage_flag & ~se_flag, 1, 0)
data["se2"] = np.where(~wage_flag & se_flag, 1, 0)
data["se3"] = np.where(wage_flag & se_flag, 1, 0)
data["_depne"] = np.where(
np.logical_and(data["mars"] == 3, data["_depne"] == 0), 1, data["_depne"]
np.logical_and(data["mars"] == 3, data["_depne"] == 0),
1,
data["_depne"],
)

return data


# create CPS tax units
print("Creating CPS tax units")
raw_cps = cps.create(DATA_PATH, exportpkl=True, cps_files=[CPS_YEAR], benefits=False)
raw_cps = cps.create(
DATA_PATH, exportpkl=True, cps_files=[CPS_YEAR], benefits=False
)
# minor PUF prep
print("Prepping PUF")
puf2011 = pd.read_csv(Path(DATA_PATH, "puf2011.csv"))
Expand Down Expand Up @@ -158,7 +171,9 @@ def dataprep(data):

# merge all the data together
print("Merging matched data")
data = pd.merge(raw_puf, match_index, how="inner", left_on="recid", right_on="recip")
data = pd.merge(
raw_puf, match_index, how="inner", left_on="recid", right_on="recip"
)
data = pd.merge(
data,
filers,
Expand Down
36 changes: 26 additions & 10 deletions history/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ def report():
img_path = Path(CUR_PATH, f"{var}.png")
plot.write_image(str(img_path))
plot_paths.append(img_path)
growth_rate_projections.append(f"![]({str(img_path)})" + "{.center}")
growth_rate_projections.append(
f"![]({str(img_path)})" + "{.center}"
)

template_args["growth_rate_projections"] = growth_rate_projections

Expand Down Expand Up @@ -204,13 +206,18 @@ def report():

# compare tax calculator projections
# baseline CPS calculator
base_cps = tc.Calculator(records=tc.Records.cps_constructor(), policy=tc.Policy())
base_cps = tc.Calculator(
records=tc.Records.cps_constructor(), policy=tc.Policy()
)
base_cps.advance_to_year(first_year)
base_cps.calc_all()
# updated CPS calculator
cps = pd.read_csv(Path(CUR_PATH, "..", "data", "cps.csv.gz"), index_col=None)
cps = pd.read_csv(
Path(CUR_PATH, "..", "data", "cps.csv.gz"), index_col=None
)
cps_weights = pd.read_csv(
Path(CUR_PATH, "..", "cps_stage2", "cps_weights.csv.gz"), index_col=None
Path(CUR_PATH, "..", "cps_stage2", "cps_weights.csv.gz"),
index_col=None,
)
gfactor_path_str = str(GROW_FACTORS_PATH)
gft = tc.GrowFactors(growfactors_filename=gfactor_path_str)
Expand All @@ -232,13 +239,18 @@ def report():

# Validation with CBO tax model
# baseline CPS calculator
base_cps = tc.Calculator(records=tc.Records.cps_constructor(), policy=tc.Policy())
base_cps = tc.Calculator(
records=tc.Records.cps_constructor(), policy=tc.Policy()
)
base_cps.advance_to_year(first_year)
base_cps.calc_all()
# updated CPS calculator
cps = pd.read_csv(Path(CUR_PATH, "..", "data", "cps.csv.gz"), index_col=None)
cps = pd.read_csv(
Path(CUR_PATH, "..", "data", "cps.csv.gz"), index_col=None
)
cps_weights = pd.read_csv(
Path(CUR_PATH, "..", "cps_stage2", "cps_weights.csv.gz"), index_col=None
Path(CUR_PATH, "..", "cps_stage2", "cps_weights.csv.gz"),
index_col=None,
)
gfactor_path_str = str(GROW_FACTORS_PATH)
gft = tc.GrowFactors(growfactors_filename=gfactor_path_str)
Expand Down Expand Up @@ -267,7 +279,8 @@ def report():
base_puf.calc_all()
# updated puf calculator
puf_weights = pd.read_csv(
Path(CUR_PATH, "..", "puf_stage2", "puf_weights.csv.gz"), index_col=None
Path(CUR_PATH, "..", "puf_stage2", "puf_weights.csv.gz"),
index_col=None,
)
puf_ratios = pd.read_csv(
Path(CUR_PATH, "..", "puf_stage3", "puf_ratios.csv"), index_col=0
Expand All @@ -293,7 +306,8 @@ def report():
base_puf.calc_all()
# updated puf calculator
puf_weights = pd.read_csv(
Path(CUR_PATH, "..", "puf_stage2", "puf_weights.csv.gz"), index_col=None
Path(CUR_PATH, "..", "puf_stage2", "puf_weights.csv.gz"),
index_col=None,
)
puf_ratios = pd.read_csv(
Path(CUR_PATH, "..", "puf_stage3", "puf_ratios.csv"), index_col=0
Expand All @@ -317,7 +331,9 @@ def report():
template_args["puf_income_table"] = None
template_args["puf_payroll_table"] = None
template_args["puf_salaries_and_wages_table"] = None
template_args["puf_taxable_interest_and_ordinary_dividends_table"] = None
template_args["puf_taxable_interest_and_ordinary_dividends_table"] = (
None
)
template_args["puf_qualified_dividends_table"] = None
template_args["puf_capital_table"] = None
template_args["puf_business_table"] = None
Expand Down
Loading
Loading