Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/check_black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v5
- uses: actions/setup-python@v2
- uses: psf/black@stable
with:
options: " --check"
options: "-l 79 --check"
src: "."
6 changes: 3 additions & 3 deletions .github/workflows/deploy_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
uses: actions/checkout@v5
with:
persist-credentials: false

- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: taxdata-dev
environment-file: environment.yml
python-version: 3.7
python-version: 3.13
auto-activate-base: false

- name: Build # Build Jupyter Book
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/docs_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
uses: actions/checkout@v5
with:
persist-credentials: false

- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: taxdata-dev
environment-file: environment.yml
python-version: 3.7
python-version: 3.13
auto-activate-base: false

- name: Build # Build Jupyter Book
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
name: Test data
name: Test source code

on: [push, pull_request]

jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [3.7]
os: [ubuntu-latest, windows-latest]
python-version: ['3.11', '3.12', '3.13']

steps:
- name: checkout
uses: actions/checkout@master
with:
persist-credentials: false

- name: Setup Miniconda using Python ${{ matrix.python-version }}
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: taxdata-dev
environment-file: environment.yml
Expand Down
96 changes: 72 additions & 24 deletions cps_stage2/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@ def target(target_val, pop, factor, value):
weights * factors["APOPSNR"][year],
weights * factors["ARETS"][year],
)
single_returns = np.where((data["mars"] == 1) & (data["filer"] == 1), s006, 0)
joint_returns = np.where((data["mars"] == 2) & (data["filer"] == 1), s006, 0)
single_returns = np.where(
(data["mars"] == 1) & (data["filer"] == 1), s006, 0
)
joint_returns = np.where(
(data["mars"] == 2) & (data["filer"] == 1), s006, 0
)
hh_returns = np.where((data["mars"] == 4) & (data["filer"] == 1), s006, 0)
returns_w_ss = np.where((data["e02400"] > 0) & (data["filer"] == 1), s006, 0)
returns_w_ss = np.where(
(data["e02400"] > 0) & (data["filer"] == 1), s006, 0
)
dep_exemptions = (
np.where(data["mars"] == 2, data["XTOT"] - 2, data["XTOT"] - 1) * s006
)
Expand All @@ -45,27 +51,39 @@ def target(target_val, pop, factor, value):
# wage distribution
wage1 = np.where(data["agi"] <= 10000, data["e00200"], 0) * s006
wage2 = (
np.where((data["agi"] > 10000) & (data["agi"] <= 20000), data["e00200"], 0)
np.where(
(data["agi"] > 10000) & (data["agi"] <= 20000), data["e00200"], 0
)
* s006
)
wage3 = (
np.where((data["agi"] > 20000) & (data["agi"] <= 30000), data["e00200"], 0)
np.where(
(data["agi"] > 20000) & (data["agi"] <= 30000), data["e00200"], 0
)
* s006
)
wage4 = (
np.where((data["agi"] > 30000) & (data["agi"] <= 40000), data["e00200"], 0)
np.where(
(data["agi"] > 30000) & (data["agi"] <= 40000), data["e00200"], 0
)
* s006
)
wage5 = (
np.where((data["agi"] > 40000) & (data["agi"] <= 50000), data["e00200"], 0)
np.where(
(data["agi"] > 40000) & (data["agi"] <= 50000), data["e00200"], 0
)
* s006
)
wage6 = (
np.where((data["agi"] > 50000) & (data["agi"] <= 75000), data["e00200"], 0)
np.where(
(data["agi"] > 50000) & (data["agi"] <= 75000), data["e00200"], 0
)
* s006
)
wage7 = (
np.where((data["agi"] > 75000) & (data["agi"] <= 100_000), data["e00200"], 0)
np.where(
(data["agi"] > 75000) & (data["agi"] <= 100_000), data["e00200"], 0
)
* s006
)
wage8 = np.where(data["agi"] > 100_000, data["e00200"], 0) * s006
Expand Down Expand Up @@ -118,15 +136,27 @@ def target(target_val, pop, factor, value):
target_name = "SS_return"
rhs_vars["returns_w_ss"] = targets[year][target_name] - returns_w_ss.sum()
target_name = "Dep_return"
rhs_vars["dep_exemptions"] = targets[year][target_name] - dep_exemptions.sum()
rhs_vars["interest"] = target(targets[year]["INTS"], apopn, aints, interest.sum())
rhs_vars["dividend"] = target(targets[year]["DIVS"], apopn, adivs, dividend.sum())
rhs_vars["dep_exemptions"] = (
targets[year][target_name] - dep_exemptions.sum()
)
rhs_vars["interest"] = target(
targets[year]["INTS"], apopn, aints, interest.sum()
)
rhs_vars["dividend"] = target(
targets[year]["DIVS"], apopn, adivs, dividend.sum()
)
rhs_vars["biz_income"] = target(
targets[year]["SCHCI"], apopn, aschci, biz_income.sum()
)
rhs_vars["biz_loss"] = target(targets[year]["SCHCL"], apopn, aschcl, biz_loss.sum())
rhs_vars["cap_gain"] = target(targets[year]["CGNS"], apopn, acgns, cap_gain.sum())
rhs_vars["pension"] = target(targets[year]["Pension"], apopn, atxpy, pension.sum())
rhs_vars["biz_loss"] = target(
targets[year]["SCHCL"], apopn, aschcl, biz_loss.sum()
)
rhs_vars["cap_gain"] = target(
targets[year]["CGNS"], apopn, acgns, cap_gain.sum()
)
rhs_vars["pension"] = target(
targets[year]["Pension"], apopn, atxpy, pension.sum()
)
rhs_vars["sch_e_income"] = target(
targets[year]["SCHEI"], apopn, aschei, sch_e_income.sum()
)
Expand All @@ -136,15 +166,33 @@ def target(target_val, pop, factor, value):
rhs_vars["ss_income"] = target(
targets[year]["SS"], apopsnr, asocsec, ss_income.sum()
)
rhs_vars["ucomp"] = target(targets[year]["UCOMP"], apopn, aucomp, ucomp.sum())
rhs_vars["wage1"] = target(targets[year]["wage1"], apopn, awage, wage1.sum())
rhs_vars["wage2"] = target(targets[year]["wage2"], apopn, awage, wage2.sum())
rhs_vars["wage3"] = target(targets[year]["wage3"], apopn, awage, wage3.sum())
rhs_vars["wage4"] = target(targets[year]["wage4"], apopn, awage, wage4.sum())
rhs_vars["wage5"] = target(targets[year]["wage5"], apopn, awage, wage5.sum())
rhs_vars["wage6"] = target(targets[year]["wage6"], apopn, awage, wage6.sum())
rhs_vars["wage7"] = target(targets[year]["wage7"], apopn, awage, wage7.sum())
rhs_vars["wage8"] = target(targets[year]["wage8"], apopn, awage, wage8.sum())
rhs_vars["ucomp"] = target(
targets[year]["UCOMP"], apopn, aucomp, ucomp.sum()
)
rhs_vars["wage1"] = target(
targets[year]["wage1"], apopn, awage, wage1.sum()
)
rhs_vars["wage2"] = target(
targets[year]["wage2"], apopn, awage, wage2.sum()
)
rhs_vars["wage3"] = target(
targets[year]["wage3"], apopn, awage, wage3.sum()
)
rhs_vars["wage4"] = target(
targets[year]["wage4"], apopn, awage, wage4.sum()
)
rhs_vars["wage5"] = target(
targets[year]["wage5"], apopn, awage, wage5.sum()
)
rhs_vars["wage6"] = target(
targets[year]["wage6"], apopn, awage, wage6.sum()
)
rhs_vars["wage7"] = target(
targets[year]["wage7"], apopn, awage, wage7.sum()
)
rhs_vars["wage8"] = target(
targets[year]["wage8"], apopn, awage, wage8.sum()
)

model_vars = [
"single_returns",
Expand Down
4 changes: 3 additions & 1 deletion cps_stage2/stage2.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ def main():
for year in range(START_YEAR, END_YEAR + 1):
try:
factor_match = _factors[year].equals(CUR_FACTORS[year])
target_match = stage_2_targets[f"{year}"].equals(CUR_TARGETS[f"{year}"])
target_match = stage_2_targets[f"{year}"].equals(
CUR_TARGETS[f"{year}"]
)
if files_match and factor_match and target_match:
print(f"Skipping {year}")
skipped_years.append(year)
Expand Down
29 changes: 22 additions & 7 deletions createpuf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,18 @@ def dataprep(data):
"""
# we use a slightly modified version of mars for matching.
# _mars = 1 if single, 3 if HoH, 2 any type of joint filer
data["_mars"] = np.where(data["mars"] == 1, 1, np.where(data["mars"] == 4, 3, 2))
data["_mars"] = np.where(
data["mars"] == 1, 1, np.where(data["mars"] == 4, 3, 2)
)
data["const"] = 1
data["bil"] = np.maximum(0, data["e00900"])
data["fil"] = np.maximum(0, data["e02100"])
data["tpi"] = data[INC_VARS].sum(axis=1)
data["wage_share"] = np.divide(
data["e00200"], data["tpi"], out=np.zeros(data.shape[0]), where=data["tpi"] != 0
data["e00200"],
data["tpi"],
out=np.zeros(data.shape[0]),
where=data["tpi"] != 0,
)
data["cap_inc"] = data[CAP_VARS].sum(axis=1)
data["cap_share"] = np.divide(
Expand All @@ -73,25 +78,33 @@ def dataprep(data):
),
0,
)
data["people"] = np.where(data["_mars"] == 2, data["depne"] + 2, data["depne"] + 1)
data["people"] = np.where(
data["_mars"] == 2, data["depne"] + 2, data["depne"] + 1
)
data["people"] = np.minimum(5, data["people"])
wage_flag = (data["e00200"] != 0).astype(int)
# self employment flag
se_flag = np.logical_or(data["e00900"] != 0, data["e02100"] != 0).astype(int)
se_flag = np.logical_or(data["e00900"] != 0, data["e02100"] != 0).astype(
int
)
# income source flags
data["se1"] = np.where(wage_flag & ~se_flag, 1, 0)
data["se2"] = np.where(~wage_flag & se_flag, 1, 0)
data["se3"] = np.where(wage_flag & se_flag, 1, 0)
data["_depne"] = np.where(
np.logical_and(data["mars"] == 3, data["_depne"] == 0), 1, data["_depne"]
np.logical_and(data["mars"] == 3, data["_depne"] == 0),
1,
data["_depne"],
)

return data


# create CPS tax units
print("Creating CPS tax units")
raw_cps = cps.create(DATA_PATH, exportpkl=True, cps_files=[CPS_YEAR], benefits=False)
raw_cps = cps.create(
DATA_PATH, exportpkl=True, cps_files=[CPS_YEAR], benefits=False
)
# minor PUF prep
print("Prepping PUF")
puf2011 = pd.read_csv(Path(DATA_PATH, "puf2011.csv"))
Expand Down Expand Up @@ -158,7 +171,9 @@ def dataprep(data):

# merge all the data together
print("Merging matched data")
data = pd.merge(raw_puf, match_index, how="inner", left_on="recid", right_on="recip")
data = pd.merge(
raw_puf, match_index, how="inner", left_on="recid", right_on="recip"
)
data = pd.merge(
data,
filers,
Expand Down
10 changes: 5 additions & 5 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ name: taxdata-dev
channels:
- conda-forge
dependencies:
- python>=3.6.5
- numpy>=1.12.1
- pandas>=0.20.3
- "python>=3.11, <3.14"
- "numpy>=1.26"
- "pandas>=2.2"
- "bokeh>=3.7"
- scipy>=0.18.1
- bokeh>=0.12.3
- statsmodels
- pytest
- pulp
Expand All @@ -21,9 +21,9 @@ dependencies:
- python-chromedriver-binary
- tabulate
- pre-commit
- taxcalc>=3.5.0
- pip
- pip:
- requests-html
- black
- taxcalc>=5.3.0
- jupyter-book>=0.9.1
Loading
Loading