Coverage for HARK/Calibration/SCF/WealthIncomeDist/SCFDistTools.py: 78%
40 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 05:14 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 05:14 +0000
1"""
2Created on Fri Jan 8 15:36:14 2021
4@author: Mateo
5"""
7import os
9import numpy as np
10import pandas as pd
12from HARK import _log
13from HARK.Calibration.cpi.us.CPITools import cpi_deflator
15scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__))
def get_scf_distr_stats():
    """
    Load the complete table of SCF summary statistics.

    Returns
    -------
    table : pandas DataFrame
        The contents of WealthIncomeStats.csv, located in the same directory
        as this module. See ./README.md for an explanation of the variables
        in the table and its source.
    """
    # The csv file sits next to this module
    csv_path = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    return pd.read_csv(csv_path, sep=",")
def parse_scf_distr_stats(age=None, education=None, wave=None):
    """
    Retrieve SCF summary statistics regarding wealth and permanent income
    for a specific SCF wave, age bracket, and education level.

    Parameters
    ----------
    age : int, optional
        Age for which to retrieve summary statistics. The statistics are
        calculated using 5-year age bins. Therefore, for instance, age = 23
        will return statistics computed on ages (20,25].
        The default is None. In such case, the function will return statistics
        for the group without any age filtration.
    education : str, optional
        Education level for which to retrieve summary statistics. Must be one
        of 'NoHS' (no high-school or GED), 'HS' (high-school or GED), or
        'College'.
        The default is None. In such case, no education filtration is applied
        (all groups are pooled).
    wave : int, optional
        SCF wave to use for summary statistics. Must be one of 1995, 1998,
        2001, 2004, 2007, 2010, 2013, 2016, 2019.
        The default is None. In such case, all waves are used.

    Returns
    -------
    row_dict : dict
        Dictionary with summary statistics for wealth and permanent income
        for the specified group. Its fields correspond to the columns of
        ./WealthIncomeStats.csv, which are described in ./README.md.

    Raises
    ------
    AssertionError
        If an education level is given that is not 'NoHS', 'HS' or 'College'.
    Exception
        If the table has no row for the requested Age/Wave/Education
        combination.
    """

    # Pre-process age to make it a five-year bracket label as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "(" + str(l_bound) + "," + str(u_bound) + "]"

        _log.debug("Returning SCF summary statistics for ages %s.", age_bracket)

    else:
        # If no age is given, use all age brackets.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories
    if education is not None:
        if education not in ("NoHS", "HS", "College"):
            # Raised explicitly (rather than through an ``assert`` statement)
            # so that the check is not stripped when running with ``-O``; the
            # exception type is the same one an ``assert`` would produce.
            raise AssertionError(
                "If an education level is provided, it must be one of "
                + "'NoHS', 'HS', or 'College'."
            )

    else:
        education = "All"

    # Parse the wave: the table stores it as a string, with "All" for pooled
    wave_str = "All" if wave is None else str(int(wave))

    # Read the table, indexed by the three grouping variables (kept as
    # strings so that they can be matched against the labels built above)
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str, "BASE_YR": int},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, wave_str, age_bracket)]

    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            + "Age/Wave/Education combination that was requested."
        )
        # Chain the original lookup error as the explicit cause
        raise Exception(message) from e

    # Check for NAs
    if row.isna().any():
        _log.debug(
            "There were not enough observations in the requested "
            + "Age/Wave/Education combination to compute all summary "
            + "statistics."
        )

    # to_dict transforms BASE_YR to float from int. Manually fix this
    row_dict = row.to_dict()
    row_dict["BASE_YR"] = int(row_dict["BASE_YR"])

    return row_dict
def income_wealth_dists_from_scf(base_year, age=None, education=None, wave=None):
    """
    Build the parameters of the initial distributions of permanent income
    and normalized wealth from the SCF's summary statistics.

    Many of HARK's models (e.g. PerfForesightConsumerType.sim_birth(),
    GenIncProcessConsumerType.sim_birth()) assume the initial distribution
    of permanent income (pLvl) and normalized wealth (aNrm) are log-normal.
    They construct these distributions from their means and standard
    deviations, which are parameters to the models. This function assigns
    these parameters using summary statistics from the SCF.

    Parameters
    ----------
    base_year : int
        Base year to use for nominal quantities.
    age : int, optional
        Age for which to retrieve summary statistics. See
        parse_scf_distr_stats(). The default is None.
    education : str, optional
        Educational attainment level for which to retrieve summary
        statistics. See parse_scf_distr_stats(). The default is None.
    wave : int, optional
        SCF wave to use for summary statistics. See parse_scf_distr_stats().
        The default is None.

    Returns
    -------
    param_dict : dict
        Dictionary with means and standard deviations of the distributions
        of permanent income and normalized wealth, under the keys
        'aNrmInitMean', 'aNrmInitStd', 'pLvlInitMean' and 'pLvlInitStd'.
    """
    # Summary statistics for the requested group
    scf_stats = parse_scf_distr_stats(age, education, wave)

    # Price adjustment from the table's base year to the requested one. The
    # SCF summary files deflate with the September CPI, so use the same month.
    price_adj = cpi_deflator(
        from_year=scf_stats["BASE_YR"], to_year=base_year, base_month="SEP"
    )[0]

    # log(X * deflator) = log(X) + log(deflator), so rebasing only shifts the
    # mean of log permanent income. It leaves untouched:
    # - normalized wealth, which is a ratio of two nominal quantities;
    # - the standard deviation of log permanent income, since an additive
    #   shift does not change dispersion.
    log_price_adj = np.log(price_adj)

    # Mean of log initial permanent income, expressed in base_year prices
    perm_inc_log_mean = scf_stats["lnPermIncome.mean"] + log_price_adj

    return {
        # Mean of log initial assets (only matters for simulation)
        "aNrmInitMean": scf_stats["lnNrmWealth.mean"],
        # Standard deviation of log initial assets (only for simulation)
        "aNrmInitStd": scf_stats["lnNrmWealth.sd"],
        # Mean of log initial permanent income (only matters for simulation)
        "pLvlInitMean": perm_inc_log_mean,
        # Standard deviation of log initial permanent income (only matters
        # for simulation)
        "pLvlInitStd": scf_stats["lnPermIncome.sd"],
    }