Coverage for HARK/Calibration/SCF/WealthIncomeDist/SCFDistTools.py: 78%

1"""

2Created on Fri Jan 8 15:36:14 2021

4@author: Mateo

5"""

7import os

9import numpy as np

10import pandas as pd

12from HARK import _log

13from HARK.Calibration.cpi.us.CPITools import cpi_deflator

15scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__))

def get_scf_distr_stats():
    """
    Load the complete table of SCF summary statistics.

    The table is read from the file WealthIncomeStats.csv, which is looked up
    in the directory stored in the module-level ``scf_sumstats_dir``.

    Returns
    -------
    table : pandas DataFrame
        The contents of WealthIncomeStats.csv, parsed with pandas' default
        index and dtypes. See ./README.md for an explanation of the variables
        in the table and its source.
    """
    csv_path = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    return pd.read_csv(csv_path, sep=",")

def parse_scf_distr_stats(age=None, education=None, wave=None):
    """
    Retrieve SCF summary statistics regarding wealth and permanent income
    for a specific SCF wave, age bracket, and education level.

    Parameters
    ----------
    age : int, optional
        Age for which to retrieve summary statistics. The statistics are
        calculated using 5-year age bins. Therefore, for instance, age = 23
        will return statistics computed on ages (20,25].
        The default is None. In such case, the function will return statistics
        for the group without any age filtration.
    education : str, optional
        Education level for which to retrieve summary statistics. Must be one
        of 'NoHS' (no high-school or GED), 'HS' (high-school or GED), or
        'College'.
        The default is None. In such case, no education filtration is applied
        (all groups are pooled).
    wave : int, optional
        SCF wave to use for summary statistics. Must be one of 1995, 1998,
        2001, 2004, 2007, 2010, 2013, 2016, 2019.
        The default is None. In such case, all waves are used.

    Returns
    -------
    row_dict : dict
        Dictionary with summary statistics for wealth and permanent income
        for the specified group. Its fields correspond to the columns of
        ./WealthIncomeStats.csv, which are described in ./README.md.
        The "BASE_YR" entry is converted to int.

    Raises
    ------
    AssertionError
        If ``education`` is provided but is not one of the allowed categories.
    Exception
        If the table has no row for the requested Age/Wave/Education
        combination. The original KeyError is attached as ``__cause__``.
    """
    # Pre-process age to make it a five-year bracket as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "({},{}]".format(l_bound, u_bound)
        _log.debug("Returning SCF summary statistics for ages %s.", age_bracket)
    else:
        # If no age is given, use all age brackets.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories.
    if education is None:
        education = "All"
    elif education not in ("NoHS", "HS", "College"):
        # Raised explicitly (rather than through an `assert` statement) so the
        # check is not stripped when Python runs with -O. The exception type
        # is kept as AssertionError for backward compatibility with callers.
        raise AssertionError(
            "If an education level is provided, it must be one of "
            "'NoHS', 'HS', or 'College'."
        )

    # Parse the wave. The table stores years as strings (see dtype below).
    wave_str = "All" if wave is None else str(int(wave))

    # Read the table, indexed by the three grouping variables. The index
    # columns are read as strings so that they can be matched against the
    # string keys built above.
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str, "BASE_YR": int},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, wave_str, age_bracket)]
    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            "Age/Wave/Education combination that was requested."
        )
        # Chain explicitly so the original lookup failure stays visible.
        raise Exception(message) from e

    # Check for NAs
    if row.isna().any():
        _log.debug(
            "There were not enough observations in the requested "
            "Age/Wave/Education combination to compute all summary "
            "statistics."
        )

    # to_dict transforms BASE_YR to float from int. Manually fix this
    row_dict = row.to_dict()
    row_dict["BASE_YR"] = int(row_dict["BASE_YR"])

    return row_dict

134

135

def income_wealth_dists_from_scf(base_year, age=None, education=None, wave=None):
    """
    Build the parameters of the initial distributions of permanent income
    and normalized wealth from the SCF's summary statistics.

    Many of HARK's models (e.g. PerfForesightConsumerType.sim_birth(),
    GenIncProcessConsumerType.sim_birth()) assume that the initial
    distributions of permanent income (pLvl) and normalized wealth (aNrm) are
    log-normal, and construct them from means and standard deviations that
    are model parameters. This function fills in those parameters using the
    summary statistics returned by parse_scf_distr_stats().

    Parameters
    ----------
    base_year : int
        Base year to use for nominal quantities.
    age : int, optional
        Age for which to retrieve summary statistics. See
        parse_scf_distr_stats(). The default is None.
    education : str, optional
        Educational attainment level for which to retrieve summary
        statistics. See parse_scf_distr_stats(). The default is None.
    wave : int, optional
        SCF wave to use for summary statistics. See parse_scf_distr_stats().
        The default is None.

    Returns
    -------
    param_dict : dict
        Dictionary with keys "aNrmInitMean", "aNrmInitStd", "pLvlInitMean"
        and "pLvlInitStd": the means and standard deviations of the logs of
        normalized wealth and permanent income.
    """
    # Summary statistics for the requested group.
    scf_stats = parse_scf_distr_stats(age, education, wave)

    # Deflator from the table's base year to the requested one. The SCF
    # summary files use the September CPI measurement to deflate, so the same
    # month is used here. Only the first element of the result is needed.
    price_adjustment = cpi_deflator(
        from_year=scf_stats["BASE_YR"], to_year=base_year, base_month="SEP"
    )[0]

    # log(X * deflator) = log(X) + log(deflator), so changing the base year
    # only shifts the mean of log permanent income. It does not affect:
    # - normalized wealth, which is a ratio of two nominal quantities;
    # - the standard deviation of log permanent income, which is invariant
    #   to an additive shift.
    shifted_log_income_mean = scf_stats["lnPermIncome.mean"] + np.log(
        price_adjustment
    )

    # All four values only matter for simulation.
    return {
        # Mean of log initial assets
        "aNrmInitMean": scf_stats["lnNrmWealth.mean"],
        # Standard deviation of log initial assets
        "aNrmInitStd": scf_stats["lnNrmWealth.sd"],
        # Mean of log initial permanent income, expressed in base_year terms
        "pLvlInitMean": shifted_log_income_mean,
        # Standard deviation of log initial permanent income
        "pLvlInitStd": scf_stats["lnPermIncome.sd"],
    }