Coverage for HARK/Calibration/SCF/WealthIncomeDist/SCFDistTools.py: 78%

40 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-02 05:14 +0000

1""" 

2Created on Fri Jan 8 15:36:14 2021 

3 

4@author: Mateo 

5""" 

6 

7import os 

8 

9import numpy as np 

10import pandas as pd 

11 

12from HARK import _log 

13from HARK.Calibration.cpi.us.CPITools import cpi_deflator 

14 

15scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__)) 

16 

17 

def get_scf_distr_stats():
    """
    Load the complete table of SCF summary statistics.

    Returns
    -------
    table : pandas DataFrame
        The contents of WealthIncomeStats.csv, located in the same directory
        as this module. See ./README.md for an explanation of the variables
        in the table and its source.
    """

    # The csv lives next to this module; read it with default column types
    return pd.read_csv(
        os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv"), sep=","
    )

37 

38 

def parse_scf_distr_stats(age=None, education=None, wave=None):
    """
    A function to retrieve SCF summary statistics regarding wealth and
    permanent income for a specific SCF wave, age bracket, and education
    level.

    Parameters
    ----------
    age : int, optional
        Age for which to retrieve summary statistics. The statistics are
        calculated using 5-year age bins. Therefore, for instance, age = 23
        will return statistics computed on ages (20,25].
        The default is None. In such case, the function will return statistics
        for the group without any age filtration.
    education : str, optional
        Education level for which to retrieve summary statistics. Must be one
        of 'NoHS' (no high-school or GED), 'HS' (high-school or GED), or
        'College'.
        The default is None. In such case, no education filtration is applied
        (all groups are pooled).
    wave : int, optional
        SCF wave to use for summary statistics. Must be one of 1995, 1998,
        2001, 2004, 2007, 2010, 2013, 2016, 2019.
        The default is None. In such case, all waves are used.

    Returns
    -------
    row_dict : dict
        Dictionary with summary statistics for wealth and permanent income
        for the specified group. Its fields correspond to the columns of
        ./WealthIncomeStats.csv, which are described in ./README.md.

    Raises
    ------
    AssertionError
        If `education` is provided but is not one of 'NoHS', 'HS', or
        'College'.
    Exception
        If the table has no row for the requested Age/Wave/Education
        combination. The original KeyError is attached as its cause.
    """

    # Pre-process age to make it a five-year bracket label as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "(" + str(l_bound) + "," + str(u_bound) + "]"

        _log.debug("Returning SCF summary statistics for ages " + age_bracket + ".")

    else:
        # If no age is given, use all age brackets.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories
    if education is not None:
        if education not in ("NoHS", "HS", "College"):
            # Raised explicitly instead of using an `assert` statement so the
            # validation is not stripped when Python runs with -O. The
            # exception type is kept as AssertionError for existing callers.
            raise AssertionError(
                "If an education level is provided, it must be one of "
                + "'NoHS', 'HS', or 'College'."
            )

    else:
        education = "All"

    # Parse the wave: index labels are read as strings, so convert to match
    wave_str = "All" if wave is None else str(int(wave))

    # Form the file name
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")

    # Read csv, indexing rows by the (education, wave, age bracket) triplet.
    # The index columns are forced to str so that lookups use string labels.
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str, "BASE_YR": int},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, wave_str, age_bracket)]

    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            + "Age/Wave/Education combination that was requested."
        )
        # Chain the KeyError so the failed lookup stays visible in tracebacks
        raise Exception(message) from e

    # Check for NAs
    if row.isna().any():
        _log.debug(
            "There were not enough observations in the requested "
            + "Age/Wave/Education combination to compute all summary "
            + "statistics."
        )

    # to_dict transforms BASE_YR to float from int. Manually fix this
    row_dict = row.to_dict()
    row_dict["BASE_YR"] = int(row_dict["BASE_YR"])

    return row_dict

134 

135 

def income_wealth_dists_from_scf(base_year, age=None, education=None, wave=None):
    """
    Build the parameters of the initial distributions of permanent income
    and normalized wealth from the SCF's summary statistics.

    Many of HARK's models (e.g. PerfForesightConsumerType.sim_birth(),
    GenIncProcessConsumerType.sim_birth()) assume the initial distribution
    of permanent income (pLvl) and normalized wealth (aNrm) are log-normal.
    They construct these distributions from their means and standard
    deviations, which are parameters to the models. This function assigns
    these parameters using summary statistics from the SCF.

    Parameters
    ----------
    base_year : int
        Base year to use for nominal quantities.
    age : int, optional
        Age for which to retrieve summary statistics. See
        parse_scf_distr_stats(). The default is None.
    education : str, optional
        Educational attainment level for which to retrieve summary
        statistics. See parse_scf_distr_stats(). The default is None.
    wave : int, optional
        SCF wave to use for summary statistics. See parse_scf_distr_stats().
        The default is None.

    Returns
    -------
    param_dict : dict
        Dictionary with means and standard deviations of the distributions
        of permanent income and normalized wealth, under the keys
        'aNrmInitMean', 'aNrmInitStd', 'pLvlInitMean' and 'pLvlInitStd'.
    """

    # Summary statistics for the requested group, taken from the SCF table
    scf_stats = parse_scf_distr_stats(age, education, wave)

    # Deflator that moves nominal quantities from the table's base year to
    # the requested one. The SCF summary files use the September CPI
    # measurement to deflate, so the same month is used here.
    price_adjustment = cpi_deflator(
        from_year=scf_stats["BASE_YR"], to_year=base_year, base_month="SEP"
    )[0]

    # log(X * deflator) = log(X) + log(deflator), so only the mean of
    # log-permanent income is shifted. The deflator does not apply to:
    # - NrmWealth: it's the ratio of two nominal quantities, so it is
    #   unaltered by base changes.
    # - sd(ln(Permanent income)): an additive shift to log-permanent income
    #   leaves the standard deviation unchanged.
    log_shift = np.log(price_adjustment)

    init_params = {}
    # Mean of log initial assets (only matters for simulation)
    init_params["aNrmInitMean"] = scf_stats["lnNrmWealth.mean"]
    # Standard deviation of log initial assets (only for simulation)
    init_params["aNrmInitStd"] = scf_stats["lnNrmWealth.sd"]
    # Mean of log initial permanent income (only matters for simulation)
    init_params["pLvlInitMean"] = scf_stats["lnPermIncome.mean"] + log_shift
    # Standard deviation of log initial permanent income (only matters for
    # simulation)
    init_params["pLvlInitStd"] = scf_stats["lnPermIncome.sd"]

    return init_params