Coverage for HARK / Calibration / life_tables / us_ssa / SSATools.py: 81%

48 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-07 05:16 +0000

1""" 

2Created on Fri Jan 8 15:36:14 2021 

3 

4@author: Mateo 

5""" 

6 

7import os 

8 

9import numpy as np 

10import pandas as pd 

11 

12from HARK import _log 

13 

# Absolute path of the directory that contains this module; the functions
# below build the SSA csv file names relative to it.
ssa_tables_dir = os.path.split(os.path.abspath(__file__))[0]

15 

16 

def get_ssa_life_tables():
    """
    Load the four SSA life-table csv files and stack them into one table.

    One file is read for every combination of sex ("M", "F") and method
    ("Historical", "Projected"). Before stacking, each table receives two
    extra columns, "Sex" and "Method", recording which file its rows came from.

    Returns
    -------
    Pandas DataFrame
        The row-wise concatenation of the four tables, in the order
        M/Historical, M/Projected, F/Historical, F/Projected. All columns of
        the original csv files are kept, plus "Sex" and "Method".

    """
    # Every (sex, method) pair, with sex varying slowest.
    combos = [
        (sex_code, source)
        for sex_code in ("M", "F")
        for source in ("Historical", "Projected")
    ]

    frames = []
    for sex_code, source in combos:
        # Historical files carry the "Hist" infix, projected ones "Alt2".
        infix = "Alt2" if source != "Historical" else "Hist"
        path = os.path.join(
            ssa_tables_dir, f"PerLifeTables_{sex_code}_{infix}_TR2020.csv"
        )

        # The first four lines of each file are skipped before the header row.
        frame = pd.read_csv(path, sep=",", skiprows=4)

        # Tag the rows with their origin and collect the table.
        frames.append(frame.assign(Sex=sex_code, Method=source))

    # Stack all tables by row.
    return pd.concat(frames)

51 

52 

def parse_ssa_life_table(
    age_min,
    age_max,
    female=True,
    cohort=None,
    cross_sec=False,
    year=None,
    terminal=False,
):
    """
    Reads (year, age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    age_min : int
        Minimum age for survival probabilities.
    age_max : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival probabilities.
        The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.
    terminal : bool, optional
        Indicator for whether the mortality probability for age_max should be
        included (default False). Default behavior matches format of parse_income_spec.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'age_min' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('age_min' + n) if the
        longitudinal method is used. The list is empty when the requested age
        range is empty (age_min == age_max with terminal=False).

    Raises
    ------
    AssertionError
        If age_max is lower than age_min, or if the number of probabilities
        found differs from the number of requested ages.
    TypeError
        If cross_sec is True and no year is given, or if cross_sec is False
        and no cohort is given.
    Exception
        If the life tables do not contain all the requested age-year
        combinations.

    """

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Find year - age combinations that we need.
    # Raised explicitly (instead of using an assert statement) so the check is
    # not stripped under `python -O`; the exception type is unchanged.
    if not age_max >= age_min:
        raise AssertionError("The maximum age can not be lower than the minimum age.")

    # The upper bound of arange is exclusive, so age_max is only included
    # when the terminal flag is set.
    ages = np.arange(age_min, age_max + 1 if terminal else age_max)
    age_count = ages.size

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                "cross-sectional survival probabilities."
            )
        # Same year for every age
        years = np.repeat(year, ages.shape)
    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                "using longitudinal survival probabilities."
            )
        # The cohort reaches age a in year cohort + a
        years = cohort + ages

    # An empty age range (age_min == age_max without the terminal age) needs no
    # table lookup. Returning here also avoids taking max() of an empty array
    # further down, which would raise a ValueError.
    if age_count == 0:
        return []

    # Read the historical ("Hist") and forecasted ("Alt2") tables, keeping only
    # the death probabilities q(x) indexed by (Year, age x).
    hist_tab, fore_tab = (
        pd.read_csv(
            os.path.join(ssa_tables_dir, f"PerLifeTables_{abb}_{infix}_TR2020.csv"),
            sep=",",
            skiprows=4,
            usecols=["Year", "x", "q(x)"],
            index_col=["Year", "x"],
        )
        for infix in ("Hist", "Alt2")
    )

    # Find the point at which projections start
    max_hist = int(hist_tab.index.get_level_values("Year").max())

    # Log (at debug level) if projections are used.
    if years.max() > max_hist:
        message = f"Survival probabilities beyond {max_hist} are projections."
        _log.debug(message)

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort deathrates.
    message = (
        "Parsed life tables do not contain all the requested "
        "age-year combinations."
    )
    try:
        DeathPrb = tab.loc[list(zip(years, ages))].sort_values(by="x")
    except KeyError as e:
        # Keep the original lookup failure attached as the cause.
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got all the probabilities
    assert len(LivPrb) == age_count, message

    # Transform from array to list
    return list(LivPrb)
200 return LivPrb