Coverage for HARK/Calibration/life_tables/us_ssa/SSATools.py: 60%

45 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-02 05:14 +0000

1""" 

2Created on Fri Jan 8 15:36:14 2021 

3 

4@author: Mateo 

5""" 

6 

7import os 

8 

9import numpy as np 

10import pandas as pd 

11 

12from HARK import _log 

13 

14ssa_tables_dir = os.path.dirname(os.path.abspath(__file__)) 

15 

16 

def get_ssa_life_tables():
    """
    Load every SSA life table shipped next to this module and stack them.

    One CSV file is read for each (sex, method) pair, where sex is "M" or
    "F" and method is "Historical" or "Projected". Two columns, "Sex" and
    "Method", are added to each table so the origin of every row can be
    recovered after the tables are concatenated.

    Returns
    -------
    Pandas DataFrame
        Row-wise concatenation of the four tables, with all the columns of
        the original files plus the "Sex" and "Method" identifiers. The
        original row index of each table is kept as-is.

    """
    # Maps the method label stored in the output to the infix used in the
    # file names. Insertion order also fixes the order in which files are read.
    method_infixes = {"Historical": "Hist", "Projected": "Alt2"}

    def _load_table(sex, method):
        # Build the path of the table for this sex and method.
        path = os.path.join(
            ssa_tables_dir,
            f"PerLifeTables_{sex}_{method_infixes[method]}_TR2020.csv",
        )

        # The first four lines of each file are skipped before the header.
        frame = pd.read_csv(path, sep=",", skiprows=4)

        # Tag every row with where it came from.
        frame["Sex"] = sex
        frame["Method"] = method
        return frame

    # Order: male historical, male projected, female historical, female projected.
    frames = [
        _load_table(sex, method) for sex in ("M", "F") for method in method_infixes
    ]

    return pd.concat(frames)

51 

52 

def parse_ssa_life_table(
    min_age, max_age, female=True, cohort=None, cross_sec=False, year=None
):
    """
    Reads (year,age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
              SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
              SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    min_age : int
        Minimum age for survival probabilities.
    max_age : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival probabilities.
        The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'min_age' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('min_age' + n) if the
        longitudinal method is used.

    Raises
    ------
    AssertionError
        If 'max_age' is lower than 'min_age', or if the number of
        probabilities retrieved differs from the number of requested ages.
    TypeError
        If 'cross_sec' is True and 'year' is None, or if 'cross_sec' is False
        and 'cohort' is None.
    Exception
        If the life tables do not contain some requested (year, age)
        combination. The underlying KeyError is attached as its cause.

    """

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Find year - age combinations that we need.
    # NOTE: the assert is kept (rather than raising a different exception) so
    # that callers relying on AssertionError keep working.
    assert max_age >= min_age, "The maximum age can not be lower than the minimum age."
    ages = np.arange(min_age, max_age + 1)

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                + "cross-sectional survival probabilities."
            )

        # Same calendar year for every age.
        years = np.repeat(year, ages.shape)

    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                + "using longitudinal survival probabilities."
            )

        # A cohort born in 'cohort' reaches age a in year cohort + a.
        years = cohort + ages

    # Create filenames

    # Historical and forecasted
    file_hist = os.path.join(
        ssa_tables_dir, "PerLifeTables_" + abb + "_Hist_TR2020.csv"
    )
    file_fore = os.path.join(
        ssa_tables_dir, "PerLifeTables_" + abb + "_Alt2_TR2020.csv"
    )

    # Read them. Both files are parsed with identical options: only the year,
    # age ("x") and death probability ("q(x)") columns are loaded, and
    # (Year, x) becomes the row index.
    read_options = {
        "sep": ",",
        "skiprows": 4,
        "usecols": ["Year", "x", "q(x)"],
        "index_col": ["Year", "x"],
    }
    hist_tab = pd.read_csv(file_hist, **read_options)
    fore_tab = pd.read_csv(file_fore, **read_options)

    # Find the point at which projections start
    max_hist = max(hist_tab.index.get_level_values("Year"))

    # Let the user know (at debug level) if projections are used.
    if max(years) > max_hist:
        _log.debug(f"Survival probabilities beyond {max_hist} are projections.")

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort deathrates.
    message = (
        "Parsed life tables do not contain all the requested "
        + "age-year combinations."
    )
    try:
        # Materialize the (year, age) pairs: a list of tuples is the
        # explicit way to select rows of a MultiIndex with .loc, whereas a
        # bare zip object is a one-shot iterator.
        wanted = list(zip(years, ages))
        DeathPrb = tab.loc[wanted].sort_values(by="x")

    except KeyError as e:
        # Chain the original lookup failure as the cause of the error.
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got all the probabilities
    assert len(LivPrb) == max_age - min_age + 1, message

    # Transform from array to list
    return list(LivPrb)

93 LivPrb : [float] 

94 List of 1-year survival probabilities. 

95 LivPrb[n] corresponds to the probability that an indivivual of age 

96 'min_age' + n survives one year, in the year 'year' if the 

97 cross-sectional method is used or 'cohort' + ('min_age' + n) if the 

98 longitudinal method is used. 

99 

100 """ 

101 

102 # Infix for file name depending on sex 

103 abb = "F" if female else "M" 

104 

105 # Find year - age combinations that we need 

106 assert max_age >= min_age, "The maximum age can not be lower than the minimum age." 

107 ages = np.arange(min_age, max_age + 1) 

108 

109 if cross_sec: 

110 if year is None: 

111 raise ( 

112 TypeError( 

113 "You must provide a year when using " 

114 + "cross-sectional survival probabilities." 

115 ) 

116 ) 

117 

118 years = np.repeat(year, ages.shape) 

119 

120 else: 

121 if cohort is None: 

122 raise ( 

123 TypeError( 

124 "You must provide a cohort (birth year) when " 

125 + "using longitudinal survival probabilities." 

126 ) 

127 ) 

128 

129 years = cohort + ages 

130 

131 # Create filenames 

132 

133 # Historical and forecasted 

134 file_hist = os.path.join( 

135 ssa_tables_dir, "PerLifeTables_" + abb + "_Hist_TR2020.csv" 

136 ) 

137 file_fore = os.path.join( 

138 ssa_tables_dir, "PerLifeTables_" + abb + "_Alt2_TR2020.csv" 

139 ) 

140 

141 # Read them 

142 hist_tab = pd.read_csv( 

143 file_hist, 

144 sep=",", 

145 skiprows=4, 

146 usecols=["Year", "x", "q(x)"], 

147 index_col=["Year", "x"], 

148 ) 

149 fore_tab = pd.read_csv( 

150 file_fore, 

151 sep=",", 

152 skiprows=4, 

153 usecols=["Year", "x", "q(x)"], 

154 index_col=["Year", "x"], 

155 ) 

156 

157 # Find the point at which projections start 

158 max_hist = max(hist_tab.index.get_level_values("Year")) 

159 

160 # Warn the user if projections are used. 

161 if max(years) > max_hist: 

162 message = f"Survival probabilities beyond {max_hist} are projections." 

163 _log.debug(message) 

164 

165 # Concatenate them 

166 tab = pd.concat([hist_tab, fore_tab]) 

167 

168 # Subset and sort deathrates. 

169 

170 message = ( 

171 "Parsed life tables do not contain all the requested " 

172 + "age-year combinations." 

173 ) 

174 try: 

175 DeathPrb = tab.loc[zip(years, ages)].sort_values(by="x") 

176 

177 except KeyError as e: 

178 raise Exception(message).with_traceback(e.__traceback__) 

179 

180 # Transform to numpy survival probabilities 

181 LivPrb = 1 - DeathPrb["q(x)"].to_numpy() 

182 

183 # Make sure we got all the probabilities 

184 assert len(LivPrb) == max_age - min_age + 1, message 

185 

186 # Transform from array to list 

187 LivPrb = list(LivPrb) 

188 

189 return LivPrb