Coverage for HARK/Calibration/life_tables/us

1"""

2Created on Fri Jan 8 15:36:14 2021

4@author: Mateo

5"""

7import os

9import numpy as np

10import pandas as pd

12from HARK import _log

# Absolute path of the directory that contains this module. The functions in
# this file build the paths of the SSA life-table CSV files relative to it.
ssa_tables_dir = os.path.dirname(os.path.abspath(__file__))

def get_ssa_life_tables():
    """
    Load every SSA life table and stack them into a single DataFrame.

    Four CSV files are read from ``ssa_tables_dir``: one per combination of
    sex ("M", "F") and table type ("Historical", "Projected"). Each table is
    tagged with two extra columns, "Sex" and "Method", recording which file
    its rows came from.

    Returns
    -------
    Pandas DataFrame
        Row-wise concatenation of the four tables. All the columns of the
        original files are kept, plus the "Sex" and "Method" columns.

    """
    # Infix used in the file name for each type of table
    infix_by_method = {"Historical": "Hist", "Projected": "Alt2"}

    frames = []
    for sex in ("M", "F"):
        for method, infix in infix_by_method.items():
            csv_path = os.path.join(
                ssa_tables_dir, f"PerLifeTables_{sex}_{infix}_TR2020.csv"
            )

            # The first four rows of each file are skipped when parsing
            frame = pd.read_csv(csv_path, sep=",", skiprows=4)

            # Tag the rows with their origin
            frame["Sex"] = sex
            frame["Method"] = method

            frames.append(frame)

    # Stack the four tables on top of each other
    return pd.concat(frames)

def parse_ssa_life_table(
    age_min,
    age_max,
    female=True,
    cohort=None,
    cross_sec=False,
    year=None,
    terminal=False,
):
    """
    Reads (year, age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    age_min : int
        Minimum age for survival probabilities.
    age_max : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival
        probabilities. The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.
    terminal : bool, optional
        Indicator for whether the mortality probability for age_max should be
        included (default False). Default behavior matches format of
        parse_income_spec.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'age_min' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('age_min' + n) if the
        longitudinal method is used.
        The list is empty when the requested age range is empty, i.e. when
        age_min == age_max and terminal is False.

    Raises
    ------
    AssertionError
        If age_max is lower than age_min, or if the number of probabilities
        found in the tables differs from the number of requested ages.
    TypeError
        If cross_sec is True and no year is given, or if cross_sec is False
        and no cohort is given.
    Exception
        If the life tables do not contain one of the requested age-year
        combinations.

    """

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Find year - age combinations that we need
    assert age_max >= age_min, "The maximum age can not be lower than the minimum age."
    ages = np.arange(age_min, age_max + 1 if terminal else age_max)
    age_count = ages.size

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                + "cross-sectional survival probabilities."
            )

        # Every age is looked up in the same calendar year
        years = np.repeat(year, ages.shape)

    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                + "using longitudinal survival probabilities."
            )

        # The cohort reaches each age in a different calendar year
        years = cohort + ages

    # Nothing was requested (age_min == age_max without the terminal age).
    # Return before touching the tables: taking the maximum of an empty set
    # of years below would otherwise fail.
    if age_count == 0:
        return []

    # Read the historical ("Hist") and forecasted ("Alt2") tables, keeping
    # only the death probabilities q(x) indexed by (Year, x).
    hist_tab, fore_tab = (
        pd.read_csv(
            os.path.join(
                ssa_tables_dir, "PerLifeTables_" + abb + "_" + infix + "_TR2020.csv"
            ),
            sep=",",
            skiprows=4,
            usecols=["Year", "x", "q(x)"],
            index_col=["Year", "x"],
        )
        for infix in ("Hist", "Alt2")
    )

    # Find the point at which projections start
    max_hist = hist_tab.index.get_level_values("Year").max()

    # Let the user know (at debug level) if projections are used.
    if years.max() > max_hist:
        message = f"Survival probabilities beyond {max_hist} are projections."
        _log.debug(message)

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort deathrates.
    message = (
        "Parsed life tables do not contain all the requested "
        + "age-year combinations."
    )
    try:
        DeathPrb = tab.loc[list(zip(years, ages))].sort_values(by="x")
    except KeyError as e:
        # Chain the original lookup failure so that it stays visible
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got all the probabilities
    assert len(LivPrb) == age_count, message

    # Transform from array to list
    return list(LivPrb)

Coverage for HARK / Calibration / life_tables / us_ssa / SSATools.py: 81%

48 statements