Coverage for HARK/Calibration/life_tables/us_ssa/SSATools.py

1"""

2Created on Fri Jan 8 15:36:14 2021

4@author: Mateo

5"""

7import os

9import numpy as np

10import pandas as pd

12from HARK import _log

# Absolute path of the directory that contains this module. The functions in
# this module look for the SSA life-table csv files in this directory.
ssa_tables_dir = os.path.dirname(os.path.abspath(__file__))

def get_ssa_life_tables():
    """
    Reads all the SSA life tables and combines them, adding columns indicating
    where each row came from (male or female, historical or projected).

    Four csv files are read from the directory `ssa_tables_dir`, one for each
    combination of sex ("M", "F") and method ("Historical", "Projected"). The
    first four lines of every file are skipped before parsing.

    Returns
    -------
    Pandas DataFrame
        A DataFrame containing the information in SSA life-tables for both
        sexes and all the available years. It returns all the columns in the
        original tables, plus the columns "Sex" and "Method" identifying the
        table each row was read from. The tables are stacked by row and the
        row labels of the individual tables are kept as they were read.

    """
    # Read the four tables and add columns identifying them
    dsets = []
    for sex in ["M", "F"]:
        for method in ["Historical", "Projected"]:
            # Historical tables use the "Hist" infix in their file name,
            # projected tables use "Alt2".
            infix = "Hist" if method == "Historical" else "Alt2"
            filename = os.path.join(
                ssa_tables_dir, f"PerLifeTables_{sex}_{infix}_TR2020.csv"
            )

            # Read csv
            table = pd.read_csv(filename, sep=",", skiprows=4)

            # Add identifying info
            table["Sex"] = sex
            table["Method"] = method

            dsets.append(table)

    # Concatenate tables by row and return them
    return pd.concat(dsets)

def _read_ssa_death_probs(path):
    """
    Reads the death probabilities "q(x)" from a single SSA life-table csv
    file, indexed by ("Year", "x"). The first four lines of the file are
    skipped before parsing.
    """
    return pd.read_csv(
        path,
        sep=",",
        skiprows=4,
        usecols=["Year", "x", "q(x)"],
        index_col=["Year", "x"],
    )


def parse_ssa_life_table(
    min_age, max_age, female=True, cohort=None, cross_sec=False, year=None
):
    """
    Reads (year,age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    min_age : int
        Minimum age for survival probabilities.
    max_age : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival
        probabilities. The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'min_age' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('min_age' + n) if the
        longitudinal method is used.

    Raises
    ------
    AssertionError
        If 'max_age' is lower than 'min_age', or if the number of
        probabilities found differs from the number of requested ages.
    TypeError
        If 'year' is missing with the cross-sectional method, or 'cohort' is
        missing with the longitudinal method.
    Exception
        If the life tables do not contain all the requested age-year
        combinations.

    """

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Find year - age combinations that we need.
    # The check is an explicit raise (instead of an `assert` statement) so
    # that it is not stripped when Python runs with -O. The exception type is
    # kept as AssertionError for backward compatibility.
    if not max_age >= min_age:
        raise AssertionError("The maximum age can not be lower than the minimum age.")
    ages = np.arange(min_age, max_age + 1)

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                "cross-sectional survival probabilities."
            )

        # Same calendar year for every age
        years = np.repeat(year, ages.shape)

    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                "using longitudinal survival probabilities."
            )

        # The cohort reaches age a in year cohort + a
        years = cohort + ages

    # Create filenames

    # Historical and forecasted
    file_hist = os.path.join(ssa_tables_dir, f"PerLifeTables_{abb}_Hist_TR2020.csv")
    file_fore = os.path.join(ssa_tables_dir, f"PerLifeTables_{abb}_Alt2_TR2020.csv")

    # Read them
    hist_tab = _read_ssa_death_probs(file_hist)
    fore_tab = _read_ssa_death_probs(file_fore)

    # Find the point at which projections start
    max_hist = hist_tab.index.get_level_values("Year").max()

    # Let the user know (at debug level) if projections are used.
    if years.max() > max_hist:
        message = f"Survival probabilities beyond {max_hist} are projections."
        _log.debug(message)

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort deathrates.

    message = (
        "Parsed life tables do not contain all the requested "
        "age-year combinations."
    )
    try:
        DeathPrb = tab.loc[list(zip(years, ages))].sort_values(by="x")

    except KeyError as e:
        # Keep the generic exception type callers may rely on, but chain the
        # original KeyError so the missing label is still visible.
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got all the probabilities. Explicit raise so the check
    # also runs under -O.
    if len(LivPrb) != max_age - min_age + 1:
        raise AssertionError(message)

    # Transform from array to list
    LivPrb = list(LivPrb)

    return LivPrb

Coverage for HARK/Calibration/life_tables/us_ssa/SSATools.py: 60%

45 statements