Coverage for HARK / Calibration / life_tables / us_ssa / SSATools.py: 81%
48 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-07 05:16 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-07 05:16 +0000
1"""
2Created on Fri Jan 8 15:36:14 2021
4@author: Mateo
5"""
7import os
9import numpy as np
10import pandas as pd
12from HARK import _log
# Directory that contains this module. The SSA life-table CSV files are looked
# up relative to it by the functions in this file.
ssa_tables_dir = os.path.dirname(os.path.abspath(__file__))
def get_ssa_life_tables():
    """
    Load every SSA life table shipped next to this module and stack them.

    Four CSV files are read (male/female crossed with historical/projected).
    Each one is tagged with two extra columns, "Sex" and "Method", recording
    which file its rows came from, and the four tables are then concatenated
    row-wise in the order M-Historical, M-Projected, F-Historical,
    F-Projected.

    Returns
    -------
    Pandas DataFrame
        All columns of the original SSA tables for both sexes and every
        available year, plus the identifying "Sex" and "Method" columns.

    """
    # File-name infix that the SSA files use for each estimation method.
    infix_by_method = {"Historical": "Hist", "Projected": "Alt2"}

    def load_table(sex, method):
        # Build the path of the requested table inside the module directory.
        path = os.path.join(
            ssa_tables_dir,
            "PerLifeTables_" + sex + "_" + infix_by_method[method] + "_TR2020.csv",
        )

        # The first four lines of each file are skipped before the header.
        frame = pd.read_csv(path, sep=",", skiprows=4)

        # Record where these rows came from.
        frame["Sex"] = sex
        frame["Method"] = method
        return frame

    # Stack the four tables by row.
    return pd.concat(
        [
            load_table(sex, method)
            for sex in ("M", "F")
            for method in infix_by_method
        ]
    )
def _read_ssa_death_probs(filename):
    """
    Read the death probabilities from a single SSA period life table.

    Parameters
    ----------
    filename : str
        Path to one of the 'PerLifeTables_*_TR2020.csv' files.

    Returns
    -------
    Pandas DataFrame
        Table holding the column 'q(x)', indexed by ('Year', 'x').

    """
    # The first four lines of each file are skipped before the header row.
    return pd.read_csv(
        filename,
        sep=",",
        skiprows=4,
        usecols=["Year", "x", "q(x)"],
        index_col=["Year", "x"],
    )


def parse_ssa_life_table(
    age_min,
    age_max,
    female=True,
    cohort=None,
    cross_sec=False,
    year=None,
    terminal=False,
):
    """
    Reads (year,age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
              SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
              SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    age_min : int
        Minimum age for survival probabilities.
    age_max : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival
        probabilities. The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.
    terminal : bool, optional
        Indicator for whether the mortality probability for age_max should be
        included (default False). Default behavior matches format of
        parse_income_spec.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'age_min' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('age_min' + n) if the
        longitudinal method is used.
        The list is empty when the requested age range is empty, i.e. when
        'age_min' == 'age_max' and 'terminal' is False.

    Raises
    ------
    AssertionError
        If 'age_max' is lower than 'age_min', or if the number of
        probabilities found differs from the number of ages requested.
    TypeError
        If 'year' is missing for the cross-sectional method, or 'cohort' is
        missing for the longitudinal method.
    Exception
        If the life tables do not contain every requested age-year
        combination.

    """

    # Validate the age range with an explicit raise so that the check is not
    # stripped when Python runs with -O. The exception type is the one an
    # assert statement would produce.
    if age_max < age_min:
        raise AssertionError(
            "The maximum age can not be lower than the minimum age."
        )

    # Find year - age combinations that we need
    last_age = age_max + 1 if terminal else age_max
    ages = np.arange(age_min, last_age)
    age_count = ages.size

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                + "cross-sectional survival probabilities."
            )
        # Every age is looked up in the same calendar year.
        years = np.repeat(year, age_count)
    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                + "using longitudinal survival probabilities."
            )
        # The cohort reaches age a in calendar year cohort + a.
        years = cohort + ages

    # An empty age range needs no table lookup. Returning here also avoids
    # taking the maximum of an empty array of years further down.
    if age_count == 0:
        return []

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Read the historical and forecasted tables
    hist_tab = _read_ssa_death_probs(
        os.path.join(ssa_tables_dir, "PerLifeTables_" + abb + "_Hist_TR2020.csv")
    )
    fore_tab = _read_ssa_death_probs(
        os.path.join(ssa_tables_dir, "PerLifeTables_" + abb + "_Alt2_TR2020.csv")
    )

    # Find the point at which projections start
    max_hist = hist_tab.index.get_level_values("Year").max()

    # Log a note if projections are used.
    if years.max() > max_hist:
        message = f"Survival probabilities beyond {max_hist} are projections."
        _log.debug(message)

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort deathrates.
    message = (
        "Parsed life tables do not contain all the requested "
        + "age-year combinations."
    )
    try:
        DeathPrb = tab.loc[list(zip(years, ages))].sort_values(by="x")
    except KeyError as e:
        # Chain the lookup failure so the missing key stays visible.
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got exactly one probability per requested age
    if len(LivPrb) != age_count:
        raise AssertionError(message)

    # Transform from array to list
    return list(LivPrb)