Coverage for HARK/Calibration/life_tables/us_ssa/SSATools.py: 60%
45 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 05:14 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 05:14 +0000
1"""
2Created on Fri Jan 8 15:36:14 2021
4@author: Mateo
5"""
7import os
9import numpy as np
10import pandas as pd
12from HARK import _log
# Absolute path of the directory that contains this module. The SSA life-table
# CSV files read by the functions below are looked up in this directory.
ssa_tables_dir = os.path.split(os.path.abspath(__file__))[0]
def get_ssa_life_tables():
    """
    Load every SSA life table stored next to this module into one DataFrame.

    Four CSV files are read, one for each combination of sex ("M", "F") and
    method ("Historical", "Projected"). Every table is tagged with the
    combination it came from and the tables are then stacked row-wise.

    Returns
    -------
    Pandas DataFrame
        All rows and all original columns of the four tables, plus two added
        columns, "Sex" and "Method", identifying the source table of each row.

    """
    # File-name infix that corresponds to each method label.
    infix_by_method = {"Historical": "Hist", "Projected": "Alt2"}

    frames = []
    for sex in ("M", "F"):
        for method, infix in infix_by_method.items():
            path = os.path.join(
                ssa_tables_dir, f"PerLifeTables_{sex}_{infix}_TR2020.csv"
            )

            # The first four lines of each file are skipped before parsing.
            frame = pd.read_csv(path, sep=",", skiprows=4)

            # Record which table every row belongs to.
            frame["Sex"] = sex
            frame["Method"] = method

            frames.append(frame)

    # Stack the tables by row; each table keeps its own row labels.
    return pd.concat(frames)
def _read_ssa_death_probabilities(filename):
    """Read one SSA life-table CSV, keeping only q(x) indexed by (Year, x)."""
    return pd.read_csv(
        filename,
        sep=",",
        skiprows=4,
        usecols=["Year", "x", "q(x)"],
        index_col=["Year", "x"],
    )


def parse_ssa_life_table(
    min_age, max_age, female=True, cohort=None, cross_sec=False, year=None
):
    """
    Reads (year,age)-specific death probabilities from SSA life tables and
    transforms them to a list of survival probabilities in the format that
    HARK expects.

    Two methods are supported:
        - Cross-sectional: finds the 1-year survival probabilities for
          individuals in the age range for a fixed year.
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, year)

        - Longitudinal: finds the 1-year survival probabilities for individuals
          of a fixed cohort at different ages (and years).
          In the output,
          SurvPrb(age) = 1 - DeathPrb(age, cohort + age)

    Parameters
    ----------
    min_age : int
        Minimum age for survival probabilities.
    max_age : int
        Maximum age for survival probabilities.
    female : bool, optional
        Boolean indicating whether to use female or male survival
        probabilities. The default is True (female).
    cohort : int, optional
        If longitudinal probabilities are requested, this is the birth year of
        the cohort that will be tracked. The default is None.
    cross_sec : bool, optional
        Boolean indicating whether the cross-sectional method should be used.
        The default is False (using the longitudinal method).
    year : int, optional
        If cross-sectional probabilities are requested, this is the year at
        which they will be taken. The default is None.

    Returns
    -------
    LivPrb : [float]
        List of 1-year survival probabilities.
        LivPrb[n] corresponds to the probability that an individual of age
        'min_age' + n survives one year, in the year 'year' if the
        cross-sectional method is used or 'cohort' + ('min_age' + n) if the
        longitudinal method is used.

    Raises
    ------
    AssertionError
        If 'max_age' is lower than 'min_age', or if the number of
        probabilities found differs from the number of requested ages.
    TypeError
        If 'year' is missing for the cross-sectional method, or 'cohort' is
        missing for the longitudinal method.
    Exception
        If the life tables do not contain some requested age-year
        combination.

    """
    # Validate with an explicit raise instead of `assert`, which is stripped
    # when Python runs with -O. AssertionError is kept so that the exception
    # type seen by existing callers does not change.
    if not max_age >= min_age:
        raise AssertionError("The maximum age can not be lower than the minimum age.")

    # Find year - age combinations that we need
    ages = np.arange(min_age, max_age + 1)

    if cross_sec:
        if year is None:
            raise TypeError(
                "You must provide a year when using "
                "cross-sectional survival probabilities."
            )
        # Same calendar year for every age.
        years = np.repeat(year, ages.shape)
    else:
        if cohort is None:
            raise TypeError(
                "You must provide a cohort (birth year) when "
                "using longitudinal survival probabilities."
            )
        # The cohort reaches age a in calendar year cohort + a.
        years = cohort + ages

    # Infix for file name depending on sex
    abb = "F" if female else "M"

    # Historical and forecasted tables
    hist_tab = _read_ssa_death_probabilities(
        os.path.join(ssa_tables_dir, "PerLifeTables_" + abb + "_Hist_TR2020.csv")
    )
    fore_tab = _read_ssa_death_probabilities(
        os.path.join(ssa_tables_dir, "PerLifeTables_" + abb + "_Alt2_TR2020.csv")
    )

    # Find the point at which projections start
    max_hist = hist_tab.index.get_level_values("Year").max()

    # Log (at debug level) when projections are used.
    if years.max() > max_hist:
        _log.debug("Survival probabilities beyond %s are projections.", max_hist)

    # Concatenate them
    tab = pd.concat([hist_tab, fore_tab])

    # Subset and sort death rates by age.
    message = (
        "Parsed life tables do not contain all the requested "
        "age-year combinations."
    )
    try:
        # Materialise the (year, age) keys so .loc receives a plain list of
        # tuples rather than a one-shot iterator.
        DeathPrb = tab.loc[list(zip(years, ages))].sort_values(by="x")
    except KeyError as e:
        # Chain the original KeyError so the missing key stays visible.
        raise Exception(message) from e

    # Transform to numpy survival probabilities
    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()

    # Make sure we got exactly one probability per requested age. This is an
    # explicit raise (not `assert`) so the check also runs under -O.
    if len(LivPrb) != max_age - min_age + 1:
        raise AssertionError(message)

    # Transform from array to list
    return list(LivPrb)