Coverage for HARK/distributions/continuous.py: 89%

158 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-02 05:14 +0000

1from typing import Any, Optional, Union 

2 

3import numpy as np 

4from scipy.special import erfc 

5from scipy.stats import ( 

6 rv_continuous, 

7 multivariate_normal, 

8 norm, 

9 lognorm, 

10 uniform, 

11 weibull_min, 

12) 

13from scipy.stats._distn_infrastructure import rv_continuous_frozen 

14 

15from HARK.distributions.base import Distribution 

16from HARK.distributions.discrete import DiscreteDistribution 

17 

18# CONTINUOUS DISTRIBUTIONS 

19 

20 

21class ContinuousFrozenDistribution(rv_continuous_frozen, Distribution): 

22 """ 

23 Parameterized continuous distribution from scipy.stats with seed management. 

24 """ 

25 

26 def __init__( 

27 self, dist: rv_continuous, *args: Any, seed: int = 0, **kwds: Any 

28 ) -> None: 

29 """ 

30 Parameterized continuous distribution from scipy.stats with seed management. 

31 

32 Parameters 

33 ---------- 

34 dist : rv_continuous 

35 Continuous distribution from scipy.stats. 

36 seed : int, optional 

37 Seed for random number generator, by default 0 

38 """ 

39 rv_continuous_frozen.__init__(self, dist, *args, **kwds) 

40 Distribution.__init__(self, seed=seed) 

41 

42 

43class Normal(ContinuousFrozenDistribution): 

44 """ 

45 A Normal distribution. 

46 

47 Parameters 

48 ---------- 

49 mu : float or [float] 

50 One or more means. Number of elements T in mu determines number 

51 of rows of output. 

52 sigma : float or [float] 

53 One or more standard deviations. Number of elements T in sigma 

54 determines number of rows of output. 

55 seed : int 

56 Seed for random number generator. 

57 """ 

58 

59 def __init__(self, mu=0.0, sigma=1.0, seed=0): 

60 self.mu = np.asarray(mu) 

61 self.sigma = np.asarray(sigma) 

62 

63 if self.mu.size != self.sigma.size: 

64 raise AttributeError( 

65 "mu and sigma must be of same size, are %s, %s" 

66 % ((self.mu.size), (self.sigma.size)) 

67 ) 

68 

69 super().__init__(norm, loc=mu, scale=sigma, seed=seed) 

70 self.infimum = -np.inf * np.ones(self.mu.size) 

71 self.supremum = np.inf * np.ones(self.mu.size) 

72 

73 def discretize(self, N, method="hermite", endpoints=False): 

74 """ 

75 For normal distributions, the Gauss-Hermite quadrature rule is 

76 used as the default method for discretization. 

77 """ 

78 

79 return super().discretize(N, method, endpoints) 

80 

81 def _approx_hermite(self, N, endpoints=False): 

82 """ 

83 Returns a discrete approximation of this distribution 

84 using the Gauss-Hermite quadrature rule. 

85 

86 Parameters 

87 ---------- 

88 N : int 

89 Number of discrete points to approximate the distribution. 

90 

91 Returns 

92 ------- 

93 DiscreteDistribution 

94 Discrete approximation of this distribution. 

95 """ 

96 

97 x, w = np.polynomial.hermite.hermgauss(N) 

98 # normalize w 

99 pmv = w * np.pi**-0.5 

100 # correct x 

101 atoms = np.sqrt(2.0) * self.sigma * x + self.mu 

102 

103 limit = {"dist": self, "method": "hermite", "N": N, "endpoints": endpoints} 

104 

105 return DiscreteDistribution( 

106 pmv, 

107 atoms, 

108 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"), 

109 limit=limit, 

110 ) 

111 

112 def _approx_equiprobable(self, N, endpoints=False): 

113 """ 

114 Returns a discrete equiprobable approximation of this distribution. 

115 

116 Parameters 

117 ---------- 

118 N : int 

119 Number of discrete points to approximate the distribution. 

120 

121 Returns 

122 ------- 

123 DiscreteDistribution 

124 Discrete approximation of this distribution. 

125 """ 

126 

127 CDF = np.linspace(0, 1, N + 1) 

128 lims = norm.ppf(CDF) 

129 pdf = norm.pdf(lims) 

130 

131 # Find conditional means using Mills's ratio 

132 pmv = np.diff(CDF) 

133 atoms = self.mu - np.diff(pdf) / pmv * self.sigma 

134 

135 limit = {"dist": self, "method": "equiprobable", "N": N, "endpoints": endpoints} 

136 

137 return DiscreteDistribution( 

138 pmv, 

139 atoms, 

140 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"), 

141 limit=limit, 

142 ) 

143 

144 

145class MultivariateNormal(ContinuousFrozenDistribution): 

146 """ 

147 Class for representing multivariate normal distributions in HARK. With no 

148 parameters passed, it defaults to a standard normal distribution represented 

149 as a MultivariateNormal instance. Currently lacks any discretization method. 

150 

151 Parameters 

152 ---------- 

153 mu : [float] or np.array 

154 Vector of means of each dimension of the distribution. 

155 sigma : [[float]] or np.array 

156 Symmetric, positive semidefinite covariance matrix for the distribution. 

157 seed: int 

158 Seed for the RNG. 

159 """ 

160 

161 def __init__( 

162 self, 

163 mu=np.array([0.0]), 

164 sigma=np.array([[1.0]]), 

165 seed=0, 

166 ): 

167 self.mu = np.array(mu) 

168 self.sigma = np.array(sigma) 

169 self.seed = seed 

170 self.reset() 

171 

172 def reset(self): 

173 self.dstn = multivariate_normal(mean=self.mu, cov=self.sigma, seed=self.seed) 

174 

175 def draw(self, N=1): 

176 return self.dstn.rvs(size=N).T 

177 

178 

179class MultivariateLognormal(ContinuousFrozenDistribution): 

180 """ 

181 Class for representing multivariate lognormal distributions in HARK. With no 

182 parameters passed, it defaults to a standard normal distribution represented 

183 as a MultivariateLognormal instance. Currently lacks any discretization method. 

184 

185 Parameters 

186 ---------- 

187 mu : [float] or np.array 

188 Vector of means of the underling normal of each dimension of the distribution. 

189 sigma : [[float]] or np.array 

190 Symmetric, positive semidefinite covariance matrix for the underlying normal 

191 distribution. 

192 seed: int 

193 Seed for the RNG. 

194 """ 

195 

196 def __init__( 

197 self, 

198 mu=np.array([0.0]), 

199 sigma=np.array([[1.0]]), 

200 seed=0, 

201 ): 

202 self.mu = np.array(mu) 

203 self.sigma = np.array(sigma) 

204 self.seed = seed 

205 self.reset() 

206 

207 def reset(self): 

208 self.dstn = multivariate_normal(mean=self.mu, cov=self.sigma, seed=self.seed) 

209 

210 def draw(self, N=1): 

211 return np.exp(self.dstn.rvs(size=N).T) 

212 

213 

214class Lognormal(ContinuousFrozenDistribution): 

215 """ 

216 A Lognormal distribution 

217 

218 Parameters 

219 ---------- 

220 mu : float or [float] 

221 One or more means of underlying normal distribution. 

222 Number of elements T in mu determines number of rows of output. 

223 sigma : float or [float] 

224 One or more standard deviations of underlying normal distribution. 

225 Number of elements T in sigma determines number of rows of output. 

226 seed : int 

227 Seed for random number generator. 

228 """ 

229 

230 def __new__( 

231 cls, 

232 mu: Union[float, np.ndarray] = 0.0, 

233 sigma: Union[float, np.ndarray] = 1.0, 

234 seed: Optional[int] = 0, 

235 mean=None, 

236 std=None, 

237 ): 

238 """ 

239 Create a new Lognormal distribution. If sigma is zero, return a 

240 DiscreteDistribution with a single atom at exp(mu). 

241 

242 Parameters 

243 ---------- 

244 mu : Union[float, np.ndarray], optional 

245 Mean of underlying normal distribution, by default 0.0 

246 sigma : Union[float, np.ndarray], optional 

247 Standard deviation of underlying normal distribution, by default 1.0 

248 seed : Optional[int], optional 

249 Seed for random number generator, by default None 

250 mean: optional 

251 For alternative mean/std parameterization, the mean of the lognormal distribution 

252 std: optional 

253 For alternative mean/std parameterization, the standard deviation of the lognormal distribution 

254 

255 Returns 

256 ------- 

257 Lognormal or DiscreteDistribution 

258 Lognormal distribution or DiscreteDistribution with a single atom. 

259 """ 

260 

261 if sigma == 0: 

262 # If sigma is zero, return a DiscreteDistribution with a single atom 

263 if mean is not None: 

264 return DiscreteDistribution([1.0], mean, seed=seed) 

265 return DiscreteDistribution([1.0], [np.exp(mu)], seed=seed) 

266 

267 return super().__new__(cls) 

268 

269 def __init__( 

270 self, 

271 mu: Union[float, np.ndarray] = 0.0, 

272 sigma: Union[float, np.ndarray] = 1.0, 

273 seed: Optional[int] = 0, 

274 mean=None, 

275 std=None, 

276 ): 

277 if mean is not None and std is not None: 

278 mean_squared = mean**2 

279 variance = std**2 

280 mu = np.log(mean / (np.sqrt(1.0 + variance / mean_squared))) 

281 sigma = np.sqrt(np.log(1.0 + variance / mean_squared)) 

282 

283 if mean is not None and std is None: 

284 mu = np.log(mean) - sigma**2 / 2 

285 

286 self.mu = np.asarray(mu) 

287 self.sigma = np.asarray(sigma) 

288 

289 if self.mu.size != self.sigma.size: 

290 raise AttributeError( 

291 "mu and sigma must be of same size, are %s, %s" 

292 % ((self.mu.size), (self.sigma.size)) 

293 ) 

294 

295 # Set up the RNG 

296 super().__init__(lognorm, s=self.sigma, scale=np.exp(self.mu), loc=0, seed=seed) 

297 self.infimum = np.array([0.0]) 

298 self.supremum = np.array([np.inf]) 

299 

300 def _approx_equiprobable( 

301 self, N, endpoints=False, tail_N=0, tail_bound=None, tail_order=np.e 

302 ): 

303 """ 

304 Construct a discrete approximation to a lognormal distribution with underlying 

305 normal distribution N(mu,sigma). Makes an equiprobable distribution by 

306 default, but user can optionally request augmented tails with exponentially 

307 sized point masses. This can improve solution accuracy in some models. 

308 

309 TODO: add endpoints option 

310 

311 Parameters 

312 ---------- 

313 N: int 

314 Number of discrete points in the "main part" of the approximation. 

315 tail_N: int 

316 Number of points in each "tail part" of the approximation; 0 = no tail. 

317 tail_bound: [float] 

318 CDF boundaries of the tails vs main portion; tail_bound[0] is the lower 

319 tail bound, tail_bound[1] is the upper tail bound. Inoperative when 

320 tail_N = 0. Can make "one tailed" approximations with 0.0 or 1.0. 

321 tail_order: float 

322 Factor by which consecutive point masses in a "tail part" differ in 

323 probability. Should be >= 1 for sensible spacing. 

324 

325 Returns 

326 ------- 

327 d : DiscreteDistribution 

328 Probability associated with each point in array of discrete 

329 points for discrete probability mass function. 

330 """ 

331 tail_bound = tail_bound if tail_bound is not None else [0.02, 0.98] 

332 

333 # Handle the trivial case first 

334 if self.sigma == 0.0: 

335 pmv = np.ones(N) / N 

336 atoms = np.exp(self.mu) * np.ones(N) 

337 

338 else: 

339 # Find the CDF boundaries of each segment 

340 if tail_N > 0: 

341 lo_cut = tail_bound[0] 

342 hi_cut = tail_bound[1] 

343 else: 

344 lo_cut = 0.0 

345 hi_cut = 1.0 

346 inner_size = hi_cut - lo_cut 

347 inner_CDF_vals = [ 

348 lo_cut + x * N ** (-1.0) * inner_size for x in range(1, N) 

349 ] 

350 if inner_size < 1.0: 

351 scale = 1.0 / tail_order 

352 mag = (1.0 - scale**tail_N) / (1.0 - scale) 

353 lower_CDF_vals = [0.0] 

354 if lo_cut > 0.0: 

355 for x in range(tail_N - 1, -1, -1): 

356 lower_CDF_vals.append(lower_CDF_vals[-1] + lo_cut * scale**x / mag) 

357 upper_CDF_vals = [hi_cut] 

358 if hi_cut < 1.0: 

359 for x in range(tail_N): 

360 upper_CDF_vals.append( 

361 upper_CDF_vals[-1] + (1.0 - hi_cut) * scale**x / mag 

362 ) 

363 CDF_vals = np.array(lower_CDF_vals + inner_CDF_vals + upper_CDF_vals) 

364 CDF_vals[-1] = 1.0 

365 CDF_vals[0] = 0.0 # somehow these need fixing sometimes 

366 

367 # Calculate probability masses for each node 

368 pmv = CDF_vals[1:] - CDF_vals[:-1] 

369 pmv /= np.sum(pmv) 

370 

371 # Translate the CDF values to z-scores (stdevs from mean), then get q-scores 

372 z_cuts = norm.ppf(CDF_vals) 

373 q_cuts = (z_cuts - self.sigma) / np.sqrt(2) 

374 

375 # Evaluate the (complementary) error function at the q values 

376 erf_q = erfc(q_cuts) 

377 erf_q_neg = erfc(-q_cuts) 

378 

379 # Evaluate the base for the conditional expectations 

380 vals_base = erf_q[:-1] - erf_q[1:] 

381 these = q_cuts[:-1] < -2.0 # flag low q values and use the *other* version 

382 vals_base[these] = erf_q_neg[1:][these] - erf_q_neg[:-1][these] 

383 

384 # Make and apply the normalization factor and probability weights 

385 norm_fac = 0.5 * np.exp(self.mu + 0.5 * self.sigma**2) / pmv 

386 atoms = vals_base * norm_fac 

387 

388 limit = { 

389 "dist": self, 

390 "method": "equiprobable", 

391 "N": N, 

392 "endpoints": endpoints, 

393 "tail_N": tail_N, 

394 "tail_bound": tail_bound, 

395 "tail_order": tail_order, 

396 } 

397 

398 return DiscreteDistribution( 

399 pmv, 

400 atoms, 

401 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"), 

402 limit=limit, 

403 ) 

404 

405 def _approx_hermite(self, N, endpoints=False): 

406 """ 

407 Returns a discrete approximation of this distribution 

408 using the Gauss-Hermite quadrature rule. 

409 

410 Parameters 

411 ---------- 

412 N : int 

413 Number of discrete points to approximate the distribution. 

414 

415 Returns 

416 ------- 

417 DiscreteDistribution 

418 Discrete approximation of this distribution. 

419 """ 

420 

421 x, w = np.polynomial.hermite.hermgauss(N) 

422 # normalize w 

423 pmv = w * np.pi**-0.5 

424 # correct x 

425 atoms = np.exp(np.sqrt(2.0) * self.sigma * x + self.mu) 

426 

427 limit = {"dist": self, "method": "hermite", "N": N, "endpoints": endpoints} 

428 

429 return DiscreteDistribution( 

430 pmv, 

431 atoms, 

432 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"), 

433 limit=limit, 

434 ) 

435 

436 @classmethod 

437 def from_mean_std(cls, mean, std, seed=0): 

438 """ 

439 Construct a LogNormal distribution from its 

440 mean and standard deviation. 

441 

442 This is unlike the normal constructor for the distribution, 

443 which takes the mu and sigma for the normal distribution 

444 that is the logarithm of the Log Normal distribution. 

445 

446 Parameters 

447 ---------- 

448 mean : float or [float] 

449 One or more means. Number of elements T in mu determines number 

450 of rows of output. 

451 std : float or [float] 

452 One or more standard deviations. Number of elements T in sigma 

453 determines number of rows of output. 

454 seed : int 

455 Seed for random number generator. 

456 

457 Returns 

458 --------- 

459 LogNormal 

460 

461 """ 

462 mean_squared = mean**2 

463 variance = std**2 

464 mu = np.log(mean / (np.sqrt(1.0 + variance / mean_squared))) 

465 sigma = np.sqrt(np.log(1.0 + variance / mean_squared)) 

466 

467 return cls(mu=mu, sigma=sigma, seed=seed) 

468 

469 

470class MeanOneLogNormal(Lognormal): 

471 """ 

472 A Lognormal distribution with mean 1. 

473 """ 

474 

475 def __init__(self, sigma=1.0, seed=0): 

476 mu = -0.5 * sigma**2 

477 super().__init__(mu=mu, sigma=sigma, seed=seed) 

478 

479 

480class Uniform(ContinuousFrozenDistribution): 

481 """ 

482 A Uniform distribution. 

483 

484 Parameters 

485 ---------- 

486 bot : float or [float] 

487 One or more bottom values. 

488 Number of elements T in mu determines number 

489 of rows of output. 

490 top : float or [float] 

491 One or more top values. 

492 Number of elements T in top determines number of 

493 rows of output. 

494 seed : int 

495 Seed for random number generator. 

496 """ 

497 

498 def __init__(self, bot=0.0, top=1.0, seed=0): 

499 self.bot = np.asarray(bot) 

500 self.top = np.asarray(top) 

501 

502 super().__init__(uniform, loc=self.bot, scale=self.top - self.bot, seed=seed) 

503 self.infimum = np.array([0.0]) 

504 self.supremum = np.array([np.inf]) 

505 

506 def _approx_equiprobable(self, N, endpoints=False): 

507 """ 

508 Makes a discrete approximation to this uniform distribution. 

509 

510 Parameters 

511 ---------- 

512 N : int 

513 The number of points in the discrete approximation. 

514 endpoints : bool 

515 Whether to include the endpoints in the approximation. 

516 

517 Returns 

518 ------- 

519 d : DiscreteDistribution 

520 Probability associated with each point in array of discrete 

521 points for discrete probability mass function. 

522 """ 

523 pmv = np.ones(N) / float(N) 

524 

525 center = (self.top + self.bot) / 2.0 

526 width = (self.top - self.bot) / 2.0 

527 atoms = center + width * np.linspace(-(N - 1.0) / 2.0, (N - 1.0) / 2.0, N) / ( 

528 N / 2.0 

529 ) 

530 

531 if endpoints: # insert endpoints with infinitesimally small mass 

532 atoms = np.concatenate(([self.bot], atoms, [self.top])) 

533 pmv = np.concatenate(([0.0], pmv, [0.0])) 

534 

535 limit = { 

536 "dist": self, 

537 "method": "equiprobable", 

538 "N": N, 

539 "endpoints": endpoints, 

540 } 

541 

542 return DiscreteDistribution( 

543 pmv, 

544 atoms, 

545 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"), 

546 limit=limit, 

547 ) 

548 

549 

550class Weibull(ContinuousFrozenDistribution): 

551 """ 

552 A Weibull distribution. 

553 

554 Parameters 

555 ---------- 

556 scale : float or [float] 

557 One or more scales. Number of elements T in scale 

558 determines number of 

559 rows of output. 

560 shape : float or [float] 

561 One or more shape parameters. Number of elements T in scale 

562 determines number of rows of output. 

563 seed : int 

564 Seed for random number generator. 

565 """ 

566 

567 def __init__(self, scale=1.0, shape=1.0, seed=0): 

568 self.scale = np.asarray(scale) 

569 self.shape = np.asarray(shape) 

570 

571 # Set up the RNG 

572 super().__init__(weibull_min, c=shape, scale=scale, seed=seed) 

573 self.infimum = np.array([0.0]) 

574 self.supremum = np.array([np.inf])