Coverage for HARK/distributions/continuous.py: 89%

1from typing import Any, Optional, Union

3import numpy as np

4from scipy.special import erfc

5from scipy.stats import (

6 rv_continuous,

7 multivariate_normal,

8 norm,

9 lognorm,

10 uniform,

11 weibull_min,

12)

13from scipy.stats._distn_infrastructure import rv_continuous_frozen

15from HARK.distributions.base import Distribution

16from HARK.distributions.discrete import DiscreteDistribution

18# CONTINUOUS DISTRIBUTIONS

21class ContinuousFrozenDistribution(rv_continuous_frozen, Distribution):

22 """

23 Parameterized continuous distribution from scipy.stats with seed management.

24 """

26 def __init__(

27 self, dist: rv_continuous, *args: Any, seed: int = 0, **kwds: Any

28 ) -> None:

29 """

30 Parameterized continuous distribution from scipy.stats with seed management.

32 Parameters

33 ----------

34 dist : rv_continuous

35 Continuous distribution from scipy.stats.

36 seed : int, optional

37 Seed for random number generator, by default 0

38 """

39 rv_continuous_frozen.__init__(self, dist, *args, **kwds)

40 Distribution.__init__(self, seed=seed)

43class Normal(ContinuousFrozenDistribution):

44 """

45 A Normal distribution.

47 Parameters

48 ----------

49 mu : float or [float]

50 One or more means. Number of elements T in mu determines number

51 of rows of output.

52 sigma : float or [float]

53 One or more standard deviations. Number of elements T in sigma

54 determines number of rows of output.

55 seed : int

56 Seed for random number generator.

57 """

59 def __init__(self, mu=0.0, sigma=1.0, seed=0):

60 self.mu = np.asarray(mu)

61 self.sigma = np.asarray(sigma)

63 if self.mu.size != self.sigma.size:

64 raise AttributeError(

65 "mu and sigma must be of same size, are %s, %s"

66 % ((self.mu.size), (self.sigma.size))

67 )

69 super().__init__(norm, loc=mu, scale=sigma, seed=seed)

70 self.infimum = -np.inf * np.ones(self.mu.size)

71 self.supremum = np.inf * np.ones(self.mu.size)

73 def discretize(self, N, method="hermite", endpoints=False):

74 """

75 For normal distributions, the Gauss-Hermite quadrature rule is

76 used as the default method for discretization.

77 """

79 return super().discretize(N, method, endpoints)

81 def _approx_hermite(self, N, endpoints=False):

82 """

83 Returns a discrete approximation of this distribution

84 using the Gauss-Hermite quadrature rule.

86 Parameters

87 ----------

88 N : int

89 Number of discrete points to approximate the distribution.

91 Returns

92 -------

93 DiscreteDistribution

94 Discrete approximation of this distribution.

95 """

97 x, w = np.polynomial.hermite.hermgauss(N)

98 # normalize w

99 pmv = w * np.pi**-0.5

100 # correct x

101 atoms = np.sqrt(2.0) * self.sigma * x + self.mu

102

103 limit = {"dist": self, "method": "hermite", "N": N, "endpoints": endpoints}

104

105 return DiscreteDistribution(

106 pmv,

107 atoms,

108 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"),

109 limit=limit,

110 )

111

112 def _approx_equiprobable(self, N, endpoints=False):

113 """

114 Returns a discrete equiprobable approximation of this distribution.

115

116 Parameters

117 ----------

118 N : int

119 Number of discrete points to approximate the distribution.

120

121 Returns

122 -------

123 DiscreteDistribution

124 Discrete approximation of this distribution.

125 """

126

127 CDF = np.linspace(0, 1, N + 1)

128 lims = norm.ppf(CDF)

129 pdf = norm.pdf(lims)

130

131 # Find conditional means using Mills's ratio

132 pmv = np.diff(CDF)

133 atoms = self.mu - np.diff(pdf) / pmv * self.sigma

134

135 limit = {"dist": self, "method": "equiprobable", "N": N, "endpoints": endpoints}

136

137 return DiscreteDistribution(

138 pmv,

139 atoms,

140 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"),

141 limit=limit,

142 )

143

144

145class MultivariateNormal(ContinuousFrozenDistribution):

146 """

147 Class for representing multivariate normal distributions in HARK. With no

148 parameters passed, it defaults to a standard normal distribution represented

149 as a MultivariateNormal instance. Currently lacks any discretization method.

150

151 Parameters

152 ----------

153 mu : [float] or np.array

154 Vector of means of each dimension of the distribution.

155 sigma : [[float]] or np.array

156 Symmetric, positive semidefinite covariance matrix for the distribution.

157 seed: int

158 Seed for the RNG.

159 """

160

161 def __init__(

162 self,

163 mu=np.array([0.0]),

164 sigma=np.array([[1.0]]),

165 seed=0,

166 ):

167 self.mu = np.array(mu)

168 self.sigma = np.array(sigma)

169 self.seed = seed

170 self.reset()

171

172 def reset(self):

173 self.dstn = multivariate_normal(mean=self.mu, cov=self.sigma, seed=self.seed)

174

175 def draw(self, N=1):

176 return self.dstn.rvs(size=N).T

177

178

179class MultivariateLognormal(ContinuousFrozenDistribution):

180 """

181 Class for representing multivariate lognormal distributions in HARK. With no

182 parameters passed, it defaults to a standard normal distribution represented

183 as a MultivariateLognormal instance. Currently lacks any discretization method.

184

185 Parameters

186 ----------

187 mu : [float] or np.array

188 Vector of means of the underling normal of each dimension of the distribution.

189 sigma : [[float]] or np.array

190 Symmetric, positive semidefinite covariance matrix for the underlying normal

191 distribution.

192 seed: int

193 Seed for the RNG.

194 """

195

196 def __init__(

197 self,

198 mu=np.array([0.0]),

199 sigma=np.array([[1.0]]),

200 seed=0,

201 ):

202 self.mu = np.array(mu)

203 self.sigma = np.array(sigma)

204 self.seed = seed

205 self.reset()

206

207 def reset(self):

208 self.dstn = multivariate_normal(mean=self.mu, cov=self.sigma, seed=self.seed)

209

210 def draw(self, N=1):

211 return np.exp(self.dstn.rvs(size=N).T)

212

213

214class Lognormal(ContinuousFrozenDistribution):

215 """

216 A Lognormal distribution

217

218 Parameters

219 ----------

220 mu : float or [float]

221 One or more means of underlying normal distribution.

222 Number of elements T in mu determines number of rows of output.

223 sigma : float or [float]

224 One or more standard deviations of underlying normal distribution.

225 Number of elements T in sigma determines number of rows of output.

226 seed : int

227 Seed for random number generator.

228 """

229

230 def __new__(

231 cls,

232 mu: Union[float, np.ndarray] = 0.0,

233 sigma: Union[float, np.ndarray] = 1.0,

234 seed: Optional[int] = 0,

235 mean=None,

236 std=None,

237 ):

238 """

239 Create a new Lognormal distribution. If sigma is zero, return a

240 DiscreteDistribution with a single atom at exp(mu).

241

242 Parameters

243 ----------

244 mu : Union[float, np.ndarray], optional

245 Mean of underlying normal distribution, by default 0.0

246 sigma : Union[float, np.ndarray], optional

247 Standard deviation of underlying normal distribution, by default 1.0

248 seed : Optional[int], optional

249 Seed for random number generator, by default None

250 mean: optional

251 For alternative mean/std parameterization, the mean of the lognormal distribution

252 std: optional

253 For alternative mean/std parameterization, the standard deviation of the lognormal distribution

254

255 Returns

256 -------

257 Lognormal or DiscreteDistribution

258 Lognormal distribution or DiscreteDistribution with a single atom.

259 """

260

261 if sigma == 0:

262 # If sigma is zero, return a DiscreteDistribution with a single atom

263 if mean is not None:

264 return DiscreteDistribution([1.0], mean, seed=seed)

265 return DiscreteDistribution([1.0], [np.exp(mu)], seed=seed)

266

267 return super().__new__(cls)

268

269 def __init__(

270 self,

271 mu: Union[float, np.ndarray] = 0.0,

272 sigma: Union[float, np.ndarray] = 1.0,

273 seed: Optional[int] = 0,

274 mean=None,

275 std=None,

276 ):

277 if mean is not None and std is not None:

278 mean_squared = mean**2

279 variance = std**2

280 mu = np.log(mean / (np.sqrt(1.0 + variance / mean_squared)))

281 sigma = np.sqrt(np.log(1.0 + variance / mean_squared))

282

283 if mean is not None and std is None:

284 mu = np.log(mean) - sigma**2 / 2

285

286 self.mu = np.asarray(mu)

287 self.sigma = np.asarray(sigma)

288

289 if self.mu.size != self.sigma.size:

290 raise AttributeError(

291 "mu and sigma must be of same size, are %s, %s"

292 % ((self.mu.size), (self.sigma.size))

293 )

294

295 # Set up the RNG

296 super().__init__(lognorm, s=self.sigma, scale=np.exp(self.mu), loc=0, seed=seed)

297 self.infimum = np.array([0.0])

298 self.supremum = np.array([np.inf])

299

300 def _approx_equiprobable(

301 self, N, endpoints=False, tail_N=0, tail_bound=None, tail_order=np.e

302 ):

303 """

304 Construct a discrete approximation to a lognormal distribution with underlying

305 normal distribution N(mu,sigma). Makes an equiprobable distribution by

306 default, but user can optionally request augmented tails with exponentially

307 sized point masses. This can improve solution accuracy in some models.

308

309 TODO: add endpoints option

310

311 Parameters

312 ----------

313 N: int

314 Number of discrete points in the "main part" of the approximation.

315 tail_N: int

316 Number of points in each "tail part" of the approximation; 0 = no tail.

317 tail_bound: [float]

318 CDF boundaries of the tails vs main portion; tail_bound[0] is the lower

319 tail bound, tail_bound[1] is the upper tail bound. Inoperative when

320 tail_N = 0. Can make "one tailed" approximations with 0.0 or 1.0.

321 tail_order: float

322 Factor by which consecutive point masses in a "tail part" differ in

323 probability. Should be >= 1 for sensible spacing.

324

325 Returns

326 -------

327 d : DiscreteDistribution

328 Probability associated with each point in array of discrete

329 points for discrete probability mass function.

330 """

331 tail_bound = tail_bound if tail_bound is not None else [0.02, 0.98]

332

333 # Handle the trivial case first

334 if self.sigma == 0.0:

335 pmv = np.ones(N) / N

336 atoms = np.exp(self.mu) * np.ones(N)

337

338 else:

339 # Find the CDF boundaries of each segment

340 if tail_N > 0:

341 lo_cut = tail_bound[0]

342 hi_cut = tail_bound[1]

343 else:

344 lo_cut = 0.0

345 hi_cut = 1.0

346 inner_size = hi_cut - lo_cut

347 inner_CDF_vals = [

348 lo_cut + x * N ** (-1.0) * inner_size for x in range(1, N)

349 ]

350 if inner_size < 1.0:

351 scale = 1.0 / tail_order

352 mag = (1.0 - scale**tail_N) / (1.0 - scale)

353 lower_CDF_vals = [0.0]

354 if lo_cut > 0.0:

355 for x in range(tail_N - 1, -1, -1):

356 lower_CDF_vals.append(lower_CDF_vals[-1] + lo_cut * scale**x / mag)

357 upper_CDF_vals = [hi_cut]

358 if hi_cut < 1.0:

359 for x in range(tail_N):

360 upper_CDF_vals.append(

361 upper_CDF_vals[-1] + (1.0 - hi_cut) * scale**x / mag

362 )

363 CDF_vals = np.array(lower_CDF_vals + inner_CDF_vals + upper_CDF_vals)

364 CDF_vals[-1] = 1.0

365 CDF_vals[0] = 0.0 # somehow these need fixing sometimes

366

367 # Calculate probability masses for each node

368 pmv = CDF_vals[1:] - CDF_vals[:-1]

369 pmv /= np.sum(pmv)

370

371 # Translate the CDF values to z-scores (stdevs from mean), then get q-scores

372 z_cuts = norm.ppf(CDF_vals)

373 q_cuts = (z_cuts - self.sigma) / np.sqrt(2)

374

375 # Evaluate the (complementary) error function at the q values

376 erf_q = erfc(q_cuts)

377 erf_q_neg = erfc(-q_cuts)

378

379 # Evaluate the base for the conditional expectations

380 vals_base = erf_q[:-1] - erf_q[1:]

381 these = q_cuts[:-1] < -2.0 # flag low q values and use the *other* version

382 vals_base[these] = erf_q_neg[1:][these] - erf_q_neg[:-1][these]

383

384 # Make and apply the normalization factor and probability weights

385 norm_fac = 0.5 * np.exp(self.mu + 0.5 * self.sigma**2) / pmv

386 atoms = vals_base * norm_fac

387

388 limit = {

389 "dist": self,

390 "method": "equiprobable",

391 "N": N,

392 "endpoints": endpoints,

393 "tail_N": tail_N,

394 "tail_bound": tail_bound,

395 "tail_order": tail_order,

396 }

397

398 return DiscreteDistribution(

399 pmv,

400 atoms,

401 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"),

402 limit=limit,

403 )

404

405 def _approx_hermite(self, N, endpoints=False):

406 """

407 Returns a discrete approximation of this distribution

408 using the Gauss-Hermite quadrature rule.

409

410 Parameters

411 ----------

412 N : int

413 Number of discrete points to approximate the distribution.

414

415 Returns

416 -------

417 DiscreteDistribution

418 Discrete approximation of this distribution.

419 """

420

421 x, w = np.polynomial.hermite.hermgauss(N)

422 # normalize w

423 pmv = w * np.pi**-0.5

424 # correct x

425 atoms = np.exp(np.sqrt(2.0) * self.sigma * x + self.mu)

426

427 limit = {"dist": self, "method": "hermite", "N": N, "endpoints": endpoints}

428

429 return DiscreteDistribution(

430 pmv,

431 atoms,

432 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"),

433 limit=limit,

434 )

435

436 @classmethod

437 def from_mean_std(cls, mean, std, seed=0):

438 """

439 Construct a LogNormal distribution from its

440 mean and standard deviation.

441

442 This is unlike the normal constructor for the distribution,

443 which takes the mu and sigma for the normal distribution

444 that is the logarithm of the Log Normal distribution.

445

446 Parameters

447 ----------

448 mean : float or [float]

449 One or more means. Number of elements T in mu determines number

450 of rows of output.

451 std : float or [float]

452 One or more standard deviations. Number of elements T in sigma

453 determines number of rows of output.

454 seed : int

455 Seed for random number generator.

456

457 Returns

458 ---------

459 LogNormal

460

461 """

462 mean_squared = mean**2

463 variance = std**2

464 mu = np.log(mean / (np.sqrt(1.0 + variance / mean_squared)))

465 sigma = np.sqrt(np.log(1.0 + variance / mean_squared))

466

467 return cls(mu=mu, sigma=sigma, seed=seed)

468

469

470class MeanOneLogNormal(Lognormal):

471 """

472 A Lognormal distribution with mean 1.

473 """

474

475 def __init__(self, sigma=1.0, seed=0):

476 mu = -0.5 * sigma**2

477 super().__init__(mu=mu, sigma=sigma, seed=seed)

478

479

480class Uniform(ContinuousFrozenDistribution):

481 """

482 A Uniform distribution.

483

484 Parameters

485 ----------

486 bot : float or [float]

487 One or more bottom values.

488 Number of elements T in mu determines number

489 of rows of output.

490 top : float or [float]

491 One or more top values.

492 Number of elements T in top determines number of

493 rows of output.

494 seed : int

495 Seed for random number generator.

496 """

497

498 def __init__(self, bot=0.0, top=1.0, seed=0):

499 self.bot = np.asarray(bot)

500 self.top = np.asarray(top)

501

502 super().__init__(uniform, loc=self.bot, scale=self.top - self.bot, seed=seed)

503 self.infimum = np.array([0.0])

504 self.supremum = np.array([np.inf])

505

506 def _approx_equiprobable(self, N, endpoints=False):

507 """

508 Makes a discrete approximation to this uniform distribution.

509

510 Parameters

511 ----------

512 N : int

513 The number of points in the discrete approximation.

514 endpoints : bool

515 Whether to include the endpoints in the approximation.

516

517 Returns

518 -------

519 d : DiscreteDistribution

520 Probability associated with each point in array of discrete

521 points for discrete probability mass function.

522 """

523 pmv = np.ones(N) / float(N)

524

525 center = (self.top + self.bot) / 2.0

526 width = (self.top - self.bot) / 2.0

527 atoms = center + width * np.linspace(-(N - 1.0) / 2.0, (N - 1.0) / 2.0, N) / (

528 N / 2.0

529 )

530

531 if endpoints: # insert endpoints with infinitesimally small mass

532 atoms = np.concatenate(([self.bot], atoms, [self.top]))

533 pmv = np.concatenate(([0.0], pmv, [0.0]))

534

535 limit = {

536 "dist": self,

537 "method": "equiprobable",

538 "N": N,

539 "endpoints": endpoints,

540 }

541

542 return DiscreteDistribution(

543 pmv,

544 atoms,

545 seed=self._rng.integers(0, 2**31 - 1, dtype="int32"),

546 limit=limit,

547 )

548

549

550class Weibull(ContinuousFrozenDistribution):

551 """

552 A Weibull distribution.

553

554 Parameters

555 ----------

556 scale : float or [float]

557 One or more scales. Number of elements T in scale

558 determines number of

559 rows of output.

560 shape : float or [float]

561 One or more shape parameters. Number of elements T in scale

562 determines number of rows of output.

563 seed : int

564 Seed for random number generator.

565 """

566

567 def __init__(self, scale=1.0, shape=1.0, seed=0):

568 self.scale = np.asarray(scale)

569 self.shape = np.asarray(shape)

570

571 # Set up the RNG

572 super().__init__(weibull_min, c=shape, scale=scale, seed=seed)

573 self.infimum = np.array([0.0])

574 self.supremum = np.array([np.inf])