Coverage for HARK / estimation.py: 95%
240 statements
1"""Functions for estimating structural models, including optimization methods
2and bootstrapping tools.
3"""
5import csv
6import multiprocessing
7import warnings
8from copy import deepcopy # For replicating complex objects
9from time import time # Used to time execution
11import estimagic as em
12import numpy as np # Numerical Python
13from joblib import Parallel, delayed
14from scipy.optimize import fmin, fmin_powell # off-the-shelf minimizers
16from HARK.core import AgentType
18__all__ = [
19 "minimize_nelder_mead",
20 "minimize_powell",
21 "bootstrap_sample_from_data",
22 "parallelNelderMead",
23]


def minimize_nelder_mead(
    objective_func,
    parameter_guess,
    verbose=False,
    which_vars=None,
    **kwargs,
):
    """Minimizes the objective function using the Nelder-Mead simplex algorithm,
    starting from an initial parameter guess.

    Parameters
    ----------
    objective_func : function
        The function to be minimized. It should take only a single argument, which
        should be a list representing the parameters to be estimated.
    parameter_guess : [float]
        A starting point for the Nelder-Mead algorithm, which must be a valid
        input for objective_func.
    verbose : boolean
        A flag for the amount of output to print.
    which_vars : np.array or None
        Array of booleans indicating which parameters should be estimated. When
        not provided, estimation is performed on all parameters.

    Returns
    -------
    xopt : [float]
        The values that minimize objective_func.

    """
    # Specify a temporary "modified objective function" that restricts parameters to be estimated
    if which_vars is None:
        which_vars = np.ones(len(parameter_guess), dtype=bool)

    def objective_func_mod(params):
        params_full = np.copy(parameter_guess)
        params_full[which_vars] = params
        out = objective_func(params_full)
        return out

    # convert parameter guess to np array to slice it with boolean array
    parameter_guess_mod = np.array(parameter_guess)[which_vars]

    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin(
        objective_func_mod,
        parameter_guess_mod,
        full_output=1,
        disp=verbose,
        **kwargs,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    optiter = OUTPUT[2]  # Number of iterations performed.
    funcalls = OUTPUT[3]  # Number of function calls made.
    warnflag = OUTPUT[4]  # warnflag : int
    #   1 : Maximum number of function evaluations made.
    #   2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )
    xopt_full = np.copy(parameter_guess)
    xopt_full[which_vars] = xopt

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt_full
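

# Illustrative usage sketch (an assumption for demonstration, not part of the
# HARK API): minimize a simple quadratic while holding its second parameter
# fixed via which_vars. The first entry of the result should end up near 3.0,
# while the second stays at its guessed value of 0.0.
def _example_minimize_nelder_mead():
    def quad(params):
        return (params[0] - 3.0) ** 2 + (params[1] + 1.0) ** 2

    free_params = np.array([True, False])  # estimate only the first parameter
    xopt_full = minimize_nelder_mead(quad, [0.0, 0.0], which_vars=free_params)
    return xopt_full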


def minimize_powell(objective_func, parameter_guess, verbose=False):
    """Minimizes the objective function using a derivative-free Powell algorithm,
    starting from an initial parameter guess.

    Parameters
    ----------
    objective_func : function
        The function to be minimized. It should take only a single argument, which
        should be a list representing the parameters to be estimated.
    parameter_guess : [float]
        A starting point for the Powell algorithm, which must be a valid
        input for objective_func.
    verbose : boolean
        A flag for the amount of output to print.

    Returns
    -------
    xopt : [float]
        The values that minimize objective_func.

    """
    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin_powell(
        objective_func,
        parameter_guess,
        full_output=1,
        maxiter=1000,
        disp=verbose,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    direc = OUTPUT[2]  # Current direction set.
    optiter = OUTPUT[3]  # Number of iterations performed.
    funcalls = OUTPUT[4]  # Number of function calls made.
    warnflag = OUTPUT[5]  # warnflag : int
    #   1 : Maximum number of function evaluations made.
    #   2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", direc="
            + str(direc)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt
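

# Illustrative usage sketch (assumed objective, not from HARK): Powell's
# method on the 2D Rosenbrock function, whose minimum lies at [1.0, 1.0].
def _example_minimize_powell():
    def rosenbrock(params):
        return (1.0 - params[0]) ** 2 + 100.0 * (params[1] - params[0] ** 2) ** 2

    xopt = minimize_powell(rosenbrock, [-1.0, 1.0])
    return xopt  # should be close to [1.0, 1.0]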


def bootstrap_sample_from_data(data, weights=None, seed=0):
    """Samples rows from the input array of data, generating a new data array with
    an equal number of rows (records). Rows are drawn with equal probability
    by default, but probabilities can be specified with weights (must sum to 1).

    Parameters
    ----------
    data : np.array
        An array of data, with each row representing a record.
    weights : np.array
        A weighting array with length equal to data.shape[0].
    seed : int
        A seed for the random number generator.

    Returns
    -------
    new_data : np.array
        A resampled version of input data.

    """
    # Set up the random number generator
    RNG = np.random.default_rng(seed)
    N = data.shape[0]

    # Set up weights as cumulative selection probabilities
    if weights is not None:
        cutoffs = np.cumsum(weights)
    else:
        cutoffs = np.arange(1, N + 1) / N  # equal probability for each record

    # Draw random indices
    indices = np.searchsorted(cutoffs, RNG.uniform(size=N))

    # Create a bootstrapped sample
    new_data = deepcopy(data[indices])
    return new_data
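

# Illustrative usage sketch (assumed data and weights, for demonstration only):
# resample five records with non-uniform selection probabilities. The weights
# must sum to 1, matching the docstring's requirement.
def _example_bootstrap_sample():
    data = np.arange(10.0).reshape(5, 2)  # five records, two fields each
    weights = np.array([0.4, 0.3, 0.1, 0.1, 0.1])
    new_data = bootstrap_sample_from_data(data, weights=weights, seed=42)
    return new_data  # same shape as data; early rows appear more often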


def parallelNelderMead(
    obj_func,
    guess,
    perturb=None,
    P=1,
    ftol=1e-8,
    xtol=1e-8,
    maxiter=np.inf,
    maxeval=np.inf,
    r_param=1.0,
    e_param=1.0,
    c_param=0.5,
    s_param=0.5,
    maxthreads=None,
    name=None,
    resume=False,
    savefreq=None,
    verbose=1,
):
    """A parallel implementation of the Nelder-Mead minimization algorithm, as
    described in Lee and Wiswall (2007). For long optimization procedures, it
    can save progress between iterations and resume later.

    Parameters
    ----------
    obj_func : function
        The objective function to be minimized. Takes a single 1D array as input.
    guess : np.array
        Initial starting point for the simplex, representing an input for obj_func.
    perturb : np.array
        Perturbation vector for the simplex, of the same length as an input to
        obj_func. If perturb[j] is non-zero, a simplex point will be created
        that perturbs the j-th element of guess by perturb[j]; if it is zero,
        then the j-th parameter of obj_func will not be optimized over. By
        default, perturb=None, indicating that all parameters should be optimized,
        with an initial perturbation of 0.1*guess.
    P : int
        Degree of parallelization: the number of vertices of the simplex to try
        to update on each iteration of the process.
    ftol : float
        Absolute tolerance of the objective function for convergence. If
        successive iterations return minimum function values that differ by
        less than ftol, the process terminates successfully.
    xtol : float
        Absolute tolerance of the input values for convergence. If the maximum
        distance between the current minimum point and the worst point in the
        simplex is less than xtol, then the process terminates successfully.
    maxiter : int
        Maximum number of Nelder-Mead iterations; reaching iters=maxiter is
        reported as an "unsuccessful" minimization.
    maxeval : int
        Maximum number of evaluations of obj_func (across all processes); reaching
        evals=maxeval is reported as an "unsuccessful" minimization.
    r_param : float
        Parameter indicating magnitude of the reflection point calculation.
    e_param : float
        Parameter indicating magnitude of the expansion point calculation.
    c_param : float
        Parameter indicating magnitude of the contraction point calculation.
    s_param : float
        Parameter indicating magnitude of the shrink calculation.
    maxthreads : int
        The maximum number of CPU cores that the optimization should use,
        regardless of the size of the problem.
    name : string
        A filename for (optionally) saving the progress of the Nelder-Mead search,
        and for resuming a previous search (when resume=True). Useful for long
        searches that could potentially be interrupted by computer downtime.
    resume : boolean
        An indicator for whether the search should resume from earlier progress.
        When True, the process will load a progress file whose name is given by
        the name input.
    savefreq : int
        When not None, search progress will be saved to name.txt every savefreq
        iterations, to be loaded later with resume=True.
    verbose : int
        Indicator for the verbosity of the optimization routine. Higher values
        generate more text output; verbose=0 produces no text output.

    Returns
    -------
    xopt : np.array
        The input that minimizes obj_func, as found by the search.

    """
    # If this is a resumed search, load the data
    if resume:
        simplex, fvals, iters, evals = load_nelder_mead_data(name)
        dim_count = fvals.size - 1
        N = dim_count + 1  # Number of points in simplex
        K = simplex.shape[1]  # Total number of parameters

    # Otherwise, construct the initial simplex and array of function values
    else:
        if perturb is None:  # Default: perturb each parameter by 10%
            perturb = 0.1 * guess
            perturb[guess == 0] = 0.1

        params_to_opt = np.where(perturb != 0)[0]  # Indices of parameters to optimize
        dim_count = params_to_opt.size  # Number of parameters to search over
        N = dim_count + 1  # Number of points in simplex
        K = guess.size  # Total number of parameters
        simplex = np.tile(guess, (N, 1))
        for j in range(dim_count):
            # Perturb each parameter to optimize by the specified distance
            simplex[j + 1, params_to_opt[j]] = (
                simplex[j + 1, params_to_opt[j]] + perturb[params_to_opt[j]]
            )

        # Initialize iteration and evaluation counts, plus a 1D array of function values
        fvals = np.zeros(dim_count + 1) + np.nan
        iters = 0
        evals = 0

    # Make sure the requested degree of parallelization is feasible
    if P > N - 1:
        warnings.warn(
            "Requested degree of simplex parallelization is "
            + str(P)
            + ", but dimension of optimization problem is only "
            + str(N - 1)
            + ".",
        )
        warnings.warn(
            "Degree of parallelization has been reduced to " + str(N - 1) + "."
        )
        P = N - 1

    # Create the pool of worker processes
    cpu_cores = multiprocessing.cpu_count()  # Total number of available CPU cores
    cores_to_use = min(cpu_cores, dim_count)
    if maxthreads is not None:  # Cap the number of cores if desired
        cores_to_use = min(cores_to_use, maxthreads)
    parallel = Parallel(n_jobs=cores_to_use)
    use_parallel = cores_to_use > 1

    # Begin a new Nelder-Mead search
    if not resume:
        temp_simplex = list(simplex)  # Evaluate the initial simplex
        if use_parallel:
            fvals = np.array(
                parallel(delayed(obj_func)(params) for params in temp_simplex)
            )
        else:
            fvals = np.array([obj_func(params) for params in temp_simplex])
        evals += N
        # Reorder the initial simplex
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        if verbose > 0:
            print(
                "Evaluated the initial simplex: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )
        if savefreq is not None:
            save_nelder_mead_data(name, simplex, fvals, iters, evals)
            if verbose > 0:
                print("Saved search progress in " + name + ".txt")
    elif verbose > 0:
        print(
            "Resuming search after "
            + str(iters)
            + " iterations and "
            + str(evals)
            + " function evaluations.",
        )

    # Initialize some inputs for the multithreader
    j_list = range(N - P, N)
    opt_params = [r_param, c_param, e_param]

    # Run the Nelder-Mead algorithm until a terminal condition is met
    go = True
    while go:
        t_start = time()
        iters += 1
        if verbose > 0:
            print("Beginning iteration #" + str(iters) + " now.")

        # Update the P worst points of the simplex
        if use_parallel:
            output = parallel(
                delayed(parallel_nelder_mead_worker)(
                    obj_func,
                    simplex,
                    fvals,
                    j,
                    P,
                    opt_params,
                )
                for j in j_list
            )
        else:
            output = [
                parallel_nelder_mead_worker(obj_func, simplex, fvals, j, P, opt_params)
                for j in j_list
            ]

        # Extract the output for each node
        new_subsimplex = np.zeros((P, K)) + np.nan
        new_vals = np.zeros(P) + np.nan
        new_evals = 0
        for i in range(P):
            new_subsimplex[i, :] = output[i][0]
            new_vals[i] = output[i][1]
            new_evals += output[i][2]
        evals += new_evals

        # Check whether any updates actually happened
        old_subsimplex = simplex[(N - P) : N, :]
        if np.max(np.abs(new_subsimplex - old_subsimplex)) == 0:
            if verbose > 0:
                print("Could not update the simplex, so must perform a shrink step.")
            # If every attempted update was unsuccessful, must shrink the simplex
            simplex = (
                s_param * np.tile(simplex[0, :], (N, 1)) + (1.0 - s_param) * simplex
            )
            temp_simplex = list(simplex[1:N, :])
            if use_parallel:
                fvals = np.array(
                    [fvals[0]]
                    + parallel(delayed(obj_func)(params) for params in temp_simplex),
                )
            else:
                fvals = np.array(
                    [fvals[0]] + [obj_func(params) for params in temp_simplex]
                )
            new_evals += N - 1
            evals += N - 1
        else:
            if verbose > 0:
                print("Updated the simplex successfully.")
            # Otherwise, update the simplex with the new results
            simplex[(N - P) : N, :] = new_subsimplex
            fvals[(N - P) : N] = new_vals

        # Reorder the simplex from best to worst
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        t_end = time()
        if verbose > 0:
            t_iter = t_end - t_start
            print(
                "Finished iteration #"
                + str(iters)
                + " with "
                + str(new_evals)
                + " evaluations ("
                + str(evals)
                + " cumulative) in "
                + str(t_iter)
                + " seconds.",
            )
            print(
                "Simplex status: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )

        # Check for terminal conditions
        if iters >= maxiter:
            go = False
            if verbose > 0:
                print("Maximum iterations reached, terminating unsuccessfully.")
        if evals >= maxeval:
            go = False
            if verbose > 0:
                print("Maximum evaluations reached, terminating unsuccessfully.")
        if f_dist < ftol:
            go = False
            if verbose > 0:
                print("Function tolerance reached, terminating successfully.")
        if x_dist < xtol:
            go = False
            if verbose > 0:
                print("Parameter tolerance reached, terminating successfully.")

        # Save the progress of the estimation if desired
        if savefreq is not None:
            if (iters % savefreq) == 0:
                save_nelder_mead_data(name, simplex, fvals, iters, evals)
                if verbose > 0:
                    print("Saved search progress in " + name + ".txt")

    # Return the results
    xopt = simplex[0, :]
    return xopt
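

# Illustrative usage sketch (assumed objective, for demonstration only):
# minimize a 3D quadratic, attempting to update the two worst simplex vertices
# per iteration (P=2). maxthreads=1 keeps the sketch single-threaded so it
# runs anywhere; real applications would raise maxthreads and P together.
def _example_parallel_nelder_mead():
    def quad(x):
        return np.sum((x - np.array([1.0, -2.0, 0.5])) ** 2)

    guess = np.zeros(3)
    xopt = parallelNelderMead(quad, guess, P=2, maxthreads=1, verbose=0)
    return xopt  # should be close to [1.0, -2.0, 0.5]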


def save_nelder_mead_data(name, simplex, fvals, iters, evals):
    """Stores the progress of a parallel Nelder-Mead search in a text file so that
    it can be resumed later (after manual termination or a crash).

    Parameters
    ----------
    name : string
        Name of the txt file in which to store search progress.
    simplex : np.array
        The current state of the simplex of parameter guesses.
    fvals : np.array
        The objective function value at each row of simplex.
    iters : int
        The number of completed Nelder-Mead iterations.
    evals : int
        The cumulative number of function evaluations in the search process.

    Returns
    -------
    None

    """
    N = simplex.shape[0]  # Number of points in simplex

    with open(name + ".txt", "w", newline="") as f:
        my_writer = csv.writer(f, delimiter=",")
        my_writer.writerow(simplex.shape)
        my_writer.writerow([iters, evals])
        for n in range(N):
            my_writer.writerow(simplex[n, :])
        my_writer.writerow(fvals)


def load_nelder_mead_data(name):
    """Reads the progress of a parallel Nelder-Mead search from a text file, as
    created by save_nelder_mead_data().

    Parameters
    ----------
    name : string
        Name of the txt file from which to read search progress.

    Returns
    -------
    simplex : np.array
        The current state of the simplex of parameter guesses.
    fvals : np.array
        The objective function value at each row of simplex.
    iters : int
        The number of completed Nelder-Mead iterations.
    evals : int
        The cumulative number of function evaluations in the search process.

    """
    # Open the Nelder-Mead progress file
    with open(name + ".txt", newline="") as f:
        my_reader = csv.reader(f, delimiter=",")

        # Get the shape of the simplex and initialize it
        my_shape_txt = next(my_reader)
        N = int(my_shape_txt[0])
        K = int(my_shape_txt[1])
        simplex = np.zeros((N, K)) + np.nan

        # Get number of iterations and cumulative evaluations from the next line
        my_nums_txt = next(my_reader)
        iters = int(my_nums_txt[0])
        evals = int(my_nums_txt[1])

        # Read one line per point of the simplex
        for n in range(N):
            simplex[n, :] = np.array(next(my_reader), dtype=float)

        # Read the final line to get function values
        fvals = np.array(next(my_reader), dtype=float)

    return simplex, fvals, iters, evals
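

# Illustrative round-trip sketch (assumed file name and values): save a small
# simplex to disk and read it back. Writes nm_progress_example.txt in the
# working directory.
def _example_save_load_roundtrip():
    simplex = np.array([[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]])
    fvals = np.array([3.0, 4.0, 5.0])
    save_nelder_mead_data("nm_progress_example", simplex, fvals, 10, 42)
    return load_nelder_mead_data("nm_progress_example")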


def parallel_nelder_mead_worker(obj_func, simplex, f_vals, j, P, opt_params):
    """A worker process for the parallel Nelder-Mead algorithm. Updates one point
    in the simplex, returning its function value as well. Should basically
    never be called directly, only by parallelNelderMead().

    Parameters
    ----------
    obj_func : function
        The function to be minimized; takes a single 1D array as input.
    simplex : numpy.array
        The current simplex for minimization; simplex[k,:] is an input for obj_func.
    f_vals : numpy.array
        The values of the objective function at each point of the simplex:
        f_vals[k] = obj_func(simplex[k,:])
    j : int
        Index of the point in the simplex to update: simplex[j,:]
    P : int
        Degree of parallelization of the algorithm.
    opt_params : numpy.array
        Three element array with parameters for reflection, contraction, expansion.

    Returns
    -------
    new_point : numpy.array
        An updated point for the simplex; might be the same as simplex[j,:].
    new_val : float
        The value of the objective function at the new point: obj_func(new_point).
    evals : int
        Number of evaluations of obj_func by this worker.

    """
    # Unpack the input parameters
    alpha = opt_params[0]  # reflection parameter
    beta = opt_params[1]  # contraction parameter
    gamma = opt_params[2]  # expansion parameter
    my_point = simplex[j, :]  # vertex to update
    my_val = f_vals[j]  # value at the vertex to update
    best_val = f_vals[0]  # best value in the simplex
    next_val = f_vals[j - 1]  # value at the next best point in the simplex
    evals = 0

    # Calculate the centroid of the "good" simplex points
    N = simplex.shape[0]  # number of points in simplex
    centroid = np.mean(simplex[0 : (N - P), :], axis=0)

    # Calculate the reflection point and its function value
    r_point = centroid + alpha * (centroid - my_point)
    r_val = obj_func(r_point)
    evals += 1

    # Case 1: the reflection point is better than the best point
    if r_val < best_val:
        e_point = r_point + gamma * (r_point - centroid)
        e_val = obj_func(e_point)  # Calculate expansion point
        evals += 1
        if e_val < r_val:
            new_point = e_point
            new_val = e_val
        else:
            new_point = r_point
            new_val = r_val
    # Case 2: the reflection point is better than the next best point
    elif r_val < next_val:
        new_point = r_point  # Report reflection point
        new_val = r_val
    # Case 3: the reflection point is worse than the next best point
    else:
        if r_val < my_val:
            temp_point = r_point  # Check whether reflection or original point
            temp_val = r_val  # is better and use it temporarily
        else:
            temp_point = my_point
            temp_val = my_val
        c_point = temp_point + beta * (centroid - temp_point)
        c_val = obj_func(c_point)  # Calculate contraction point
        evals += 1
        if c_val < temp_val:  # Check whether the contraction point is better
            new_point = c_point
            new_val = c_val
        else:
            new_point = temp_point
            new_val = temp_val

    # Return the outputs
    return new_point, new_val, evals
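

# Illustrative sketch of a single worker update (assumed inputs): the simplex
# rows are already sorted from best to worst, and the worker tries to improve
# the worst vertex (j=2) with reflection parameter 1.0, contraction 0.5, and
# expansion 1.0. With these values the reflection point is worse than the
# current vertex, so the worker falls through to a contraction step.
def _example_worker_update():
    def sum_squares(x):
        return np.sum(x**2)

    simplex = np.array([[0.1, 0.0], [1.0, 0.0], [0.0, 2.0]])
    fvals = np.array([sum_squares(row) for row in simplex])
    return parallel_nelder_mead_worker(
        sum_squares, simplex, fvals, 2, 1, [1.0, 0.5, 1.0]
    )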


def estimate_msm(
    agent: AgentType,
    params: dict,
    empirical_moments: dict,
    moments_cov: dict | np.ndarray,
    simulate_moments: callable,
    optimize_options: dict,
    simulate_moments_kwargs: dict = None,
    weights: str = "diagonal",
    estimagic_options: dict = None,
):
    """Use the method of simulated moments to estimate a model.

    This is a thin wrapper around estimagic's estimate_msm that passes the
    agent to the moment-simulating function through its keyword arguments.
    """
    simulate_moments_kwargs = simulate_moments_kwargs or {}
    simulate_moments_kwargs.setdefault("agent", agent)
    estimagic_options = estimagic_options or {}

    res = em.estimate_msm(
        simulate_moments,
        empirical_moments,
        moments_cov,
        params,
        optimize_options=optimize_options,
        simulate_moments_kwargs=simulate_moments_kwargs,
        weights=weights,
        **estimagic_options,
    )

    return res
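

# Illustrative usage sketch: every name and value below (the blank AgentType,
# the one-moment simulator, the "scipy_neldermead" algorithm choice) is an
# assumption for demonstration; a real application configures a full HARK
# model and computes moments from simulated data.
def _example_estimate_msm():
    agent = AgentType()  # placeholder agent; real models need configuration

    def simulate_moments(params, agent=None):
        # A real moment function would simulate the agent under params and
        # return statistics of the simulated data.
        return {"mean_wealth": params["alpha"] * 2.0}

    return estimate_msm(
        agent,
        params={"alpha": 1.0},
        empirical_moments={"mean_wealth": 3.0},
        moments_cov=np.array([[1.0]]),
        simulate_moments=simulate_moments,
        optimize_options={"algorithm": "scipy_neldermead"},
    )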