Coverage for HARK / estimation.py: 95%
240 statements
1"""Functions for estimating structural models, including optimization methods
2and bootstrapping tools.
3"""
5import csv
6import multiprocessing
7import warnings
8from copy import deepcopy # For replicating complex objects
9from time import time # Used to time execution
11import estimagic as em
12import numpy as np # Numerical Python
13from joblib import Parallel, delayed
14from scipy.optimize import fmin, fmin_powell # off-the-shelf minimizers
16from HARK.core import AgentType
18__all__ = [
19 "minimize_nelder_mead",
20 "minimize_powell",
21 "bootstrap_sample_from_data",
22 "parallelNelderMead",
23]


def minimize_nelder_mead(
    objective_func,
    parameter_guess,
    verbose=False,
    which_vars=None,
    **kwargs,
):
    """Minimizes the objective function using the Nelder-Mead simplex algorithm,
    starting from an initial parameter guess.

    Parameters
    ----------
    objective_func : function
        The function to be minimized. It should take only a single argument, which
        should be a list representing the parameters to be estimated.
    parameter_guess : [float]
        A starting point for the Nelder-Mead algorithm, which must be a valid
        input for objective_func.
    verbose : boolean
        A flag for the amount of output to print.
    which_vars : np.array or None
        Array of booleans indicating which parameters should be estimated. When
        not provided, estimation is performed on all parameters.

    Returns
    -------
    xopt : [float]
        The values that minimize objective_func.

    """
    # Specify a temporary "modified objective function" that restricts parameters to be estimated
    if which_vars is None:
        which_vars = np.ones(len(parameter_guess), dtype=bool)

    def objective_func_mod(params):
        params_full = np.copy(parameter_guess)
        params_full[which_vars] = params
        out = objective_func(params_full)
        return out

    # convert parameter guess to np array to slice it with boolean array
    parameter_guess_mod = np.array(parameter_guess)[which_vars]

    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin(
        objective_func_mod,
        parameter_guess_mod,
        full_output=1,
        disp=verbose,
        **kwargs,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    optiter = OUTPUT[2]  # Number of iterations performed.
    funcalls = OUTPUT[3]  # Number of function calls made.
    warnflag = OUTPUT[4]  # warnflag : int
    #   1 : Maximum number of function evaluations made.
    #   2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )
    xopt_full = np.copy(parameter_guess)
    xopt_full[which_vars] = xopt

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt_full
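

# Illustrative usage sketch (an assumption for demonstration, not part of the
# HARK API): minimize a simple quadratic while holding its second parameter
# fixed via which_vars. The first entry of the result should end up near 3.0,
# while the second stays at its guessed value of 0.0.
def _example_minimize_nelder_mead():
    def quad(params):
        return (params[0] - 3.0) ** 2 + (params[1] + 1.0) ** 2

    free_params = np.array([True, False])  # estimate only the first parameter
    xopt_full = minimize_nelder_mead(quad, [0.0, 0.0], which_vars=free_params)
    return xopt_full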


def minimize_powell(objective_func, parameter_guess, verbose=False):
    """Minimizes the objective function using a derivative-free Powell algorithm,
    starting from an initial parameter guess.

    Parameters
    ----------
    objective_func : function
        The function to be minimized. It should take only a single argument, which
        should be a list representing the parameters to be estimated.
    parameter_guess : [float]
        A starting point for the Powell algorithm, which must be a valid
        input for objective_func.
    verbose : boolean
        A flag for the amount of output to print.

    Returns
    -------
    xopt : [float]
        The values that minimize objective_func.

    """
    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin_powell(
        objective_func,
        parameter_guess,
        full_output=1,
        maxiter=1000,
        disp=verbose,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    direc = OUTPUT[2]  # Current direction set.
    optiter = OUTPUT[3]  # Number of iterations performed.
    funcalls = OUTPUT[4]  # Number of function calls made.
    warnflag = OUTPUT[5]  # warnflag : int
    #   1 : Maximum number of function evaluations made.
    #   2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", direc="
            + str(direc)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt
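

# Illustrative usage sketch (assumed objective, not from HARK): Powell's
# method on the 2D Rosenbrock function, whose minimum lies at [1.0, 1.0].
def _example_minimize_powell():
    def rosenbrock(params):
        return (1.0 - params[0]) ** 2 + 100.0 * (params[1] - params[0] ** 2) ** 2

    xopt = minimize_powell(rosenbrock, [-1.0, 1.0])
    return xopt  # should be close to [1.0, 1.0]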


def bootstrap_sample_from_data(data, weights=None, seed=0):
    """Samples rows from the input array of data, generating a new data array with
    an equal number of rows (records). Rows are drawn with equal probability
    by default, but probabilities can be specified with weights (must sum to 1).

    Parameters
    ----------
    data : np.array
        An array of data, with each row representing a record.
    weights : np.array
        A weighting array with length equal to data.shape[0].
    seed : int
        A seed for the random number generator.

    Returns
    -------
    new_data : np.array
        A resampled version of input data.

    """
    # Set up the random number generator
    RNG = np.random.default_rng(seed)
    N = data.shape[0]

    # Set up weights as cumulative selection probabilities
    if weights is not None:
        cutoffs = np.cumsum(weights)
    else:
        cutoffs = np.arange(1, N + 1) / N  # equal probability for each record

    # Draw random indices
    indices = np.searchsorted(cutoffs, RNG.uniform(size=N))

    # Create a bootstrapped sample
    new_data = deepcopy(data[indices])
    return new_data
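

# Illustrative usage sketch (assumed data and weights, for demonstration only):
# resample five records with non-uniform selection probabilities. The weights
# must sum to 1, matching the docstring's requirement.
def _example_bootstrap_sample():
    data = np.arange(10.0).reshape(5, 2)  # five records, two fields each
    weights = np.array([0.4, 0.3, 0.1, 0.1, 0.1])
    new_data = bootstrap_sample_from_data(data, weights=weights, seed=42)
    return new_data  # same shape as data; early rows appear more often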


def parallelNelderMead(
    obj_func,
    guess,
    perturb=None,
    P=1,
    ftol=1e-8,
    xtol=1e-8,
    maxiter=np.inf,
    maxeval=np.inf,
    r_param=1.0,
    e_param=1.0,
    c_param=0.5,
    s_param=0.5,
    maxthreads=None,
    name=None,
    resume=False,
    savefreq=None,
    verbose=1,
):
    """A parallel implementation of the Nelder-Mead minimization algorithm, as
    described in Lee and Wiswall (2007). For long optimization procedures, it
    can save progress between iterations and resume later.

    Parameters
    ----------
    obj_func : function
        The objective function to be minimized. Takes a single 1D array as input.
    guess : np.array
        Initial starting point for the simplex, representing an input for obj_func.
    perturb : np.array
        Perturbation vector for the simplex, of the same length as an input to
        obj_func. If perturb[j] is non-zero, a simplex point will be created
        that perturbs the j-th element of guess by perturb[j]; if it is zero,
        then the j-th parameter of obj_func will not be optimized over. By
        default, perturb=None, indicating that all parameters should be optimized,
        with an initial perturbation of 0.1*guess.
    P : int
        Degree of parallelization: the number of vertices of the simplex to try
        to update on each iteration of the process.
    ftol : float
        Absolute tolerance of the objective function for convergence. If
        successive iterations return minimum function values that differ by
        less than ftol, the process terminates successfully.
    xtol : float
        Absolute tolerance of the input values for convergence. If the maximum
        distance between the current minimum point and the worst point in the
        simplex is less than xtol, then the process terminates successfully.
    maxiter : int
        Maximum number of Nelder-Mead iterations; reaching iters=maxiter is
        reported as an "unsuccessful" minimization.
    maxeval : int
        Maximum number of evaluations of obj_func (across all processes); reaching
        evals=maxeval is reported as an "unsuccessful" minimization.
    r_param : float
        Parameter indicating magnitude of the reflection point calculation.
    e_param : float
        Parameter indicating magnitude of the expansion point calculation.
    c_param : float
        Parameter indicating magnitude of the contraction point calculation.
    s_param : float
        Parameter indicating magnitude of the shrink calculation.
    maxthreads : int
        The maximum number of CPU cores that the optimization should use,
        regardless of the size of the problem.
    name : string
        A filename for (optionally) saving the progress of the Nelder-Mead search,
        and for resuming a previous search (when resume=True). Useful for long
        searches that could potentially be interrupted by computer downtime.
    resume : boolean
        An indicator for whether the search should resume from earlier progress.
        When True, the process will load a progress file whose name is given by
        the name input.
    savefreq : int
        When not None, search progress will be saved to name.txt every savefreq
        iterations, to be loaded later with resume=True.
    verbose : int
        Indicator for the verbosity of the optimization routine. Higher values
        generate more text output; verbose=0 produces no text output.

    Returns
    -------
    xopt : np.array
        The input that minimizes obj_func, as found by the search.

    """
    # If this is a resumed search, load the data
    if resume:
        simplex, fvals, iters, evals = load_nelder_mead_data(name)
        dim_count = fvals.size - 1
        N = dim_count + 1  # Number of points in simplex
        K = simplex.shape[1]  # Total number of parameters

    # Otherwise, construct the initial simplex and array of function values
    else:
        if perturb is None:  # Default: perturb each parameter by 10%
            perturb = 0.1 * guess
            perturb[guess == 0] = 0.1

        params_to_opt = np.where(perturb != 0)[0]  # Indices of parameters to optimize
        dim_count = params_to_opt.size  # Number of parameters to search over
        N = dim_count + 1  # Number of points in simplex
        K = guess.size  # Total number of parameters
        simplex = np.tile(guess, (N, 1))
        for j in range(dim_count):
            # Perturb each parameter to optimize by the specified distance
            simplex[j + 1, params_to_opt[j]] = (
                simplex[j + 1, params_to_opt[j]] + perturb[params_to_opt[j]]
            )

        # Initialize iteration and evaluation counts, plus a 1D array of function values
        fvals = np.zeros(dim_count + 1) + np.nan
        iters = 0
        evals = 0

    # Make sure the requested degree of parallelization is feasible
    if P > N - 1:
        warnings.warn(
            "Requested degree of simplex parallelization is "
            + str(P)
            + ", but dimension of optimization problem is only "
            + str(N - 1)
            + ".",
        )
        warnings.warn(
            "Degree of parallelization has been reduced to " + str(N - 1) + "."
        )
        P = N - 1

    # Create the pool of worker processes
    cpu_cores = multiprocessing.cpu_count()  # Total number of available CPU cores
    cores_to_use = min(cpu_cores, dim_count)
    if maxthreads is not None:  # Cap the number of cores if desired
        cores_to_use = min(cores_to_use, maxthreads)
    parallel = Parallel(n_jobs=cores_to_use)
    use_parallel = cores_to_use > 1

    # Begin a new Nelder-Mead search
    if not resume:
        temp_simplex = list(simplex)  # Evaluate the initial simplex
        if use_parallel:
            fvals = np.array(
                parallel(delayed(obj_func)(params) for params in temp_simplex)
            )
        else:
            fvals = np.array([obj_func(params) for params in temp_simplex])
        evals += N
        # Reorder the initial simplex
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        if verbose > 0:
            print(
                "Evaluated the initial simplex: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )
        if savefreq is not None:
            save_nelder_mead_data(name, simplex, fvals, iters, evals)
            if verbose > 0:
                print("Saved search progress in " + name + ".txt")
    elif verbose > 0:
        print(
            "Resuming search after "
            + str(iters)
            + " iterations and "
            + str(evals)
            + " function evaluations.",
        )

    # Initialize some inputs for the multithreader
    j_list = range(N - P, N)
    opt_params = [r_param, c_param, e_param]

    # Run the Nelder-Mead algorithm until a terminal condition is met
    go = True
    while go:
        t_start = time()
        iters += 1
        if verbose > 0:
            print("Beginning iteration #" + str(iters) + " now.")

        # Update the P worst points of the simplex
        if use_parallel:
            output = parallel(
                delayed(parallel_nelder_mead_worker)(
                    obj_func,
                    simplex,
                    fvals,
                    j,
                    P,
                    opt_params,
                )
                for j in j_list
            )
        else:
            output = [
                parallel_nelder_mead_worker(obj_func, simplex, fvals, j, P, opt_params)
                for j in j_list
            ]

        # Extract the output for each node
        new_subsimplex = np.zeros((P, K)) + np.nan
        new_vals = np.zeros(P) + np.nan
        new_evals = 0
        for i in range(P):
            new_subsimplex[i, :] = output[i][0]
            new_vals[i] = output[i][1]
            new_evals += output[i][2]
        evals += new_evals

        # Check whether any updates actually happened
        old_subsimplex = simplex[(N - P) : N, :]
        if np.max(np.abs(new_subsimplex - old_subsimplex)) == 0:
            if verbose > 0:
                print("Could not update the simplex, so must perform a shrink step.")
            # If every attempted update was unsuccessful, must shrink the simplex
            simplex = (
                s_param * np.tile(simplex[0, :], (N, 1)) + (1.0 - s_param) * simplex
            )
            temp_simplex = list(simplex[1:N, :])
            if use_parallel:
                fvals = np.array(
                    [fvals[0]]
                    + parallel(delayed(obj_func)(params) for params in temp_simplex),
                )
            else:
                fvals = np.array(
                    [fvals[0]] + [obj_func(params) for params in temp_simplex]
                )
            new_evals += N - 1
            evals += N - 1
        else:
            if verbose > 0:
                print("Updated the simplex successfully.")
            # Otherwise, update the simplex with the new results
            simplex[(N - P) : N, :] = new_subsimplex
            fvals[(N - P) : N] = new_vals

        # Reorder the simplex from best to worst
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        t_end = time()
        if verbose > 0:
            t_iter = t_end - t_start
            print(
                "Finished iteration #"
                + str(iters)
                + " with "
                + str(new_evals)
                + " evaluations ("
                + str(evals)
                + " cumulative) in "
                + str(t_iter)
                + " seconds.",
            )
            print(
                "Simplex status: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )

        # Check for terminal conditions
        if iters >= maxiter:
            go = False
            if verbose > 0:
                print("Maximum iterations reached, terminating unsuccessfully.")
        if evals >= maxeval:
            go = False
            if verbose > 0:
                print("Maximum evaluations reached, terminating unsuccessfully.")
        if f_dist < ftol:
            go = False
            if verbose > 0:
                print("Function tolerance reached, terminating successfully.")
        if x_dist < xtol:
            go = False
            if verbose > 0:
                print("Parameter tolerance reached, terminating successfully.")

        # Save the progress of the estimation if desired
        if savefreq is not None:
            if (iters % savefreq) == 0:
                save_nelder_mead_data(name, simplex, fvals, iters, evals)
                if verbose > 0:
                    print("Saved search progress in " + name + ".txt")

    # Return the results
    xopt = simplex[0, :]
    return xopt
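

# Illustrative usage sketch (assumed objective, for demonstration only):
# minimize a 3D quadratic, attempting to update the two worst simplex vertices
# per iteration (P=2). maxthreads=1 keeps the sketch single-threaded so it
# runs anywhere; real applications would raise maxthreads and P together.
def _example_parallel_nelder_mead():
    def quad(x):
        return np.sum((x - np.array([1.0, -2.0, 0.5])) ** 2)

    guess = np.zeros(3)
    xopt = parallelNelderMead(quad, guess, P=2, maxthreads=1, verbose=0)
    return xopt  # should be close to [1.0, -2.0, 0.5]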


def save_nelder_mead_data(name, simplex, fvals, iters, evals):
    """Stores the progress of a parallel Nelder-Mead search in a text file so that
    it can be resumed later (after manual termination or a crash).

    Parameters
    ----------
    name : string
        Name of the txt file in which to store search progress.
    simplex : np.array
        The current state of the simplex of parameter guesses.
    fvals : np.array
        The objective function value at each row of simplex.
    iters : int
        The number of completed Nelder-Mead iterations.
    evals : int
        The cumulative number of function evaluations in the search process.

    Returns
    -------
    None

    """
    N = simplex.shape[0]  # Number of points in simplex

    with open(name + ".txt", "w", newline="") as f:
        my_writer = csv.writer(f, delimiter=",")
        my_writer.writerow(simplex.shape)
        my_writer.writerow([iters, evals])
        for n in range(N):
            my_writer.writerow(simplex[n, :])
        my_writer.writerow(fvals)


def load_nelder_mead_data(name):
    """Reads the progress of a parallel Nelder-Mead search from a text file, as
    created by save_nelder_mead_data().

    Parameters
    ----------
    name : string
        Name of the txt file from which to read search progress.

    Returns
    -------
    simplex : np.array
        The current state of the simplex of parameter guesses.
    fvals : np.array
        The objective function value at each row of simplex.
    iters : int
        The number of completed Nelder-Mead iterations.
    evals : int
        The cumulative number of function evaluations in the search process.

    """
    # Open the Nelder-Mead progress file
    with open(name + ".txt", newline="") as f:
        my_reader = csv.reader(f, delimiter=",")

        # Get the shape of the simplex and initialize it
        my_shape_txt = next(my_reader)
        N = int(my_shape_txt[0])
        K = int(my_shape_txt[1])
        simplex = np.zeros((N, K)) + np.nan

        # Get number of iterations and cumulative evaluations from the next line
        my_nums_txt = next(my_reader)
        iters = int(my_nums_txt[0])
        evals = int(my_nums_txt[1])

        # Read one line per point of the simplex
        for n in range(N):
            simplex[n, :] = np.array(next(my_reader), dtype=float)

        # Read the final line to get function values
        fvals = np.array(next(my_reader), dtype=float)

    return simplex, fvals, iters, evals
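

# Illustrative round-trip sketch (assumed file name and values): save a small
# simplex to disk and read it back. Writes nm_progress_example.txt in the
# working directory.
def _example_save_load_roundtrip():
    simplex = np.array([[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]])
    fvals = np.array([3.0, 4.0, 5.0])
    save_nelder_mead_data("nm_progress_example", simplex, fvals, 10, 42)
    return load_nelder_mead_data("nm_progress_example")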


def parallel_nelder_mead_worker(obj_func, simplex, f_vals, j, P, opt_params):
    """A worker process for the parallel Nelder-Mead algorithm. Updates one point
    in the simplex, returning its function value as well. Should basically
    never be called directly, only by parallelNelderMead().

    Parameters
    ----------
    obj_func : function
        The function to be minimized; takes a single 1D array as input.
    simplex : numpy.array
        The current simplex for minimization; simplex[k,:] is an input for obj_func.
    f_vals : numpy.array
        The values of the objective function at each point of the simplex:
        f_vals[k] = obj_func(simplex[k,:])
    j : int
        Index of the point in the simplex to update: simplex[j,:]
    P : int
        Degree of parallelization of the algorithm.
    opt_params : numpy.array
        Three element array with parameters for reflection, contraction, expansion.

    Returns
    -------
    new_point : numpy.array
        An updated point for the simplex; might be the same as simplex[j,:].
    new_val : float
        The value of the objective function at the new point: obj_func(new_point).
    evals : int
        Number of evaluations of obj_func by this worker.

    """
    # Unpack the input parameters
    alpha = opt_params[0]  # reflection parameter
    beta = opt_params[1]  # contraction parameter
    gamma = opt_params[2]  # expansion parameter
    my_point = simplex[j, :]  # vertex to update
    my_val = f_vals[j]  # value at the vertex to update
    best_val = f_vals[0]  # best value in the simplex
    next_val = f_vals[j - 1]  # value at the next best point in the simplex
    evals = 0

    # Calculate the centroid of the "good" simplex points
    N = simplex.shape[0]  # number of points in simplex
    centroid = np.mean(simplex[0 : (N - P), :], axis=0)

    # Calculate the reflection point and its function value
    r_point = centroid + alpha * (centroid - my_point)
    r_val = obj_func(r_point)
    evals += 1

    # Case 1: the reflection point is better than the best point
    if r_val < best_val:
        e_point = r_point + gamma * (r_point - centroid)
        e_val = obj_func(e_point)  # Calculate expansion point
        evals += 1
        if e_val < r_val:
            new_point = e_point
            new_val = e_val
        else:
            new_point = r_point
            new_val = r_val
    # Case 2: the reflection point is better than the next best point
    elif r_val < next_val:
        new_point = r_point  # Report reflection point
        new_val = r_val
    # Case 3: the reflection point is worse than the next best point
    else:
        if r_val < my_val:
            temp_point = r_point  # Check whether reflection or original point
            temp_val = r_val  # is better and use it temporarily
        else:
            temp_point = my_point
            temp_val = my_val
        c_point = temp_point + beta * (centroid - temp_point)
        c_val = obj_func(c_point)  # Calculate contraction point
        evals += 1
        if c_val < temp_val:  # Check whether the contraction point is better
            new_point = c_point
            new_val = c_val
        else:
            new_point = temp_point
            new_val = temp_val

    # Return the outputs
    return new_point, new_val, evals
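

# Illustrative sketch of a single worker update (assumed inputs): the simplex
# rows are already sorted from best to worst, and the worker tries to improve
# the worst vertex (j=2) with reflection parameter 1.0, contraction 0.5, and
# expansion 1.0. With these values the reflection point is worse than the
# current vertex, so the worker falls through to a contraction step.
def _example_worker_update():
    def sum_squares(x):
        return np.sum(x**2)

    simplex = np.array([[0.1, 0.0], [1.0, 0.0], [0.0, 2.0]])
    fvals = np.array([sum_squares(row) for row in simplex])
    return parallel_nelder_mead_worker(
        sum_squares, simplex, fvals, 2, 1, [1.0, 0.5, 1.0]
    )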


def estimate_msm(
    agent: AgentType,
    params: dict,
    empirical_moments: dict,
    moments_cov: dict | np.ndarray,
    simulate_moments: callable,
    optimize_options: dict,
    simulate_moments_kwargs: dict = None,
    weights: str = "diagonal",
    estimagic_options: dict = None,
):
    """Use the method of simulated moments to estimate a model.

    This is a thin wrapper around estimagic's estimate_msm that passes the
    agent to the moment-simulating function through its keyword arguments.
    """
    simulate_moments_kwargs = simulate_moments_kwargs or {}
    simulate_moments_kwargs.setdefault("agent", agent)
    estimagic_options = estimagic_options or {}

    res = em.estimate_msm(
        simulate_moments,
        empirical_moments,
        moments_cov,
        params,
        optimize_options=optimize_options,
        simulate_moments_kwargs=simulate_moments_kwargs,
        weights=weights,
        **estimagic_options,
    )

    return res
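

# Illustrative usage sketch: every name and value below (the blank AgentType,
# the one-moment simulator, the "scipy_neldermead" algorithm choice) is an
# assumption for demonstration; a real application configures a full HARK
# model and computes moments from simulated data.
def _example_estimate_msm():
    agent = AgentType()  # placeholder agent; real models need configuration

    def simulate_moments(params, agent=None):
        # A real moment function would simulate the agent under params and
        # return statistics of the simulated data.
        return {"mean_wealth": params["alpha"] * 2.0}

    return estimate_msm(
        agent,
        params={"alpha": 1.0},
        empirical_moments={"mean_wealth": 3.0},
        moments_cov=np.array([[1.0]]),
        simulate_moments=simulate_moments,
        optimize_options={"algorithm": "scipy_neldermead"},
    )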