1"""Functions for estimating structural models, including optimization methods
2and bootstrapping tools.
3"""
5import csv
6import multiprocessing
7import warnings
8from copy import deepcopy # For replicating complex objects
9from time import time # Used to time execution
11import estimagic as em
12import numpy as np # Numerical Python
13from joblib import Parallel, delayed
14from scipy.optimize import fmin, fmin_powell # off-the-shelf minimizers
16from HARK.core import AgentType
18__all__ = [
19 "minimize_nelder_mead",
20 "minimize_powell",
21 "bootstrap_sample_from_data",
22 "parallelNelderMead",
23]


def minimize_nelder_mead(
    objective_func,
    parameter_guess,
    verbose=False,
    which_vars=None,
    **kwargs,
):
33 """Minimizes the objective function using the Nelder-Mead simplex algorithm,
34 starting from an initial parameter guess.
36 Parameters
37 ----------
38 objective_func : function
39 The function to be minimized. It should take only a single argument, which
40 should be a list representing the parameters to be estimated.
41 parameter_guess : [float]
42 A starting point for the Nelder-Mead algorithm, which must be a valid
43 input for objective_func.
44 which_vars : np.array or None
45 Array of booleans indicating which parameters should be estimated. When
46 not provided, estimation is performed on all parameters.
47 verbose : boolean
48 A flag for the amount of output to print.
50 Returns
51 -------
52 xopt : [float]
53 The values that minimize objective_func.
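
    Examples
    --------
    A minimal sketch (the quadratic objective is illustrative, not part of
    this module). The second parameter is excluded from the search by
    which_vars, so it stays at its starting value:

    >>> quad = lambda p: (p[0] - 1.0) ** 2 + (p[1] + 0.5) ** 2
    >>> xopt = minimize_nelder_mead(quad, [0.0, 0.0],
    ...                             which_vars=np.array([True, False]))

    Here xopt[0] is approximately 1.0, while xopt[1] remains 0.0.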
55 """
    # Specify a temporary "modified objective function" that restricts parameters to be estimated
    if which_vars is None:
        which_vars = np.ones(len(parameter_guess), dtype=bool)

    def objective_func_mod(params):
        params_full = np.copy(parameter_guess)
        params_full[which_vars] = params
        out = objective_func(params_full)
        return out

    # Convert parameter guess to np array to slice it with boolean array
    parameter_guess_mod = np.array(parameter_guess)[which_vars]

    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin(
        objective_func_mod,
        parameter_guess_mod,
        full_output=1,
        disp=verbose,
        **kwargs,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    optiter = OUTPUT[2]  # Number of iterations performed.
    funcalls = OUTPUT[3]  # Number of function calls made.
    warnflag = OUTPUT[4]  # warnflag : int
    # 1 : Maximum number of function evaluations made.
    # 2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )
    xopt_full = np.copy(parameter_guess)
    xopt_full[which_vars] = xopt

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt_full


def minimize_powell(objective_func, parameter_guess, verbose=False):
112 """Minimizes the objective function using a derivative-free Powell algorithm,
113 starting from an initial parameter guess.
115 Parameters
116 ----------
117 objective_func : function
118 The function to be minimized. It should take only a single argument, which
119 should be a list representing the parameters to be estimated.
120 parameter_guess : [float]
121 A starting point for the Powell algorithm, which must be a valid
122 input for objective_func.
123 verbose : boolean
124 A flag for the amount of output to print.
126 Returns
127 -------
128 xopt : [float]
129 The values that minimize objective_func.
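
    Examples
    --------
    A minimal sketch (the Rosenbrock-style objective is illustrative):

    >>> rosen = lambda p: (1.0 - p[0]) ** 2 + 100.0 * (p[1] - p[0] ** 2) ** 2
    >>> xopt = minimize_powell(rosen, [0.0, 0.0])

    Here xopt is approximately [1.0, 1.0].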
131 """
    # Execute the minimization, starting from the given parameter guess
    t0 = time()  # Time the process
    OUTPUT = fmin_powell(
        objective_func,
        parameter_guess,
        full_output=1,
        maxiter=1000,
        disp=verbose,
    )
    t1 = time()

    # Extract values from optimization output:
    xopt = OUTPUT[0]  # Parameters that minimize function.
    fopt = OUTPUT[1]  # Value of function at minimum: ``fopt = func(xopt)``.
    direc = OUTPUT[2]  # Current direction set of the search.
    optiter = OUTPUT[3]  # Number of iterations performed.
    funcalls = OUTPUT[4]  # Number of function calls made.
    warnflag = OUTPUT[5]  # warnflag : int
    # 1 : Maximum number of function evaluations made.
    # 2 : Maximum number of iterations reached.

    # Check that optimization succeeded:
    if warnflag != 0:
        warnings.warn(
            "Minimization failed! xopt="
            + str(xopt)
            + ", fopt="
            + str(fopt)
            + ", direc="
            + str(direc)
            + ", optiter="
            + str(optiter)
            + ", funcalls="
            + str(funcalls)
            + ", warnflag="
            + str(warnflag),
        )

    # Display and return the results:
    if verbose:
        print("Time to estimate is " + str(t1 - t0) + " seconds.")
    return xopt


def bootstrap_sample_from_data(data, weights=None, seed=0):
177 """Samples rows from the input array of data, generating a new data array with
178 an equal number of rows (records). Rows are drawn with equal probability
179 by default, but probabilities can be specified with weights (must sum to 1).
181 Parameters
182 ----------
183 data : np.array
184 An array of data, with each row representing a record.
185 weights : np.array
186 A weighting array with length equal to data.shape[0].
187 seed : int
188 A seed for the random number generator.
190 Returns
191 -------
192 new_data : np.array
193 A resampled version of input data.
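
    Examples
    --------
    A minimal sketch: resample three records, giving the third record half
    of the probability mass (the data and weights are illustrative):

    >>> data = np.array([[1.0], [2.0], [3.0]])
    >>> new_data = bootstrap_sample_from_data(
    ...     data, weights=np.array([0.25, 0.25, 0.5]), seed=42
    ... )
    >>> new_data.shape
    (3, 1)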
195 """
    # Set up the random number generator
    RNG = np.random.default_rng(seed)
    N = data.shape[0]

    # Set up cumulative probability cutoffs; without weights, each of the N
    # rows gets probability 1/N
    if weights is not None:
        cutoffs = np.cumsum(weights)
    else:
        cutoffs = np.arange(1, N + 1) / N

    # Draw random indices
    indices = np.searchsorted(cutoffs, RNG.uniform(size=N))

    # Create a bootstrapped sample
    new_data = deepcopy(data[indices])
    return new_data


def parallelNelderMead(
    obj_func,
    guess,
    perturb=None,
    P=1,
    ftol=1e-8,
    xtol=1e-8,
    maxiter=np.inf,
    maxeval=np.inf,
    r_param=1.0,
    e_param=1.0,
    c_param=0.5,
    s_param=0.5,
    maxthreads=None,
    name=None,
    resume=False,
    savefreq=None,
    verbose=1,
):
233 """A parallel implementation of the Nelder-Mead minimization algorithm, as
234 described in Lee and Wiswall. For long optimization procedures, it can
235 save progress between iterations and resume later.
237 Parameters
238 ----------
239 obj_func : function
240 The objective function to be minimized. Takes a single 1D array as input.
241 guess : np.array
242 Initial starting point for the simplex, representing an input for obj_func.
243 perturb : np.array
244 Perturbation vector for the simplex, of the same length as an input to
245 obj_func. If perturb[j] is non-zero, a simplex point will be created
246 that perturbs the j-th element of guess by perturb[j]; if it is zero,
247 then the j-th parameter of obj_func will not be optimized over. By
248 default, perturb=None, indicating that all parameters should be optimized,
249 with an initial perturbation of 0.1*guess.
250 P : int
251 Degree of parallelization: the number of vertices of the simplex to try
252 to update on each iteration of the process.
253 ftol : float
254 Absolute tolerance of the objective function for convergence. If suc-
255 cessive iterations return minimum function values that differ by less
256 than ftol, the process terminates successfully.
257 xtol : float
258 Absolute tolerance of the input values for convergence. If the maximum
259 distance between the current minimum point and the worst point in the
260 simplex is less than xtol, then the process terminates successfully.
261 maxiter : int
262 Maximum number of Nelder-Mead iterations; reaching iters=maxiter is
263 reported as an "unsuccessful" minimization.
264 maxeval : int
265 Maximum number of evaluations of obj_func (across all processes); reaching
266 evals=maxeval is reported as an "unsuccessful" minimization.
267 r_param: float
268 Parameter indicating magnitude of the reflection point calculation.
269 e_param: float
270 Parameter indicating magnitude of the expansion point calculation.
271 c_param: float
272 Parameter indicating magnitude of the contraction point calculation.
273 s_param: float
274 Parameter indicating magnitude of the shrink calculation.
275 maxthreads : int
276 The maximum number of CPU cores that the optimization should use,
277 regardless of the size of the problem.
278 name : string
279 A filename for (optionally) saving the progress of the Nelder-Mead search,
280 and for resuming a previous search (when resume=True). Useful for long
281 searches that could potentially be interrupted by computer down time.
282 resume : boolean
283 An indicator for whether the search should resume from earlier progress.
284 When True, the process will load a progress file named in input name.
285 savefreq : int
286 When not None, search progress will be saved to name.txt every savefreq
287 iterations, to be loaded later with resume=True).
288 verbose : int
289 Indicator for the verbosity of the optimization routine. Higher values
290 generate more text output; verbose=0 produces no text output.
292 Returns
293 -------
294 min_point : np.array
295 The input that minimizes obj_func, as found by the minimization.
296 fmin : float
297 The minimum of obj_func; fmin = obj_func(min_point).
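
    Examples
    --------
    A minimal sketch (the objective is illustrative; verbose=0 suppresses
    the per-iteration output)::

        xopt = parallelNelderMead(
            lambda p: np.sum((p - 1.0) ** 2),
            guess=np.zeros(4),
            P=2,
            verbose=0,
        )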
299 """
    # If this is a resumed search, load the data
    if resume:
        simplex, fvals, iters, evals = load_nelder_mead_data(name)
        dim_count = fvals.size - 1
        N = dim_count + 1  # Number of points in simplex
        K = simplex.shape[1]  # Total number of parameters

    # Otherwise, construct the initial simplex and array of function values
    else:
        if perturb is None:  # Default: perturb each parameter by 10%
            perturb = 0.1 * guess
            perturb[guess == 0] = 0.1

        params_to_opt = np.where(perturb != 0)[0]  # Indices of which parameters to optimize
        dim_count = params_to_opt.size  # Number of parameters to search over
        N = dim_count + 1  # Number of points in simplex
        K = guess.size  # Total number of parameters
        simplex = np.tile(guess, (N, 1))
        # Perturb each parameter to optimize by the specified distance
        for j in range(dim_count):
            simplex[j + 1, params_to_opt[j]] = (
                simplex[j + 1, params_to_opt[j]] + perturb[params_to_opt[j]]
            )

        # Initialize iteration and evaluation counts, plus a 1D array of function values
        fvals = np.zeros(dim_count + 1) + np.nan
        iters = 0
        evals = 0
    # Make sure degree of parallelization is not illegal
    if P > N - 1:
        print(
            "Requested degree of simplex parallelization is "
            + str(P)
            + ", but dimension of optimization problem is only "
            + str(N - 1)
            + ".",
        )
        print("Degree of parallelization has been reduced to " + str(N - 1) + ".")
        P = N - 1

    # Create the pool of worker processes
    cpu_cores = multiprocessing.cpu_count()  # Total number of available CPU cores
    cores_to_use = min(cpu_cores, dim_count)
    if maxthreads is not None:  # Cap the number of cores if desired
        cores_to_use = min(cores_to_use, maxthreads)
    parallel = Parallel(n_jobs=cores_to_use)
    use_parallel = cores_to_use > 1
    # Begin a new Nelder-Mead search
    if not resume:
        temp_simplex = list(simplex)  # Evaluate the initial simplex
        if use_parallel:
            fvals = np.array(
                parallel(delayed(obj_func)(params) for params in temp_simplex)
            )
        else:
            fvals = np.array([obj_func(params) for params in temp_simplex])
        evals += N
        # Reorder the initial simplex
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        if verbose > 0:
            print(
                "Evaluated the initial simplex: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )
        if savefreq is not None:
            save_nelder_mead_data(name, simplex, fvals, iters, evals)
            if verbose > 0:
                print("Saved search progress in " + name + ".txt")
    elif verbose > 0:
        print(
            "Resuming search after "
            + str(iters)
            + " iterations and "
            + str(evals)
            + " function evaluations.",
        )
    # Initialize some inputs for the multithreader
    j_list = range(N - P, N)
    opt_params = [r_param, c_param, e_param]

    # Run the Nelder-Mead algorithm until a terminal condition is met
    go = True
    while go:
        t_start = time()
        iters += 1
        if verbose > 0:
            print("Beginning iteration #" + str(iters) + " now.")

        # Update the P worst points of the simplex
        if use_parallel:
            output = parallel(
                delayed(parallel_nelder_mead_worker)(
                    obj_func,
                    simplex,
                    fvals,
                    j,
                    P,
                    opt_params,
                )
                for j in j_list
            )
        else:
            output = [
                parallel_nelder_mead_worker(obj_func, simplex, fvals, j, P, opt_params)
                for j in j_list
            ]

        # Extract the output for each node
        new_subsimplex = np.zeros((P, K)) + np.nan
        new_vals = np.zeros(P) + np.nan
        new_evals = 0
        for i in range(P):
            new_subsimplex[i, :] = output[i][0]
            new_vals[i] = output[i][1]
            new_evals += output[i][2]
        evals += new_evals

        # Check whether any updates actually happened
        old_subsimplex = simplex[(N - P) : N, :]
        if np.max(np.abs(new_subsimplex - old_subsimplex)) == 0:
            if verbose > 0:
                print("Could not update the simplex, so must perform a shrink step.")
            # If every attempted update was unsuccessful, must shrink the simplex
            simplex = (
                s_param * np.tile(simplex[0, :], (N, 1)) + (1.0 - s_param) * simplex
            )
            temp_simplex = list(simplex[1:N, :])
            if use_parallel:
                fvals = np.array(
                    [fvals[0]]
                    + parallel(delayed(obj_func)(params) for params in temp_simplex),
                )
            else:
                fvals = np.array(
                    [fvals[0]] + [obj_func(params) for params in temp_simplex]
                )
            new_evals += N - 1
            evals += N - 1
        else:
            if verbose > 0:
                print("Updated the simplex successfully.")
            # Otherwise, update the simplex with the new results
            simplex[(N - P) : N, :] = new_subsimplex
            fvals[(N - P) : N] = new_vals
        # Reorder the simplex from best to worst
        order = np.argsort(fvals)
        fvals = fvals[order]
        simplex = simplex[order, :]
        fmin = fvals[0]
        f_dist = np.abs(fmin - fvals[-1])
        x_dist = np.max(
            np.sqrt(np.sum((simplex - np.tile(simplex[0, :], (N, 1))) ** 2.0, axis=1)),
        )
        t_end = time()
        if verbose > 0:
            t_iter = t_end - t_start
            print(
                "Finished iteration #"
                + str(iters)
                + " with "
                + str(new_evals)
                + " evaluations ("
                + str(evals)
                + " cumulative) in "
                + str(t_iter)
                + " seconds.",
            )
            print(
                "Simplex status: fmin="
                + str(fmin)
                + ", f_dist="
                + str(f_dist)
                + ", x_dist="
                + str(x_dist),
            )
        # Check for terminal conditions
        if iters >= maxiter:
            go = False
            print("Maximum iterations reached, terminating unsuccessfully.")
        if evals >= maxeval:
            go = False
            print("Maximum evaluations reached, terminating unsuccessfully.")
        if f_dist < ftol:
            go = False
            print("Function tolerance reached, terminating successfully.")
        if x_dist < xtol:
            go = False
            print("Parameter tolerance reached, terminating successfully.")

        # Save the progress of the estimation if desired
        if savefreq is not None:
            if (iters % savefreq) == 0:
                save_nelder_mead_data(name, simplex, fvals, iters, evals)
                if verbose > 0:
                    print("Saved search progress in " + name + ".txt")

    # Return the results
    xopt = simplex[0, :]
    return xopt


def save_nelder_mead_data(name, simplex, fvals, iters, evals):
522 """Stores the progress of a parallel Nelder-Mead search in a text file so that
523 it can be resumed later (after manual termination or a crash).
525 Parameters
526 ----------
527 name : string
528 Name of the txt file in which to store search progress.
529 simplex : np.array
530 The current state of the simplex of parameter guesses.
531 fvals : np.array
532 The objective function value at each row of simplex.
533 iters : int
534 The number of completed Nelder-Mead iterations.
535 evals : int
536 The cumulative number of function evaluations in the search process.
538 Returns
539 -------
540 None
542 """
    N = simplex.shape[0]  # Number of points in simplex

    with open(name + ".txt", "w", newline="") as f:
        my_writer = csv.writer(f, delimiter=",")
        my_writer.writerow(simplex.shape)
        my_writer.writerow([iters, evals])
        for n in range(N):
            my_writer.writerow(simplex[n, :])
        my_writer.writerow(fvals)


def load_nelder_mead_data(name):
555 """Reads the progress of a parallel Nelder-Mead search from a text file, as
556 created by save_nelder_mead_data().
558 Parameters
559 ----------
560 name : string
561 Name of the txt file from which to read search progress.
563 Returns
564 -------
565 simplex : np.array
566 The current state of the simplex of parameter guesses.
567 fvals : np.array
568 The objective function value at each row of simplex.
569 iters : int
570 The number of completed Nelder-Mead iterations.
571 evals : int
572 The cumulative number of function evaluations in the search process.
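
    Examples
    --------
    A minimal roundtrip sketch (writes search_progress.txt in the working
    directory; the arrays are illustrative)::

        simplex = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
        fvals = np.array([0.0, 1.0, 1.0])
        save_nelder_mead_data("search_progress", simplex, fvals, 0, 3)
        simplex, fvals, iters, evals = load_nelder_mead_data("search_progress")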
574 """
    # Open the Nelder-Mead progress file
    with open(name + ".txt", newline="") as f:
        my_reader = csv.reader(f, delimiter=",")

        # Get the shape of the simplex and initialize it
        my_shape_txt = next(my_reader)
        N = int(my_shape_txt[0])
        K = int(my_shape_txt[1])
        simplex = np.zeros((N, K)) + np.nan

        # Get number of iterations and cumulative evaluations from the next line
        my_nums_txt = next(my_reader)
        iters = int(my_nums_txt[0])
        evals = int(my_nums_txt[1])

        # Read one line per point of the simplex
        for n in range(N):
            simplex[n, :] = np.array(next(my_reader), dtype=float)

        # Read the final line to get function values
        fvals = np.array(next(my_reader), dtype=float)

    return simplex, fvals, iters, evals


def parallel_nelder_mead_worker(obj_func, simplex, f_vals, j, P, opt_params):
601 """A worker process for the parallel Nelder-Mead algorithm. Updates one point
602 in the simplex, returning its function value as well. Should basically
603 never be called directly, only by parallelNelderMead().
605 Parameters
606 ----------
607 obj_func : function
608 The function to be minimized; takes a single 1D array as input.
609 simplex : numpy.array
610 The current simplex for minimization; simplex[k,:] is an input for obj_func.
611 f_vals : numpy.array
612 The values of the objective function at each point of the simplex:
613 f_vals[k] = obj_func(simplex[k,:])
614 j : int
615 Index of the point in the simplex to update: simplex[j,:]
616 P : int
617 Degree of parallelization of the algorithm.
618 opt_params : numpy.array
619 Three element array with parameters for reflection, contraction, expansion.
621 Returns
622 -------
623 new_point : numpy.array
624 An updated point for the simplex; might be the same as simplex[j,:].
625 new_val : float
626 The value of the objective function at the new point: obj_func(new_point).
627 evals : int
628 Number of evaluations of obj_func by this worker.
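
    Examples
    --------
    A minimal sketch on a toy simplex (the sphere objective is illustrative).
    The reflection point fails to improve on the worst vertex, so the worker
    falls back to a contraction point, approximately [0.25, 0.5]:

    >>> sphere = lambda p: float(np.sum(p ** 2))
    >>> simplex = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
    >>> f_vals = np.array([0.0, 1.0, 1.0])
    >>> new_pt, new_val, n_evals = parallel_nelder_mead_worker(
    ...     sphere, simplex, f_vals, j=2, P=1, opt_params=[1.0, 0.5, 1.0]
    ... )
    >>> new_val, n_evals
    (0.3125, 2)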
630 """
    # Unpack the input parameters
    alpha = opt_params[0]  # reflection parameter
    beta = opt_params[1]  # contraction parameter
    gamma = opt_params[2]  # expansion parameter
    my_point = simplex[j, :]  # vertex to update
    my_val = f_vals[j]  # value at the vertex to update
    best_val = f_vals[0]  # best value in the simplex
    next_val = f_vals[j - 1]  # value at the next best point in the simplex
    evals = 0

    # Calculate the centroid of the "good" simplex points
    N = simplex.shape[0]  # number of points in simplex
    centroid = np.mean(simplex[0 : (N - P), :], axis=0)

    # Calculate the reflection point and its function value
    r_point = centroid + alpha * (centroid - my_point)
    r_val = obj_func(r_point)
    evals += 1

    # Case 1: the reflection point is better than the best point
    if r_val < best_val:
        e_point = r_point + gamma * (r_point - centroid)
        e_val = obj_func(e_point)  # Calculate expansion point
        evals += 1
        if e_val < r_val:
            new_point = e_point
            new_val = e_val
        else:
            new_point = r_point
            new_val = r_val
    # Case 2: the reflection point is better than the next best point
    elif r_val < next_val:
        new_point = r_point  # Report reflection point
        new_val = r_val
    # Case 3: the reflection point is worse than the next best point
    else:
        if r_val < my_val:
            temp_point = r_point  # Check whether reflection or original point
            temp_val = r_val  # is better and use it temporarily
        else:
            temp_point = my_point
            temp_val = my_val
        c_point = temp_point + beta * (centroid - temp_point)
        c_val = obj_func(c_point)  # Calculate contraction point
        evals += 1
        if c_val < temp_val:
            new_point = c_point  # Check whether the contraction point is better
            new_val = c_val
        else:
            new_point = temp_point
            new_val = temp_val

    # Return the outputs
    return new_point, new_val, evals


def estimate_msm(
    agent: AgentType,
    params: dict,
    empirical_moments: dict,
    moments_cov: dict | np.ndarray,
    simulate_moments: callable,
    optimize_options: dict,
    simulate_moments_kwargs: dict | None = None,
    weights: str = "diagonal",
    estimagic_options: dict | None = None,
):
698 """Use the method of simulated moments to estimate a model."""
    """
    simulate_moments_kwargs = simulate_moments_kwargs or {}
    simulate_moments_kwargs.setdefault("agent", agent)
    estimagic_options = estimagic_options or {}

    res = em.estimate_msm(
        simulate_moments,
        empirical_moments,
        moments_cov,
        params,
        optimize_options=optimize_options,
        simulate_moments_kwargs=simulate_moments_kwargs,
        weights=weights,
        **estimagic_options,
    )

    return res