2Example of optimal control resolution by direct optimization of a single trajectory.
8import matplotlib.pyplot
as plt
10from pendulum
import Pendulum
11from scipy.optimize
import fmin_l_bfgs_b
# Reset the environment and keep a copy of the state it returns in x0.
# NOTE(review): presumably the start state shared by every rollout — confirm against the callers.
15x0 = env.reset().copy()
19 """Cost for a trajectory starting at state X0 with control U"""
22 for t in range(NSTEPS):
23 u = U[env.nu * t : env.nu * (t + 1)] # Control at time step <t>
24 _, r = env.step(u) # Pendulum step, with reward r
25 csum += r # Cumulative sum
26 return -csum # Returns cost ie negative reward
def display(U, verbose=False):
    """Display the trajectory on Gepetto viewer.

    The flat control vector ``U`` is cut into consecutive slices of
    ``env.nu`` entries and each slice is fed to ``env.dynamics`` in order.

    Args:
        U: Flat sequence of controls; its length is expected to be a
            multiple of ``env.nu`` (any trailing remainder is ignored).
        verbose: Kept for interface compatibility.
            NOTE(review): the visible lines never read this flag — confirm
            whether the print below was meant to be guarded by it.
    """
    # Print the whole control vector, one decimal place per entry.
    print("U = ", " ".join(f"{u:.1f}" for u in np.asarray(U).flatten()))
    # Floor division is required here: on Python 3 ``len(U) / env.nu`` is a
    # float and ``range`` rejects it with a TypeError.
    for i in range(len(U) // env.nu):
        # NOTE(review): ``x`` is not assigned in the visible part of this
        # function — confirm where the state comes from. The trailing
        # ``True`` is presumably a display flag — TODO confirm.
        env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
43 """Call back function used to follow optimizer steps."""
47 self.withdisplay = False
50 def __call__(self, U):
54 " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())),
58 self.h_rwd.append(cost(U))
60 display(U) # Display if CTRL-Z has been pressed.
62 def setWithDisplay(self, boolean=None):
63 self.withdisplay = not self.withdisplay if boolean is None else boolean
# Install a SIGTSTP (CTRL-Z) handler that calls callback.setWithDisplay() with
# no argument, i.e. flips the callback's display flag; the handler's two
# arguments are ignored.
67signal.signal(signal.SIGTSTP, lambda x, y: callback.setWithDisplay())
70# Initial guess for the control trajectory.
# A flat array of NSTEPS * env.nu entries, each equal to -env.umax.
71U0 = np.zeros(NSTEPS * env.nu) - env.umax
74 [-env.umax, env.umax],
78) # Set control bounds to environment umax.
80# Start BFGS optimization routine
81U, c, info = fmin_l_bfgs_b(
82 cost, x0=U0, callback=callback, approx_grad=True, bounds=bounds
85# When done, display the trajectory in Gepetto-viewer
# Plot the h_rwd history held by the callback object.
# NOTE(review): presumably one cost value per optimizer iteration — confirm against the callback class.
88plt.plot(callback.h_rwd)