pinocchio  3.7.0
A fast and flexible implementation of Rigid Body Dynamics algorithms and their analytical derivatives
 
Loading...
Searching...
No Matches
ocp.py
1"""
2Example of optimal control resolution by direct optimization of a single trajectory.
3"""
4
5import signal
6import time
7
8import matplotlib.pyplot as plt
9import numpy as np
10from pendulum import Pendulum
11from scipy.optimize import fmin_l_bfgs_b
12
13env = Pendulum(1)
14NSTEPS = 50
15x0 = env.reset().copy()
16
17
18def cost(U):
19 """Cost for a trajectory starting at state X0 with control U"""
20 env.reset(x0)
21 csum = 0.0
22 for t in range(NSTEPS):
23 u = U[env.nu * t : env.nu * (t + 1)] # Control at time step <t>
24 _, r = env.step(u) # Pendulum step, with reward r
25 csum += r # Cumulative sum
26 return -csum # Returns cost ie negative reward
27
28
29def display(U, verbose=False):
30 """Display the trajectory on Gepetto viewer."""
31 x = x0.copy()
32 if verbose:
33 print("U = ", " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())))
34 for i in range(len(U) / env.nu):
35 env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
36 env.display(x)
37 time.sleep(5e-2)
38 if verbose:
39 print(f"X{i}")
40
41
42class CallBack:
43 """Call back function used to follow optimizer steps."""
44
45 def __init__(self):
46 self.iter = 0
47 self.withdisplay = False
48 self.h_rwd = []
49
50 def __call__(self, U):
51 print(
52 "Iteration ",
53 self.iter,
54 " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())),
55 )
56 self.iter += 1
57 self.U = U.copy()
58 self.h_rwd.append(cost(U))
59 if self.withdisplay:
60 display(U) # Display if CTRL-Z has been pressed.
61
62 def setWithDisplay(self, boolean=None):
63 self.withdisplay = not self.withdisplay if boolean is None else boolean
64
65
66callback = CallBack()
67signal.signal(signal.SIGTSTP, lambda x, y: callback.setWithDisplay())
68
69# --- OCP resolution
70# Initial guess for the control trajectory.
71U0 = np.zeros(NSTEPS * env.nu) - env.umax
72bounds = (
73 [
74 [-env.umax, env.umax],
75 ]
76 * env.nu
77 * NSTEPS
78) # Set control bounds to environment umax.
79
80# Start BFGS optimization routine
81U, c, info = fmin_l_bfgs_b(
82 cost, x0=U0, callback=callback, approx_grad=True, bounds=bounds
83)
84
85# When done, display the trajectory in Gepetto-viewer
86display(U, True)
87
88plt.plot(callback.h_rwd)
89plt.show()