pinocchio  3.2.0
A fast and flexible implementation of Rigid Body Dynamics algorithms and their analytical derivatives
ocp.py
1 """
2 Example of optimal control resolution by direct optimization of a single trajectory.
3 """
4 
5 import signal
6 import time
7 
8 import matplotlib.pyplot as plt
9 import numpy as np
10 from scipy.optimize import fmin_l_bfgs_b
11 
12 from pendulum import Pendulum
13 
14 env = Pendulum(1)
15 NSTEPS = 50
16 x0 = env.reset().copy()
17 
18 
19 def cost(U):
20  """Cost for a trajectory starting at state X0 with control U"""
21  env.reset(x0)
22  csum = 0.0
23  for t in range(NSTEPS):
24  u = U[env.nu * t : env.nu * (t + 1)] # Control at time step <t>
25  _, r = env.step(u) # Pendulum step, with reward r
26  csum += r # Cumulative sum
27  return -csum # Returns cost ie negative reward
28 
29 
30 def display(U, verbose=False):
31  """Display the trajectory on Gepetto viewer."""
32  x = x0.copy()
33  if verbose:
34  print("U = ", " ".join(map(lambda u: "%.1f" % u, np.asarray(U).flatten())))
35  for i in range(len(U) / env.nu):
36  env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
37  env.display(x)
38  time.sleep(5e-2)
39  if verbose:
40  print("X%d" % i, x.T)
41 
42 
43 class CallBack:
44  """Call back function used to follow optimizer steps."""
45 
46  def __init__(self):
47  self.iteriter = 0
48  self.withdisplaywithdisplay = False
49  self.h_rwdh_rwd = []
50 
51  def __call__(self, U):
52  print(
53  "Iteration ",
54  self.iteriter,
55  " ".join(map(lambda u: "%.1f" % u, np.asarray(U).flatten())),
56  )
57  self.iteriter += 1
58  self.UU = U.copy()
59  self.h_rwdh_rwd.append(cost(U))
60  if self.withdisplaywithdisplay:
61  display(U) # Display if CTRL-Z has been pressed.
62 
63  def setWithDisplay(self, boolean=None):
64  self.withdisplaywithdisplay = not self.withdisplaywithdisplay if boolean is None else boolean
65 
66 
67 callback = CallBack()
68 signal.signal(signal.SIGTSTP, lambda x, y: callback.setWithDisplay())
69 
70 
71 U0 = np.zeros(NSTEPS * env.nu) - env.umax # Initial guess for the control trajectory.
72 bounds = (
73  [
74  [-env.umax, env.umax],
75  ]
76  * env.nu
77  * NSTEPS
78 ) # Set control bounds to environment umax.
79 
80 # Start BFGS optimization routine
81 U, c, info = fmin_l_bfgs_b(
82  cost, x0=U0, callback=callback, approx_grad=True, bounds=bounds
83 )
84 
85 # When done, display the trajectory in Gepetto-viewer
86 display(U, True)
87 
88 plt.plot(callback.h_rwd)
89 plt.show()
withdisplay
Definition: ocp.py:48
def display(U, verbose=False)
Definition: ocp.py:30