pinocchio  3.3.1
A fast and flexible implementation of Rigid Body Dynamics algorithms and their analytical derivatives
ocp.py
1 """
2 Example of optimal control resolution by direct optimization of a single trajectory.
3 """
4 
5 import signal
6 import time
7 
8 import matplotlib.pyplot as plt
9 import numpy as np
10 from pendulum import Pendulum
11 from scipy.optimize import fmin_l_bfgs_b
12 
13 env = Pendulum(1)
14 NSTEPS = 50
15 x0 = env.reset().copy()
16 
17 
18 def cost(U):
19  """Cost for a trajectory starting at state X0 with control U"""
20  env.reset(x0)
21  csum = 0.0
22  for t in range(NSTEPS):
23  u = U[env.nu * t : env.nu * (t + 1)] # Control at time step <t>
24  _, r = env.step(u) # Pendulum step, with reward r
25  csum += r # Cumulative sum
26  return -csum # Returns cost ie negative reward
27 
28 
29 def display(U, verbose=False):
30  """Display the trajectory on Gepetto viewer."""
31  x = x0.copy()
32  if verbose:
33  print("U = ", " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())))
34  for i in range(len(U) / env.nu):
35  env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
36  env.display(x)
37  time.sleep(5e-2)
38  if verbose:
39  print("X%d" % i, x.T)
40 
41 
42 class CallBack:
43  """Call back function used to follow optimizer steps."""
44 
45  def __init__(self):
46  self.iteriter = 0
47  self.withdisplaywithdisplay = False
48  self.h_rwdh_rwd = []
49 
50  def __call__(self, U):
51  print(
52  "Iteration ",
53  self.iteriter,
54  " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())),
55  )
56  self.iteriter += 1
57  self.UU = U.copy()
58  self.h_rwdh_rwd.append(cost(U))
59  if self.withdisplaywithdisplay:
60  display(U) # Display if CTRL-Z has been pressed.
61 
62  def setWithDisplay(self, boolean=None):
63  self.withdisplaywithdisplay = not self.withdisplaywithdisplay if boolean is None else boolean
64 
65 
66 callback = CallBack()
67 signal.signal(signal.SIGTSTP, lambda x, y: callback.setWithDisplay())
68 
69 
70 U0 = np.zeros(NSTEPS * env.nu) - env.umax # Initial guess for the control trajectory.
71 bounds = (
72  [
73  [-env.umax, env.umax],
74  ]
75  * env.nu
76  * NSTEPS
77 ) # Set control bounds to environment umax.
78 
79 # Start BFGS optimization routine
80 U, c, info = fmin_l_bfgs_b(
81  cost, x0=U0, callback=callback, approx_grad=True, bounds=bounds
82 )
83 
84 # When done, display the trajectory in Gepetto-viewer
85 display(U, True)
86 
87 plt.plot(callback.h_rwd)
88 plt.show()
withdisplay
Definition: ocp.py:47
def display(U, verbose=False)
Definition: ocp.py:29