master/doxygen-html/ocp_8py_source.html

 """

 Example of optimal control resolution by direct optimization of a single trajectory.

 """


 import signal

 import time


 import matplotlib.pyplot as plt

 import numpy as np

 from pendulum import Pendulum

 from scipy.optimize import fmin_l_bfgs_b


 # Number of control steps in the optimized trajectory.
 NSTEPS = 50

 # Pendulum environment, built with argument 1
 # (presumably the number of joints -- confirm in pendulum.py).
 env = Pendulum(1)

 # Private copy of the state returned by reset(); every rollout and every
 # replay below restarts from this same state.
 x0 = env.reset().copy()


 def cost(U):
     """Return the cost of applying the flat control sequence U from x0.

     The environment is reset to x0, then stepped NSTEPS times. Step k uses
     the env.nu consecutive entries of U starting at index env.nu * k. The
     second value returned by env.step is treated as the reward of the step.

     Returns the negated sum of the rewards, so that minimizing this cost
     maximizes the cumulated reward.
     """
     env.reset(x0)
     total_reward = 0.0
     for step in range(NSTEPS):
         first = env.nu * step
         last = env.nu * (step + 1)
         # Only the reward is needed; the first returned value is ignored.
         _unused, reward = env.step(U[first:last])
         total_reward += reward
     return -total_reward


 def display(U, verbose=False):
     """Replay the control sequence U from x0 and show each step in the viewer.

     Parameters:
         U: flat sequence of controls, env.nu entries per time step.
         verbose: when true, print the controls once and a step label
             ("X<i>") after each displayed step.

     The number of replayed steps is len(U) // env.nu. Integer division is
     required because range() rejects the float produced by "/".
     """
     x = x0.copy()  # Work on a copy so the reference initial state is kept.
     if verbose:
         print("U = ", " ".join(f"{u:.1f}" for u in np.asarray(U).flatten()))
     nsteps = len(U) // env.nu
     for i in range(nsteps):
         # The return value is discarded, so dynamics presumably advances x
         # in place (third argument True) -- confirm in pendulum.py.
         env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
         env.display(x)
         time.sleep(5e-2)  # Slow the replay down so the motion is visible.
         if verbose:
             print(f"X{i}")


 class CallBack:
     """Callable passed to the optimizer to follow its iterations.

     Each call prints the current controls, stores a copy of them, records
     their cost and, when the display flag is set, replays the trajectory.
     """

     def __init__(self):
         # Number of optimizer iterations seen so far.
         self.iter = 0
         # When true, every iteration is replayed with display().
         self.withdisplay = False
         # History of cost(U) values, one entry per iteration.
         self.h_rwd = []
         # Copy of the last control vector received (None before the first call).
         self.U = None

     def __call__(self, U):
         """Log the iterate U, record its cost and optionally display it."""
         print(
             "Iteration ",
             self.iter,
             " ".join(f"{u:.1f}" for u in np.asarray(U).flatten()),
         )
         self.iter += 1
         # Keep a copy: the caller may reuse the array it passed in.
         self.U = U.copy()
         self.h_rwd.append(cost(U))
         if self.withdisplay:
             display(U)  # Display if CTRL-Z has been pressed.

     def setWithDisplay(self, boolean=None):
         """Set the display flag to `boolean`, or toggle it when it is None."""
         self.withdisplay = (not self.withdisplay) if boolean is None else boolean


 # Callback instance handed to the optimizer and toggled by the signal handler.
 callback = CallBack()


 def _toggle_display(signum, frame):
     """SIGTSTP handler: flip the display flag of the optimizer callback."""
     return callback.setWithDisplay()


 # SIGTSTP is the signal the callback's own comment associates with CTRL-Z.
 signal.signal(signal.SIGTSTP, _toggle_display)


 # --- OCP resolution

 # One decision variable per control component and per time step.
 n_controls = NSTEPS * env.nu

 # Initial guess for the control trajectory: every entry is set to -umax.
 U0 = np.zeros(n_controls) - env.umax

 # Every decision variable is bounded by the environment limit [-umax, umax].
 bounds = [[-env.umax, env.umax] for _ in range(n_controls)]

 # Run the L-BFGS-B routine. No gradient of the cost is supplied, so
 # approx_grad=True is passed; callback is invoked by the optimizer.
 U, c, info = fmin_l_bfgs_b(
     cost,
     x0=U0,
     bounds=bounds,
     approx_grad=True,
     callback=callback,
 )


 # Optimization finished: replay the resulting controls in the viewer,
 # printing the controls and a label for each step.
 display(U, verbose=True)

 # Plot the cost history recorded by the callback (one value per call).
 # NOTE(review): this reads the `h_rwd` attribute of the CallBack instance.
 plt.plot(callback.h_rwd)
 plt.show()

ocp.CallBack
Definition: ocp.py:42

ocp.CallBack.iter
iter
Definition: ocp.py:46

ocp.CallBack.U
U
Definition: ocp.py:57

ocp.CallBack.h_rwd
h_rwd
Definition: ocp.py:48

ocp.CallBack.withdisplay
withdisplay
Definition: ocp.py:47

pendulum.Pendulum
Definition: pendulum.py:43

display
Definition: display.py:1

ocp.display
def display(U, verbose=False)
Definition: ocp.py:29