5 from pendulum import Pendulum
17 DV = 2.0 * (VMAX) / NV
18 DU = 2.0 * (UMAX) / NU
23 q = (q + pi) % (2 * pi)
24 return int(round(q / DQ)) % NQ
28 v = np.clip(v, -VMAX + 1e-3, VMAX - 1e-3)
29 return int(np.floor((v + VMAX) / DV))
33 u = np.clip(u, -UMAX + 1e-3, UMAX - 1e-3)
34 return int(np.floor((u + UMAX) / DU))
38 """From continuous to discrete."""
39 return c2dq(qv[0]), c2dv(qv[1])
44 iq = np.clip(iq, 0, NQ - 1)
49 iv = np.clip(iv, 0, NV - 1) - (NV - 1) / 2
54 iu = np.clip(iu, 0, NU - 1) - (NU - 1) / 2
59 """From discrete to continuous"""
60 return d2cq(iqv[0]), d2cv(iqv[1])
64 return x[0] + x[1] * NQ
68 return [i % NQ, i / NQ]
94 return x2i(c2d([0.0, 0.0]))
96 def reset(self, x=None):
98 x = [np.random.randint(0, NQ), np.random.randint(0, NV)]
107 reward = 1 if x2i(self.x) == self.goal else 0
108 return x2i(self.x), reward
119 time.sleep(self.pendulum.DT)
121 def dynamics(self, ix, iu):
122 x = np.array(d2c(ix))
125 self.xc, _ = self.pendulum.dynamics(x, u)
126 return c2d(x.T.tolist()[0])
132 print env.reset(x2i([14,11]))
142 if d2cv(v)==0.0: u = MAXU-1 if u==0 else 0
143 hq.append( d2cq(env.x[0]) )
144 hv.append( d2cv(env.x[1]) )
145 hqc.append( env.xc[0,0] )
146 hvc.append( env.xc[1,0] )
def dynamics(self, ix, iu)