Classes

| class | PolicyNetwork |
| class | QValueNetwork |
| class | ReplayItem |
Functions

| rendertrial(maxiter=NSTEPS, verbose=True) |
Variables

| | batch |
| int | BATCH_SIZE = 64 |
| | d_batch = np.vstack([b.done for b in batch]) |
| float | DECAY_RATE = 0.99 |
| bool | done = False |
| | env = Pendulum(1) |
| | feed_dict |
| list | h_qva = [] |
| list | h_rwd = [] |
| list | h_ste = [] |
| tuple | maxq |
| | n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) |
| int | NEPISODES = 100 |
| int | NH1 = 250 |
| int | NSTEPS = 100 |
| | NU = env.nu |
| | NX = env.nobs |
| | optim |
| | policy = PolicyNetwork().setupOptim() |
| float | POLICY_LEARNING_RATE = 0.0001 |
| | policyTarget = PolicyNetwork().setupTargetAssign(policy) |
| | q2_batch |
| | qgrad |
| | qref_batch = r_batch + (not d_batch) * (DECAY_RATE * q2_batch) |
| | qvalue = QValueNetwork().setupOptim() |
| float | QVALUE_LEARNING_RATE = 0.001 |
| | qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) |
| | r |
| | r_batch = np.vstack([b.reward for b in batch]) |
| | RANDOM_SEED = int((time.time() % 10) * 1000) |
| int | REPLAY_SIZE = 10000 |
| | replayDeque = deque() |
| float | rsum = 0.0 |
| | sess = tf.InteractiveSession() |
| | u = sess.run(policy.policy, feed_dict={policy.x: x}) |
| | u2_batch |
| | u_batch = np.vstack([b.u for b in batch]) |
| | u_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003, seed=RANDOM_SEED) |
| | u_targ = sess.run(policy.policy, feed_dict={policy.x: x_batch}) |
| float | UPDATE_RATE = 0.01 |
| | withSinCos |
| | x = env.reset().T |
| | x2 = x2.T |
| | x2_batch = np.vstack([b.x2 for b in batch]) |
| | x_batch = np.vstack([b.x for b in batch]) |
Deep actor-critic network, from "Continuous control with deep reinforcement learning" by Lillicrap et al., arXiv:1509.02971.
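For orientation, these are the update rules from the cited paper, written in the paper's notation; mapping them onto the names documented below (DECAY_RATE as γ, UPDATE_RATE as τ, qref_batch as the target y_i, qgrad as the action gradient of the critic) is my reading of this listing, not something stated in the file.

```latex
% Critic: one-step target and mean-squared TD loss
y_i = r_i + \gamma \, Q'\bigl(x_{i+1}, \mu'(x_{i+1} \mid \theta^{\mu'}) \mid \theta^{Q'}\bigr),
\qquad
L = \frac{1}{N} \sum_i \bigl(y_i - Q(x_i, u_i \mid \theta^{Q})\bigr)^2

% Actor: deterministic policy gradient
\nabla_{\theta^{\mu}} J \approx \frac{1}{N} \sum_i
  \nabla_{u} Q(x, u \mid \theta^{Q}) \big|_{x = x_i,\, u = \mu(x_i)}
  \, \nabla_{\theta^{\mu}} \mu(x \mid \theta^{\mu}) \big|_{x_i}

% Target networks: soft updates with rate tau
\theta' \leftarrow \tau \theta + (1 - \tau)\, \theta'
```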
| rendertrial(maxiter=NSTEPS, verbose=True) |
Definition at line 157 of file continuous.py.
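The signature suggests a rollout helper: run the greedy policy from a fresh reset for up to maxiter steps, render each step, and report the accumulated reward. The sketch below shows that pattern with a stand-in environment and policy; it is an illustration of the assumed interface, not the function's actual body.

```python
import numpy as np

class _StubEnv:
    """Stand-in with the reset/step/render interface assumed from this file."""
    def reset(self):
        return np.zeros((2, 1))                           # column state, like env.reset()
    def step(self, u):
        return np.zeros((2, 1)), -float(np.sum(u ** 2))   # next state, reward
    def render(self):
        pass

def rollout(env, act, maxiter=100, verbose=True):
    """Roughly what a rendertrial-style helper does: greedy rollout plus rendering."""
    x = env.reset().T
    rsum = 0.0
    for _ in range(maxiter):
        u = act(x)                                        # greedy action, no exploration noise
        x2, r = env.step(u)
        x = x2.T
        env.render()
        rsum += r
    if verbose:
        print("accumulated reward:", rsum)
    return rsum

rollout(_StubEnv(), act=lambda x: np.zeros((1, 1)), maxiter=5)
```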
| batch |
Definition at line 204 of file continuous.py.
| int BATCH_SIZE = 64 |
Definition at line 35 of file continuous.py.
| d_batch = np.vstack([b.done for b in batch]) |
Definition at line 210 of file continuous.py.
| float DECAY_RATE = 0.99 |
Definition at line 32 of file continuous.py.
| bool done = False |
Definition at line 190 of file continuous.py.
| env = Pendulum(1) |
Definition at line 39 of file continuous.py.
| feed_dict |
Definition at line 226 of file continuous.py.
| list h_qva = [] |
Definition at line 176 of file continuous.py.
| list h_rwd = [] |
Definition at line 175 of file continuous.py.
| list h_ste = [] |
Definition at line 177 of file continuous.py.
| tuple maxq |
Definition at line 250 of file continuous.py.
| n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) |
Definition at line 24 of file continuous.py.
| int NEPISODES = 100 |
Definition at line 28 of file continuous.py.
| int NH1 = 250 |
Definition at line 36 of file continuous.py.
| int NSTEPS = 100 |
Definition at line 29 of file continuous.py.
| NU = env.nu |
Definition at line 42 of file continuous.py.
| NX = env.nobs |
Definition at line 41 of file continuous.py.
| optim |
Definition at line 240 of file continuous.py.
| policy = PolicyNetwork().setupOptim() |
Definition at line 143 of file continuous.py.
| float POLICY_LEARNING_RATE = 0.0001 |
Definition at line 31 of file continuous.py.
| policyTarget = PolicyNetwork().setupTargetAssign(policy) |
Definition at line 144 of file continuous.py.
| q2_batch |
Definition at line 217 of file continuous.py.
| qgrad |
Definition at line 235 of file continuous.py.
| qref_batch = r_batch + (not d_batch) * (DECAY_RATE * q2_batch) |
Definition at line 221 of file continuous.py.
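This is the one-step Bellman target y = r + γ Q′(x₂, μ′(x₂)), with the bootstrapped term removed on terminal transitions. A minimal numpy sketch of that computation follows, using an explicit elementwise mask (`~d_batch`), which is how I read the intent of the `(not d_batch)` factor in the initializer above:

```python
import numpy as np

DECAY_RATE = 0.99                                # discount factor gamma
r_batch = np.array([[1.0], [0.5], [0.2]])        # rewards, shape (N, 1)
d_batch = np.array([[False], [False], [True]])   # episode-done flags
q2_batch = np.array([[10.0], [8.0], [6.0]])      # Q'(x2, mu'(x2)) from the target networks

# Terminal transitions contribute only their immediate reward.
qref_batch = r_batch + (~d_batch) * (DECAY_RATE * q2_batch)
print(qref_batch)   # [[10.9], [8.42], [0.2]]
```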
| qvalue = QValueNetwork().setupOptim() |
Definition at line 146 of file continuous.py.
| float QVALUE_LEARNING_RATE = 0.001 |
Definition at line 30 of file continuous.py.
| qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) |
Definition at line 147 of file continuous.py.
| r |
Definition at line 188 of file continuous.py.
| r_batch = np.vstack([b.reward for b in batch]) |
Definition at line 209 of file continuous.py.
| RANDOM_SEED = int((time.time() % 10) * 1000) |
Definition at line 19 of file continuous.py.
| int REPLAY_SIZE = 10000 |
Definition at line 34 of file continuous.py.
| replayDeque = deque() |
Definition at line 139 of file continuous.py.
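replayDeque is the experience-replay buffer the *_batch variables are sampled from. A minimal sketch of the usual pattern follows; the Transition namedtuple is a stand-in for ReplayItem (its fields are inferred from the x/u/reward/done/x2 batch variables above), and deque(maxlen=...) is the bounded equivalent of creating a plain deque() and trimming it against REPLAY_SIZE by hand.

```python
import random
from collections import deque, namedtuple

REPLAY_SIZE = 10000
BATCH_SIZE = 64

# Stand-in for ReplayItem; field names follow the batch variables above.
Transition = namedtuple("Transition", ["x", "u", "reward", "done", "x2"])

replay = deque(maxlen=REPLAY_SIZE)   # oldest transitions fall off automatically

# Store one transition after every environment step.
replay.append(Transition(x=[0.0, 0.0], u=[0.1], reward=-0.5, done=False, x2=[0.0, 0.1]))

# Once the buffer is large enough, draw an uncorrelated mini-batch.
if len(replay) >= BATCH_SIZE:
    batch = random.sample(replay, BATCH_SIZE)
```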
| float rsum = 0.0 |
Definition at line 182 of file continuous.py.
| sess = tf.InteractiveSession() |
Definition at line 149 of file continuous.py.
| u = sess.run(policy.policy, feed_dict={policy.x: x}) |
Definition at line 186 of file continuous.py.
| u2_batch |
Definition at line 214 of file continuous.py.
| u_batch = np.vstack([b.u for b in batch]) |
Definition at line 208 of file continuous.py.
| u_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003, seed=RANDOM_SEED) |
Definition at line 25 of file continuous.py.
| u_targ = sess.run(policy.policy, feed_dict={policy.x: x_batch}) |
Definition at line 234 of file continuous.py.
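Together, u_targ, qgrad, and optim carry out the deterministic policy-gradient step: evaluate the current policy on the batch, ask the critic how Q changes with the action at those points, and push that gradient back through the policy parameters. The numpy sketch below shows only the chain-rule mechanics, with a linear stand-in policy and a toy quadratic critic; it is not the file's TensorFlow graph.

```python
import numpy as np

rng = np.random.default_rng(0)
NX, NU, N = 3, 1, 64                     # state dim, action dim, batch size

W = np.zeros((NU, NX))                   # linear stand-in policy: u = W @ x
u_star = 0.5                             # toy critic Q(x, u) = -(u - u_star)^2

x_batch = rng.normal(size=(N, NX))
u_targ = x_batch @ W.T                   # policy evaluated on the batch (cf. u_targ above)
qgrad = -2.0 * (u_targ - u_star)         # dQ/du at (x_batch, u_targ) (cf. qgrad above)

# Chain rule dQ/dW = dQ/du * du/dW, averaged over the batch; gradient *ascent* on Q.
grad_W = qgrad.T @ x_batch / N
W += 0.0001 * grad_W                     # POLICY_LEARNING_RATE-sized step (cf. optim above)
```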
| float UPDATE_RATE = 0.01 |
Definition at line 33 of file continuous.py.
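UPDATE_RATE plays the role of τ in the soft target-network update θ′ ← τθ + (1−τ)θ′ that setupTargetAssign presumably wires up for both networks. A short numpy illustration of the rule:

```python
import numpy as np

UPDATE_RATE = 0.01                      # tau: how fast the targets track the learned nets

theta = np.array([1.0, -2.0, 0.5])      # parameters of the learned network
theta_target = np.zeros(3)              # parameters of the target network

# Applied after every optimisation step; the target slowly tracks the learner.
theta_target = UPDATE_RATE * theta + (1.0 - UPDATE_RATE) * theta_target
print(theta_target)                     # [ 0.01  -0.02   0.005]
```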
| withSinCos |
Definition at line 40 of file continuous.py.
| x = env.reset().T |
Definition at line 181 of file continuous.py.
| x2 = x2.T |
Definition at line 188 of file continuous.py.
| x2_batch = np.vstack([b.x2 for b in batch]) |
Definition at line 211 of file continuous.py.
| x_batch = np.vstack([b.x for b in batch]) |
Definition at line 207 of file continuous.py.