diff options
Diffstat (limited to 'examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py')
-rw-r--r-- | examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py new file mode 100644 index 000000000..aa467133c --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py @@ -0,0 +1,109 @@ +"""An example to run the minitaur environment of alternating legs. + +""" +import time + + +import numpy as np +import tensorflow as tf +import minitaur_alternating_legs_env +import minitaur_gym_env +from env_randomizers import minitaur_alternating_legs_env_randomizer as randomizer_lib + +#FLAGS = flags.FLAGS +#flags.DEFINE_string("log_path", None, "The directory to write the log file.") + + +def hand_tuned_agent(observation, timestamp): + """A hand tuned controller structure with vizier optimized parameters. + + Args: + observation: The observation of the environment. It includes the roll, pith + the speed of roll and pitch changes. + timestamp: The simulated time since the simulation reset. + Returns: + Delta desired motor angles to be added to the reference motion of + alternating legs for balance. + """ + roll = observation[0] + pitch = observation[1] + roll_dot = observation[2] + pitch_dot = observation[3] + + # The following gains are hand-tuned. These gains are + # designed according to traditional robotics techniques. These are linear + # feedback balance conroller. The idea is that when the base is tilting, + # the legs in the air should swing more towards the falling direction to catch + # up falling. At the same time, the legs in the air should extend more to + # touch ground earlier. + roll_gain = 1.0 + pitch_gain = 1.0 + roll_dot_gain = 0.1 + pitch_dot_gain = 0.1 + + roll_compensation = roll_gain * roll + roll_dot_gain * roll_dot + pitch_compensation = pitch_gain * pitch + pitch_dot_gain * pitch_dot + + first_leg = [ + 0, -pitch_compensation, -pitch_compensation, 0, 0, + -pitch_compensation - roll_compensation, + pitch_compensation + roll_compensation, 0 + ] + second_leg = [ + -pitch_compensation, 0, 0, -pitch_compensation, + pitch_compensation - roll_compensation, 0, 0, + -pitch_compensation + roll_compensation + ] + if (timestamp // minitaur_alternating_legs_env.STEP_PERIOD) % 2: + return second_leg + else: + return first_leg + + +def hand_tuned_balance_example(log_path=None): + """An example that the minitaur balances while alternating its legs. + + Args: + log_path: The directory that the log files are written to. If log_path is + None, no logs will be written. + """ + steps = 1000 + episodes = 5 + + randomizer = randomizer_lib.MinitaurAlternatingLegsEnvRandomizer() + environment = minitaur_alternating_legs_env.MinitaurAlternatingLegsEnv( + urdf_version=minitaur_gym_env.DERPY_V0_URDF_VERSION, + render=True, + num_steps_to_log=steps, + pd_latency=0.002, + control_latency=0.02, + remove_default_joint_damping=True, + on_rack=False, + env_randomizer=randomizer, + log_path=log_path) + np.random.seed(100) + avg_reward = 0 + for i in xrange(episodes): + sum_reward = 0 + observation = environment.reset() + for _ in xrange(steps): + # Sleep to prevent serial buffer overflow on microcontroller. + time.sleep(0.002) + action = hand_tuned_agent(observation, + environment.minitaur.GetTimeSinceReset()) + observation, reward, done, _ = environment.step(action) + sum_reward += reward + if done: + break + tf.logging.info("reward {}: {}".format(i, sum_reward)) + avg_reward += sum_reward + tf.logging.info("avg_reward: {}\n\n\n".format(avg_reward / episodes)) + + +def main(unused_argv): + hand_tuned_balance_example(log_path=FLAGS.log_path) + + +if __name__ == "__main__": + tf.logging.set_verbosity(tf.logging.INFO) + tf.app.run() |