1 files changed, 109 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py
new file mode 100644
index 000000000..aa467133c
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py
@@ -0,0 +1,109 @@
+"""An example to run the minitaur environment of alternating legs.
+
+"""
+import time
+
+
+import numpy as np
+import tensorflow as tf
+import minitaur_alternating_legs_env
+import minitaur_gym_env
+from env_randomizers import minitaur_alternating_legs_env_randomizer as randomizer_lib
+
+#FLAGS = flags.FLAGS
+#flags.DEFINE_string("log_path", None, "The directory to write the log file.")
+
+
+def hand_tuned_agent(observation, timestamp):
+  """Computes balance corrections for the alternating-legs reference motion.
+
+  This is a hand-tuned linear feedback balance controller designed according
+  to traditional robotics techniques. The idea is that when the base is
+  tilting, the legs in the air should swing more towards the falling direction
+  to catch the fall and, at the same time, extend more to touch ground earlier.
+
+  Args:
+    observation: The observation of the environment. Entries 0 to 3 are read
+      as the roll, the pitch, and the rates of change of roll and pitch.
+    timestamp: The simulated time since the simulation reset.
+
+  Returns:
+    A list of eight delta desired motor angles to be added to the reference
+    motion of alternating legs for balance.
+  """
+  roll, pitch = observation[0], observation[1]
+  roll_dot, pitch_dot = observation[2], observation[3]
+
+  # Hand-tuned feedback gains: one on each tilt angle (kp) and one on each
+  # tilt rate (kd).
+  roll_kp, roll_kd = 1.0, 0.1
+  pitch_kp, pitch_kd = 1.0, 0.1
+  roll_fix = roll_kp * roll + roll_kd * roll_dot
+  pitch_fix = pitch_kp * pitch + pitch_kd * pitch_dot
+
+  # Count of whole step periods elapsed; its parity picks the pattern.
+  step_index = timestamp // minitaur_alternating_legs_env.STEP_PERIOD
+  if not step_index % 2:
+    # Even step period.
+    return [
+        0, -pitch_fix,
+        -pitch_fix, 0,
+        0, -pitch_fix - roll_fix,
+        pitch_fix + roll_fix, 0
+    ]
+  # Odd step period.
+  return [
+      -pitch_fix, 0,
+      0, -pitch_fix,
+      pitch_fix - roll_fix, 0,
+      0, -pitch_fix + roll_fix
+  ]
+
+
+def hand_tuned_balance_example(log_path=None):
+  """An example that the minitaur balances while alternating its legs.
+
+  Runs 5 rendered episodes of at most 1000 steps each, driven by
+  hand_tuned_agent, and logs each episode's reward sum and their average.
+
+  Args:
+    log_path: The directory that the log files are written to. If log_path is
+      None, no logs will be written.
+  """
+  steps = 1000
+  episodes = 5
+
+  randomizer = randomizer_lib.MinitaurAlternatingLegsEnvRandomizer()
+  environment = minitaur_alternating_legs_env.MinitaurAlternatingLegsEnv(
+      urdf_version=minitaur_gym_env.DERPY_V0_URDF_VERSION,
+      render=True, num_steps_to_log=steps,
+      pd_latency=0.002, control_latency=0.02,
+      remove_default_joint_damping=True, on_rack=False,
+      env_randomizer=randomizer,
+      log_path=log_path)
+  np.random.seed(100)
+  avg_reward = 0
+  for i in range(episodes):
+    sum_reward = 0
+    observation = environment.reset()
+    for _ in range(steps):
+      # Sleep to prevent serial buffer overflow on microcontroller.
+      time.sleep(0.002)
+      action = hand_tuned_agent(observation,
+                                environment.minitaur.GetTimeSinceReset())
+      observation, reward, done, _ = environment.step(action)
+      sum_reward += reward
+      if done:
+        break
+    tf.logging.info("reward {}: {}".format(i, sum_reward))
+    avg_reward += sum_reward
+  tf.logging.info("avg_reward: {}\n\n\n".format(avg_reward / episodes))
+
+
+def main(unused_argv):  # FLAGS is not defined in this file; run without logs.
+  hand_tuned_balance_example(log_path=None)
+
+
+if __name__ == "__main__":  # Entry point when executed as a script.
+  tf.logging.set_verbosity(tf.logging.INFO)  # Make the reward logs visible.
+  tf.app.run()  # With no arguments, runs this module's main(argv).