1 files changed, 244 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py
new file mode 100644
index 000000000..82d64b184
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py
@@ -0,0 +1,244 @@
+"""This file implements the gym environment of minitaur alternating legs.
+
+"""
+import math
+
+from gym import spaces
+import numpy as np
+import minitaur_gym_env
+
+INIT_EXTENSION_POS = 2.6
+INIT_SWING_POS = 0.0
+DESIRED_PITCH = 0
+NUM_LEGS = 4
+NUM_MOTORS = 2 * NUM_LEGS
+STEP_PERIOD = 1.0 / 3.0  # Three steps per second.
+STEP_AMPLITUDE = 0.75
+
+
+class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv):
+  """The gym environment for the minitaur.
+
+  It simulates the locomotion of a minitaur, a quadruped robot. The state space
+  include the angles, velocities and torques for all the motors and the action
+  space is the desired motor angle for each motor. The reward function is based
+  on how far the minitaur walks in 1000 steps and penalizes the energy
+  expenditure.
+
+  """
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 66
+  }
+
+  def __init__(self,
+               urdf_version=None,
+               control_time_step=0.006,
+               action_repeat=6,
+               control_latency=0,
+               pd_latency=0,
+               on_rack=False,
+               motor_kp=1.0,
+               motor_kd=0.02,
+               remove_default_joint_damping=False,
+               render=False,
+               num_steps_to_log=1000,
+               env_randomizer=None,
+               log_path=None):
+    """Initialize the minitaur alternating legs gym environment.
+
+    Args:
+      urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION] are allowable
+        versions. If None, DEFAULT_URDF_VERSION is used. Refer to
+        minitaur_gym_env for more details.
+      control_time_step: The time step between two successive control signals.
+      action_repeat: The number of simulation steps that an action is repeated.
+      control_latency: The latency between get_observation() and the actual
+        observation. See minituar.py for more details.
+      pd_latency: The latency used to get motor angles/velocities used to
+        compute PD controllers. See minitaur.py for more details.
+      on_rack: Whether to place the minitaur on rack. This is only used to debug
+        the walking gait. In this mode, the minitaur's base is hung midair so
+        that its walking gait is clearer to visualize.
+      motor_kp: The P gain of the motor.
+      motor_kd: The D gain of the motor.
+      remove_default_joint_damping: Whether to remove the default joint damping.
+      render: Whether to render the simulation.
+      num_steps_to_log: The max number of control steps in one episode. If the
+        number of steps is over num_steps_to_log, the environment will still
+        be running, but only first num_steps_to_log will be recorded in logging.
+      env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
+        randomize the environment during when env.reset() is called and add
+        perturbations when env._step() is called.
+      log_path: The path to write out logs. For the details of logging, refer to
+        minitaur_logging.proto.
+    """
+    # _swing_offset and _extension_offset is to mimick the bent legs.
+    self._swing_offset = np.zeros(NUM_LEGS)
+    self._extension_offset = np.zeros(NUM_LEGS)
+    super(MinitaurAlternatingLegsEnv, self).__init__(
+        urdf_version=urdf_version,
+        accurate_motor_model_enabled=True,
+        motor_overheat_protection=True,
+        hard_reset=False,
+        motor_kp=motor_kp,
+        motor_kd=motor_kd,
+        remove_default_joint_damping=remove_default_joint_damping,
+        control_latency=control_latency,
+        pd_latency=pd_latency,
+        on_rack=on_rack,
+        render=render,
+        num_steps_to_log=num_steps_to_log,
+        env_randomizer=env_randomizer,
+        log_path=log_path,
+        control_time_step=control_time_step,
+        action_repeat=action_repeat)
+
+    action_dim = 8
+    action_high = np.array([0.1] * action_dim)
+    self.action_space = spaces.Box(-action_high, action_high)
+
+    self._cam_dist = 1.0
+    self._cam_yaw = 30
+    self._cam_pitch = -30
+
+  def _reset(self):
+    self.desired_pitch = DESIRED_PITCH
+    # In this environment, the actions are
+    # [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
+    #  extension leg 1, extension leg 2, extension leg 3, extension leg 4]
+    init_pose = [
+        INIT_SWING_POS + self._swing_offset[0],
+        INIT_SWING_POS + self._swing_offset[1],
+        INIT_SWING_POS + self._swing_offset[2],
+        INIT_SWING_POS + self._swing_offset[3],
+        INIT_EXTENSION_POS + self._extension_offset[0],
+        INIT_EXTENSION_POS + self._extension_offset[1],
+        INIT_EXTENSION_POS + self._extension_offset[2],
+        INIT_EXTENSION_POS + self._extension_offset[3]
+    ]
+    initial_motor_angles = self._convert_from_leg_model(init_pose)
+    super(MinitaurAlternatingLegsEnv, self)._reset(
+        initial_motor_angles=initial_motor_angles, reset_duration=0.5)
+    return self._get_observation()
+
+  def _convert_from_leg_model(self, leg_pose):
+    motor_pose = np.zeros(NUM_MOTORS)
+    for i in range(NUM_LEGS):
+      motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
+      motor_pose[2 * i
+                 + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
+    return motor_pose
+
+  def _signal(self, t):
+    initial_pose = np.array([
+        INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS,
+        INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS,
+        INIT_EXTENSION_POS
+    ])
+    amplitude = STEP_AMPLITUDE
+    period = STEP_PERIOD
+    extension = amplitude * (-1.0 + math.cos(2 * math.pi / period * t))
+    ith_leg = int(t / period) % 2
+    first_leg = np.array([0, 0, 0, 0, 0, extension, extension, 0])
+    second_leg = np.array([0, 0, 0, 0, extension, 0, 0, extension])
+    if ith_leg:
+      signal = initial_pose + second_leg
+    else:
+      signal = initial_pose + first_leg
+    return signal
+
+  def _transform_action_to_motor_command(self, action):
+    #  Add swing_offset and extension_offset to mimick the bent legs.
+    action[0:4] += self._swing_offset
+    action[4:8] += self._extension_offset
+    action += self._signal(self.minitaur.GetTimeSinceReset())
+    action = self._convert_from_leg_model(action)
+    return action
+
+  def is_fallen(self):
+    """Decide whether the minitaur has fallen.
+
+    If the up directions between the base and the world is large (the dot
+    product is smaller than 0.85), the minitaur is considered fallen.
+
+    Returns:
+      Boolean value that indicates whether the minitaur has fallen.
+    """
+    orientation = self.minitaur.GetBaseOrientation()
+    rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
+    local_up = rot_mat[6:]
+    return np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85
+
+  def _reward(self):
+    return 1.0
+
+  def _get_true_observation(self):
+    """Get the true observations of this environment.
+
+    It includes the roll, the error between current pitch and desired pitch,
+    roll dot and pitch dot of the base.
+
+    Returns:
+      The observation list.
+    """
+    observation = []
+    roll, pitch, _ = self.minitaur.GetTrueBaseRollPitchYaw()
+    roll_rate, pitch_rate, _ = self.minitaur.GetTrueBaseRollPitchYawRate()
+    observation.extend([roll, pitch, roll_rate, pitch_rate])
+    observation[1] -= self.desired_pitch  # observation[1] is the pitch
+    self._true_observation = np.array(observation)
+    return self._true_observation
+
+  def _get_observation(self):
+    observation = []
+    roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw()
+    roll_rate, pitch_rate, _ = self.minitaur.GetBaseRollPitchYawRate()
+    observation.extend([roll, pitch, roll_rate, pitch_rate])
+    observation[1] -= self.desired_pitch  # observation[1] is the pitch
+    self._observation = np.array(observation)
+    return self._observation
+
+  def _get_observation_upper_bound(self):
+    """Get the upper bound of the observation.
+
+    Returns:
+      The upper bound of an observation. See GetObservation() for the details
+        of each element of an observation.
+    """
+    upper_bound = np.zeros(self._get_observation_dimension())
+    upper_bound[0:2] = 2 * math.pi  # Roll, pitch, yaw of the base.
+    upper_bound[2:4] = 2 * math.pi / self._time_step  # Roll, pitch, yaw rate.
+    return upper_bound
+
+  def _get_observation_lower_bound(self):
+    lower_bound = -self._get_observation_upper_bound()
+    return lower_bound
+
+  def set_swing_offset(self, value):
+    """Set the swing offset of each leg.
+
+    It is to mimic the bent leg.
+
+    Args:
+      value: A list of four values.
+    """
+    self._swing_offset = value
+
+  def set_extension_offset(self, value):
+    """Set the extension offset of each leg.
+
+    It is to mimic the bent leg.
+
+    Args:
+      value: A list of four values.
+    """
+    self._extension_offset = value
+
+  def set_desired_pitch(self, value):
+    """Set the desired pitch of the base, which is a user input.
+
+    Args:
+      value: A scalar.
+    """
+    self.desired_pitch = value