1 files changed, 324 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py
new file mode 100644
index 000000000..3b5ff9e83
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py
@@ -0,0 +1,324 @@
+"""Implements the gym environment of minitaur moving with trotting style.
+"""
+import math
+
+from gym import spaces
+import numpy as np
+from pybullet_envs.minitaur.envs import minitaur_gym_env
+
+# TODO(tingnan): These constants should be moved to minitaur/minitaur_gym_env.
+NUM_LEGS = 4
+NUM_MOTORS = 2 * NUM_LEGS
+
+
+class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
+  """The trotting gym environment for the minitaur.
+
+  In this env, Minitaur performs a trotting style locomotion specified by
+  extension_amplitude, swing_amplitude, and step_frequency. Each diagonal pair
+  of legs will move according to the reference trajectory:
+      extension = extsion_amplitude * cos(2 * pi * step_frequency * t + phi)
+      swing = swing_amplitude * sin(2 * pi * step_frequency * t + phi)
+  And the two diagonal leg pairs have a phase (phi) difference of pi. The
+  reference signal may be modified by the feedback actions from a balance
+  controller (e.g. a neural network).
+
+  """
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 166
+  }
+
+  def __init__(self,
+               urdf_version=None,
+               control_time_step=0.006,
+               action_repeat=6,
+               control_latency=0.02,
+               pd_latency=0.003,
+               on_rack=False,
+               motor_kp=1.0,
+               motor_kd=0.015,
+               remove_default_joint_damping=True,
+               render=False,
+               num_steps_to_log=1000,
+               accurate_motor_model_enabled=True,
+               use_signal_in_observation=False,
+               use_angle_in_observation=False,
+               hard_reset=False,
+               env_randomizer=None,
+               log_path=None,
+               init_extension=2.0,
+               init_swing=0.0,
+               step_frequency=2.0,
+               extension_amplitude=0.35,
+               swing_amplitude=0.3):
+    """Initialize the minitaur trotting gym environment.
+
+    Args:
+      urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION,
+        RAINBOW_DASH_V0_URDF_VERSION] are allowable versions. If None,
+          DEFAULT_URDF_VERSION is used. Refer to minitaur_gym_env for more
+          details.
+      control_time_step: The time step between two successive control signals.
+      action_repeat: The number of simulation steps that an action is repeated.
+      control_latency: The latency between get_observation() and the actual
+        observation. See minituar.py for more details.
+      pd_latency: The latency used to get motor angles/velocities used to
+        compute PD controllers. See minitaur.py for more details.
+      on_rack: Whether to place the minitaur on rack. This is only used to debug
+        the walking gait. In this mode, the minitaur's base is hung midair so
+        that its walking gait is clearer to visualize.
+      motor_kp: The P gain of the motor.
+      motor_kd: The D gain of the motor.
+      remove_default_joint_damping: Whether to remove the default joint damping.
+      render: Whether to render the simulation.
+      num_steps_to_log: The max number of control steps in one episode. If the
+        number of steps is over num_steps_to_log, the environment will still
+        be running, but only first num_steps_to_log will be recorded in logging.
+      accurate_motor_model_enabled: Uses the nonlinear DC motor model if set to
+        True.
+      use_signal_in_observation: Includes the reference motor angles in the
+        observation vector.
+      use_angle_in_observation: Includes the measured motor angles in the
+        observation vector.
+      hard_reset: Whether to reset the whole simulation environment or just
+        reposition the robot.
+      env_randomizer: A list of EnvRandomizers that can randomize the
+        environment during when env.reset() is called and add perturbation
+        forces when env._step() is called.
+      log_path: The path to write out logs. For the details of logging, refer to
+        minitaur_logging.proto.
+      init_extension: The initial reset length of the leg.
+      init_swing: The initial reset swing position of the leg.
+      step_frequency: The desired leg stepping frequency.
+      extension_amplitude: The maximum leg extension change within a locomotion
+        cycle.
+      swing_amplitude: The maximum leg swing change within a cycle.
+    """
+
+    # _swing_offset and _extension_offset is to mimick the bent legs. The
+    # offsets will be added when applying the motor commands.
+    self._swing_offset = np.zeros(NUM_LEGS)
+    self._extension_offset = np.zeros(NUM_LEGS)
+
+    # The reset position.
+    self._init_pose = [
+        init_swing, init_swing, init_swing, init_swing, init_extension,
+        init_extension, init_extension, init_extension
+    ]
+
+    self._step_frequency = step_frequency
+    self._extension_amplitude = extension_amplitude
+    self._swing_amplitude = swing_amplitude
+    self._use_signal_in_observation = use_signal_in_observation
+    self._use_angle_in_observation = use_angle_in_observation
+    super(MinitaurTrottingEnv, self).__init__(
+        urdf_version=urdf_version,
+        accurate_motor_model_enabled=accurate_motor_model_enabled,
+        motor_overheat_protection=True,
+        motor_kp=motor_kp,
+        motor_kd=motor_kd,
+        remove_default_joint_damping=remove_default_joint_damping,
+        control_latency=control_latency,
+        pd_latency=pd_latency,
+        on_rack=on_rack,
+        render=render,
+        hard_reset=hard_reset,
+        num_steps_to_log=num_steps_to_log,
+        env_randomizer=env_randomizer,
+        log_path=log_path,
+        control_time_step=control_time_step,
+        action_repeat=action_repeat)
+
+    action_dim = NUM_LEGS * 2
+    action_high = np.array([0.25] * action_dim)
+    self.action_space = spaces.Box(-action_high, action_high)
+
+    # For render purpose.
+    self._cam_dist = 1.0
+    self._cam_yaw = 30
+    self._cam_pitch = -30
+
+  def _reset(self):
+    # In this environment, the actions are
+    # [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
+    #  extension leg 1, extension leg 2, extension leg 3, extension leg 4]
+    initial_motor_angles = self._convert_from_leg_model(self._init_pose)
+    super(MinitaurTrottingEnv, self)._reset(
+        initial_motor_angles=initial_motor_angles, reset_duration=0.5)
+    return self._get_observation()
+
+  def _convert_from_leg_model(self, leg_pose):
+    """Converts leg space action into motor commands.
+
+    Args:
+      leg_pose: A numpy array. leg_pose[0:NUM_LEGS] are leg swing angles
+        and leg_pose[NUM_LEGS:2*NUM_LEGS] contains leg extensions.
+
+    Returns:
+      A numpy array of the corresponding motor angles for the given leg pose.
+    """
+    motor_pose = np.zeros(NUM_MOTORS)
+    for i in range(NUM_LEGS):
+      motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
+      motor_pose[2 * i
+                 + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
+    return motor_pose
+
+  def _gen_signal(self, t, phase):
+    """Generates a sinusoidal reference leg trajectory.
+
+    The foot (leg tip) will move in a ellipse specified by extension and swing
+    amplitude.
+
+    Args:
+      t: Current time in simulation.
+      phase: The phase offset for the periodic trajectory.
+
+    Returns:
+      The desired leg extension and swing angle at the current time.
+    """
+    period = 1 / self._step_frequency
+    extension = self._extension_amplitude * math.cos(
+        2 * math.pi / period * t + phase)
+    swing = self._swing_amplitude * math.sin(2 * math.pi / period * t + phase)
+    return extension, swing
+
+  def _signal(self, t):
+    """Generates the trotting gait for the robot.
+
+    Args:
+      t: Current time in simulation.
+
+    Returns:
+      A numpy array of the reference leg positions.
+    """
+    # Generates the leg trajectories for the two digonal pair of legs.
+    ext_first_pair, sw_first_pair = self._gen_signal(t, 0)
+    ext_second_pair, sw_second_pair = self._gen_signal(t, math.pi)
+
+    trotting_signal = np.array([
+        sw_first_pair, sw_second_pair, sw_second_pair, sw_first_pair,
+        ext_first_pair, ext_second_pair, ext_second_pair, ext_first_pair
+    ])
+    signal = np.array(self._init_pose) + trotting_signal
+    return signal
+
+  def _transform_action_to_motor_command(self, action):
+    """Generates the motor commands for the given action.
+
+    Swing/extension offsets and the reference leg trajectory will be added on
+    top of the inputs before the conversion.
+
+    Args:
+      action: A numpy array contains the leg swings and extensions that will be
+        added to the reference trotting trajectory. action[0:NUM_LEGS] are leg
+        swing angles, and action[NUM_LEGS:2*NUM_LEGS] contains leg extensions.
+
+    Returns:
+      A numpy array of the desired motor angles for the given leg space action.
+    """
+    # Add swing_offset and extension_offset to mimick the bent legs.
+    action[0:NUM_LEGS] += self._swing_offset
+    action[NUM_LEGS:2 * NUM_LEGS] += self._extension_offset
+
+    # Add the reference trajectory (i.e. the trotting signal).
+    action += self._signal(self.minitaur.GetTimeSinceReset())
+    return self._convert_from_leg_model(action)
+
+  def is_fallen(self):
+    """Decide whether the minitaur has fallen.
+
+    Returns:
+      Boolean value that indicates whether the minitaur has fallen.
+    """
+    roll, pitch, _ = self.minitaur.GetTrueBaseRollPitchYaw()
+    is_fallen = math.fabs(roll) > 0.3 or math.fabs(pitch) > 0.3
+    return is_fallen
+
+  def _get_true_observation(self):
+    """Get the true observations of this environment.
+
+    It includes the true roll, pitch, roll dot and pitch dot of the base. Also
+    includes the disired/observed motor angles if the relevant flags are set.
+
+    Returns:
+      The observation list.
+    """
+    observation = []
+    roll, pitch, _ = self.minitaur.GetTrueBaseRollPitchYaw()
+    roll_rate, pitch_rate, _ = self.minitaur.GetTrueBaseRollPitchYawRate()
+    observation.extend([roll, pitch, roll_rate, pitch_rate])
+    if self._use_signal_in_observation:
+      observation.extend(self._transform_action_to_motor_command([0] * 8))
+    if self._use_angle_in_observation:
+      observation.extend(self.minitaur.GetMotorAngles().tolist())
+    self._true_observation = np.array(observation)
+    return self._true_observation
+
+  def _get_observation(self):
+    """Get observations of this environment.
+
+    It includes the base roll, pitch, roll dot and pitch dot which may contain
+    noises, bias, and latency. Also includes the disired/observed motor angles
+    if the relevant flags are set.
+
+    Returns:
+      The observation list.
+    """
+    observation = []
+    roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw()
+    roll_rate, pitch_rate, _ = self.minitaur.GetBaseRollPitchYawRate()
+    observation.extend([roll, pitch, roll_rate, pitch_rate])
+    if self._use_signal_in_observation:
+      observation.extend(self._transform_action_to_motor_command([0] * 8))
+    if self._use_angle_in_observation:
+      observation.extend(self.minitaur.GetMotorAngles().tolist())
+    self._observation = np.array(observation)
+    return self._observation
+
+  def _get_observation_upper_bound(self):
+    """Get the upper bound of the observation.
+
+    Returns:
+      A numpy array contains the upper bound of an observation. See
+      GetObservation() for the details of each element of an observation.
+    """
+    upper_bound = []
+    upper_bound.extend([2 * math.pi] * 2)  # Roll, pitch, yaw of the base.
+    upper_bound.extend(
+        [2 * math.pi / self._time_step] * 2)  # Roll, pitch, yaw rate.
+    if self._use_signal_in_observation:
+      upper_bound.extend([2 * math.pi] * NUM_MOTORS)  # Signal
+    if self._use_angle_in_observation:
+      upper_bound.extend([2 * math.pi] * NUM_MOTORS)  # Motor angles
+    return np.array(upper_bound)
+
+  def _get_observation_lower_bound(self):
+    """Get the lower bound of the observation.
+
+    Returns:
+      The lower bound of an observation (the reverse of the upper bound).
+    """
+    lower_bound = -self._get_observation_upper_bound()
+    return lower_bound
+
+  def set_swing_offset(self, value):
+    """Set the swing offset of each leg.
+
+    It is to mimic the bent leg.
+
+    Args:
+      value: A list of four values.
+    """
+    self._swing_offset = value
+
+  def set_extension_offset(self, value):
+    """Set the extension offset of each leg.
+
+    It is to mimic the bent leg.
+
+    Args:
+      value: A list of four values.
+    """
+    self._extension_offset = value