summaryrefslogtreecommitdiff
path: root/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py')
-rw-r--r--examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py244
1 files changed, 244 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py
new file mode 100644
index 000000000..82d64b184
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py
@@ -0,0 +1,244 @@
+"""This file implements the gym environment of minitaur alternating legs.
+
+"""
+import math
+
+from gym import spaces
+import numpy as np
+import minitaur_gym_env
+
+INIT_EXTENSION_POS = 2.6
+INIT_SWING_POS = 0.0
+DESIRED_PITCH = 0
+NUM_LEGS = 4
+NUM_MOTORS = 2 * NUM_LEGS
+STEP_PERIOD = 1.0 / 3.0 # Three steps per second.
+STEP_AMPLITUDE = 0.75
+
+
+class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv):
+ """The gym environment for the minitaur.
+
+ It simulates the locomotion of a minitaur, a quadruped robot. The state space
+ include the angles, velocities and torques for all the motors and the action
+ space is the desired motor angle for each motor. The reward function is based
+ on how far the minitaur walks in 1000 steps and penalizes the energy
+ expenditure.
+
+ """
+ metadata = {
+ "render.modes": ["human", "rgb_array"],
+ "video.frames_per_second": 66
+ }
+
+ def __init__(self,
+ urdf_version=None,
+ control_time_step=0.006,
+ action_repeat=6,
+ control_latency=0,
+ pd_latency=0,
+ on_rack=False,
+ motor_kp=1.0,
+ motor_kd=0.02,
+ remove_default_joint_damping=False,
+ render=False,
+ num_steps_to_log=1000,
+ env_randomizer=None,
+ log_path=None):
+ """Initialize the minitaur alternating legs gym environment.
+
+ Args:
+ urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION] are allowable
+ versions. If None, DEFAULT_URDF_VERSION is used. Refer to
+ minitaur_gym_env for more details.
+ control_time_step: The time step between two successive control signals.
+ action_repeat: The number of simulation steps that an action is repeated.
+ control_latency: The latency between get_observation() and the actual
+ observation. See minituar.py for more details.
+ pd_latency: The latency used to get motor angles/velocities used to
+ compute PD controllers. See minitaur.py for more details.
+ on_rack: Whether to place the minitaur on rack. This is only used to debug
+ the walking gait. In this mode, the minitaur's base is hung midair so
+ that its walking gait is clearer to visualize.
+ motor_kp: The P gain of the motor.
+ motor_kd: The D gain of the motor.
+ remove_default_joint_damping: Whether to remove the default joint damping.
+ render: Whether to render the simulation.
+ num_steps_to_log: The max number of control steps in one episode. If the
+ number of steps is over num_steps_to_log, the environment will still
+ be running, but only first num_steps_to_log will be recorded in logging.
+ env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
+ randomize the environment during when env.reset() is called and add
+ perturbations when env._step() is called.
+ log_path: The path to write out logs. For the details of logging, refer to
+ minitaur_logging.proto.
+ """
+ # _swing_offset and _extension_offset is to mimick the bent legs.
+ self._swing_offset = np.zeros(NUM_LEGS)
+ self._extension_offset = np.zeros(NUM_LEGS)
+ super(MinitaurAlternatingLegsEnv, self).__init__(
+ urdf_version=urdf_version,
+ accurate_motor_model_enabled=True,
+ motor_overheat_protection=True,
+ hard_reset=False,
+ motor_kp=motor_kp,
+ motor_kd=motor_kd,
+ remove_default_joint_damping=remove_default_joint_damping,
+ control_latency=control_latency,
+ pd_latency=pd_latency,
+ on_rack=on_rack,
+ render=render,
+ num_steps_to_log=num_steps_to_log,
+ env_randomizer=env_randomizer,
+ log_path=log_path,
+ control_time_step=control_time_step,
+ action_repeat=action_repeat)
+
+ action_dim = 8
+ action_high = np.array([0.1] * action_dim)
+ self.action_space = spaces.Box(-action_high, action_high)
+
+ self._cam_dist = 1.0
+ self._cam_yaw = 30
+ self._cam_pitch = -30
+
+ def _reset(self):
+ self.desired_pitch = DESIRED_PITCH
+ # In this environment, the actions are
+ # [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
+ # extension leg 1, extension leg 2, extension leg 3, extension leg 4]
+ init_pose = [
+ INIT_SWING_POS + self._swing_offset[0],
+ INIT_SWING_POS + self._swing_offset[1],
+ INIT_SWING_POS + self._swing_offset[2],
+ INIT_SWING_POS + self._swing_offset[3],
+ INIT_EXTENSION_POS + self._extension_offset[0],
+ INIT_EXTENSION_POS + self._extension_offset[1],
+ INIT_EXTENSION_POS + self._extension_offset[2],
+ INIT_EXTENSION_POS + self._extension_offset[3]
+ ]
+ initial_motor_angles = self._convert_from_leg_model(init_pose)
+ super(MinitaurAlternatingLegsEnv, self)._reset(
+ initial_motor_angles=initial_motor_angles, reset_duration=0.5)
+ return self._get_observation()
+
+ def _convert_from_leg_model(self, leg_pose):
+ motor_pose = np.zeros(NUM_MOTORS)
+ for i in range(NUM_LEGS):
+ motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
+ motor_pose[2 * i
+ + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
+ return motor_pose
+
+ def _signal(self, t):
+ initial_pose = np.array([
+ INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS,
+ INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS,
+ INIT_EXTENSION_POS
+ ])
+ amplitude = STEP_AMPLITUDE
+ period = STEP_PERIOD
+ extension = amplitude * (-1.0 + math.cos(2 * math.pi / period * t))
+ ith_leg = int(t / period) % 2
+ first_leg = np.array([0, 0, 0, 0, 0, extension, extension, 0])
+ second_leg = np.array([0, 0, 0, 0, extension, 0, 0, extension])
+ if ith_leg:
+ signal = initial_pose + second_leg
+ else:
+ signal = initial_pose + first_leg
+ return signal
+
+ def _transform_action_to_motor_command(self, action):
+ # Add swing_offset and extension_offset to mimick the bent legs.
+ action[0:4] += self._swing_offset
+ action[4:8] += self._extension_offset
+ action += self._signal(self.minitaur.GetTimeSinceReset())
+ action = self._convert_from_leg_model(action)
+ return action
+
+ def is_fallen(self):
+ """Decide whether the minitaur has fallen.
+
+ If the up directions between the base and the world is large (the dot
+ product is smaller than 0.85), the minitaur is considered fallen.
+
+ Returns:
+ Boolean value that indicates whether the minitaur has fallen.
+ """
+ orientation = self.minitaur.GetBaseOrientation()
+ rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
+ local_up = rot_mat[6:]
+ return np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85
+
+ def _reward(self):
+ return 1.0
+
+ def _get_true_observation(self):
+ """Get the true observations of this environment.
+
+ It includes the roll, the error between current pitch and desired pitch,
+ roll dot and pitch dot of the base.
+
+ Returns:
+ The observation list.
+ """
+ observation = []
+ roll, pitch, _ = self.minitaur.GetTrueBaseRollPitchYaw()
+ roll_rate, pitch_rate, _ = self.minitaur.GetTrueBaseRollPitchYawRate()
+ observation.extend([roll, pitch, roll_rate, pitch_rate])
+ observation[1] -= self.desired_pitch # observation[1] is the pitch
+ self._true_observation = np.array(observation)
+ return self._true_observation
+
+ def _get_observation(self):
+ observation = []
+ roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw()
+ roll_rate, pitch_rate, _ = self.minitaur.GetBaseRollPitchYawRate()
+ observation.extend([roll, pitch, roll_rate, pitch_rate])
+ observation[1] -= self.desired_pitch # observation[1] is the pitch
+ self._observation = np.array(observation)
+ return self._observation
+
+ def _get_observation_upper_bound(self):
+ """Get the upper bound of the observation.
+
+ Returns:
+ The upper bound of an observation. See GetObservation() for the details
+ of each element of an observation.
+ """
+ upper_bound = np.zeros(self._get_observation_dimension())
+ upper_bound[0:2] = 2 * math.pi # Roll, pitch, yaw of the base.
+ upper_bound[2:4] = 2 * math.pi / self._time_step # Roll, pitch, yaw rate.
+ return upper_bound
+
+ def _get_observation_lower_bound(self):
+ lower_bound = -self._get_observation_upper_bound()
+ return lower_bound
+
+ def set_swing_offset(self, value):
+ """Set the swing offset of each leg.
+
+ It is to mimic the bent leg.
+
+ Args:
+ value: A list of four values.
+ """
+ self._swing_offset = value
+
+ def set_extension_offset(self, value):
+ """Set the extension offset of each leg.
+
+ It is to mimic the bent leg.
+
+ Args:
+ value: A list of four values.
+ """
+ self._extension_offset = value
+
+ def set_desired_pitch(self, value):
+ """Set the desired pitch of the base, which is a user input.
+
+ Args:
+ value: A scalar.
+ """
+ self.desired_pitch = value