diff options
author | erwincoumans <erwin.coumans@gmail.com> | 2018-03-31 21:15:27 -0700 |
---|---|---|
committer | erwincoumans <erwin.coumans@gmail.com> | 2018-03-31 21:15:27 -0700 |
commit | 14d37ecb434706da5c08344aa95a3e23238df514 (patch) | |
tree | f1ca849865aa8a690d83185ca5c953f8e0f03b24 /examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py | |
parent | ec682904977d7c870ff53d6b82350fb9adc00c24 (diff) | |
download | bullet3-14d37ecb434706da5c08344aa95a3e23238df514.tar.gz |
first pass of updated minitaur quadruped environment
Diffstat (limited to 'examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py')
-rw-r--r-- | examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py new file mode 100644 index 000000000..82d64b184 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env.py @@ -0,0 +1,244 @@ +"""This file implements the gym environment of minitaur alternating legs. + +""" +import math + +from gym import spaces +import numpy as np +import minitaur_gym_env + +INIT_EXTENSION_POS = 2.6 +INIT_SWING_POS = 0.0 +DESIRED_PITCH = 0 +NUM_LEGS = 4 +NUM_MOTORS = 2 * NUM_LEGS +STEP_PERIOD = 1.0 / 3.0 # Three steps per second. +STEP_AMPLITUDE = 0.75 + + +class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv): + """The gym environment for the minitaur. + + It simulates the locomotion of a minitaur, a quadruped robot. The state space + include the angles, velocities and torques for all the motors and the action + space is the desired motor angle for each motor. The reward function is based + on how far the minitaur walks in 1000 steps and penalizes the energy + expenditure. + + """ + metadata = { + "render.modes": ["human", "rgb_array"], + "video.frames_per_second": 66 + } + + def __init__(self, + urdf_version=None, + control_time_step=0.006, + action_repeat=6, + control_latency=0, + pd_latency=0, + on_rack=False, + motor_kp=1.0, + motor_kd=0.02, + remove_default_joint_damping=False, + render=False, + num_steps_to_log=1000, + env_randomizer=None, + log_path=None): + """Initialize the minitaur alternating legs gym environment. + + Args: + urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION] are allowable + versions. If None, DEFAULT_URDF_VERSION is used. Refer to + minitaur_gym_env for more details. + control_time_step: The time step between two successive control signals. + action_repeat: The number of simulation steps that an action is repeated. + control_latency: The latency between get_observation() and the actual + observation. See minituar.py for more details. + pd_latency: The latency used to get motor angles/velocities used to + compute PD controllers. See minitaur.py for more details. + on_rack: Whether to place the minitaur on rack. This is only used to debug + the walking gait. In this mode, the minitaur's base is hung midair so + that its walking gait is clearer to visualize. + motor_kp: The P gain of the motor. + motor_kd: The D gain of the motor. + remove_default_joint_damping: Whether to remove the default joint damping. + render: Whether to render the simulation. + num_steps_to_log: The max number of control steps in one episode. If the + number of steps is over num_steps_to_log, the environment will still + be running, but only first num_steps_to_log will be recorded in logging. + env_randomizer: An instance (or a list) of EnvRanzomier(s) that can + randomize the environment during when env.reset() is called and add + perturbations when env._step() is called. + log_path: The path to write out logs. For the details of logging, refer to + minitaur_logging.proto. + """ + # _swing_offset and _extension_offset is to mimick the bent legs. + self._swing_offset = np.zeros(NUM_LEGS) + self._extension_offset = np.zeros(NUM_LEGS) + super(MinitaurAlternatingLegsEnv, self).__init__( + urdf_version=urdf_version, + accurate_motor_model_enabled=True, + motor_overheat_protection=True, + hard_reset=False, + motor_kp=motor_kp, + motor_kd=motor_kd, + remove_default_joint_damping=remove_default_joint_damping, + control_latency=control_latency, + pd_latency=pd_latency, + on_rack=on_rack, + render=render, + num_steps_to_log=num_steps_to_log, + env_randomizer=env_randomizer, + log_path=log_path, + control_time_step=control_time_step, + action_repeat=action_repeat) + + action_dim = 8 + action_high = np.array([0.1] * action_dim) + self.action_space = spaces.Box(-action_high, action_high) + + self._cam_dist = 1.0 + self._cam_yaw = 30 + self._cam_pitch = -30 + + def _reset(self): + self.desired_pitch = DESIRED_PITCH + # In this environment, the actions are + # [swing leg 1, swing leg 2, swing leg 3, swing leg 4, + # extension leg 1, extension leg 2, extension leg 3, extension leg 4] + init_pose = [ + INIT_SWING_POS + self._swing_offset[0], + INIT_SWING_POS + self._swing_offset[1], + INIT_SWING_POS + self._swing_offset[2], + INIT_SWING_POS + self._swing_offset[3], + INIT_EXTENSION_POS + self._extension_offset[0], + INIT_EXTENSION_POS + self._extension_offset[1], + INIT_EXTENSION_POS + self._extension_offset[2], + INIT_EXTENSION_POS + self._extension_offset[3] + ] + initial_motor_angles = self._convert_from_leg_model(init_pose) + super(MinitaurAlternatingLegsEnv, self)._reset( + initial_motor_angles=initial_motor_angles, reset_duration=0.5) + return self._get_observation() + + def _convert_from_leg_model(self, leg_pose): + motor_pose = np.zeros(NUM_MOTORS) + for i in range(NUM_LEGS): + motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i] + motor_pose[2 * i + + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i] + return motor_pose + + def _signal(self, t): + initial_pose = np.array([ + INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, + INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS, + INIT_EXTENSION_POS + ]) + amplitude = STEP_AMPLITUDE + period = STEP_PERIOD + extension = amplitude * (-1.0 + math.cos(2 * math.pi / period * t)) + ith_leg = int(t / period) % 2 + first_leg = np.array([0, 0, 0, 0, 0, extension, extension, 0]) + second_leg = np.array([0, 0, 0, 0, extension, 0, 0, extension]) + if ith_leg: + signal = initial_pose + second_leg + else: + signal = initial_pose + first_leg + return signal + + def _transform_action_to_motor_command(self, action): + # Add swing_offset and extension_offset to mimick the bent legs. + action[0:4] += self._swing_offset + action[4:8] += self._extension_offset + action += self._signal(self.minitaur.GetTimeSinceReset()) + action = self._convert_from_leg_model(action) + return action + + def is_fallen(self): + """Decide whether the minitaur has fallen. + + If the up directions between the base and the world is large (the dot + product is smaller than 0.85), the minitaur is considered fallen. + + Returns: + Boolean value that indicates whether the minitaur has fallen. + """ + orientation = self.minitaur.GetBaseOrientation() + rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation) + local_up = rot_mat[6:] + return np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 + + def _reward(self): + return 1.0 + + def _get_true_observation(self): + """Get the true observations of this environment. + + It includes the roll, the error between current pitch and desired pitch, + roll dot and pitch dot of the base. + + Returns: + The observation list. + """ + observation = [] + roll, pitch, _ = self.minitaur.GetTrueBaseRollPitchYaw() + roll_rate, pitch_rate, _ = self.minitaur.GetTrueBaseRollPitchYawRate() + observation.extend([roll, pitch, roll_rate, pitch_rate]) + observation[1] -= self.desired_pitch # observation[1] is the pitch + self._true_observation = np.array(observation) + return self._true_observation + + def _get_observation(self): + observation = [] + roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw() + roll_rate, pitch_rate, _ = self.minitaur.GetBaseRollPitchYawRate() + observation.extend([roll, pitch, roll_rate, pitch_rate]) + observation[1] -= self.desired_pitch # observation[1] is the pitch + self._observation = np.array(observation) + return self._observation + + def _get_observation_upper_bound(self): + """Get the upper bound of the observation. + + Returns: + The upper bound of an observation. See GetObservation() for the details + of each element of an observation. + """ + upper_bound = np.zeros(self._get_observation_dimension()) + upper_bound[0:2] = 2 * math.pi # Roll, pitch, yaw of the base. + upper_bound[2:4] = 2 * math.pi / self._time_step # Roll, pitch, yaw rate. + return upper_bound + + def _get_observation_lower_bound(self): + lower_bound = -self._get_observation_upper_bound() + return lower_bound + + def set_swing_offset(self, value): + """Set the swing offset of each leg. + + It is to mimic the bent leg. + + Args: + value: A list of four values. + """ + self._swing_offset = value + + def set_extension_offset(self, value): + """Set the extension offset of each leg. + + It is to mimic the bent leg. + + Args: + value: A list of four values. + """ + self._extension_offset = value + + def set_desired_pitch(self, value): + """Set the desired pitch of the base, which is a user input. + + Args: + value: A scalar. + """ + self.desired_pitch = value |