author    ManifoldFR <manifoldfr@outlook.com>    2020-06-25 12:03:30 +0200
committer ManifoldFR <manifoldfr@outlook.com>    2020-06-25 12:03:30 +0200
commit    b9259888211979c5192bab927d867b693230ad46 (patch)
tree      215e8c21a3799e83955613c33f502742dd1702aa
parent    fa218725ec17e593d874b960d6f2615125424860 (diff)
download  bullet3-b9259888211979c5192bab927d867b693230ad46.tar.gz
DeepMimic Gym enhancements
* fix render() camera up axis
* make render() camera track character
* increase render resolution
* add option to restart mocap clip at t=0 at reset (default = random time)
* Gym env: option for "test mode" (episode starts at mocap clip start)
* Gym env: option to offset/rescale actions and obs (like original code)
-rw-r--r--  examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py  |  23
-rw-r--r--  examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py       | 153
2 files changed, 129 insertions(+), 47 deletions(-)
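
For context, a minimal usage sketch of the new Gym-side options added by this commit; the arg file name is an assumption (any DeepMimic arg file shipped with pybullet_data should work), and it assumes the env defines the usual Gym spaces:

    import numpy as np
    from pybullet_envs.deep_mimic.gym_env.deep_mimic_env import HumanoidDeepBulletEnv

    # 'run_humanoid3d_walk_args.txt' is an assumed arg file name.
    env = HumanoidDeepBulletEnv(renders=False,
                                arg_file='run_humanoid3d_walk_args.txt',
                                test_mode=True,             # reset() starts the clip at t=0
                                rescale_actions=True,       # map policy actions to sim range
                                rescale_observations=True)  # normalize observations
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # placeholder policy
        obs, reward, done, info = env.step(action)
    frame = env.render(mode='rgb_array')    # 400x640 RGB frame after this patch
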
diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
index 309de425c..29ad13fde 100644
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
@@ -10,16 +10,26 @@ import pybullet_data
import pybullet as p1
import random
+from enum import Enum
+
+class InitializationStrategy(Enum):
+ """Set how the environment is initialized."""
+ START = 0
+ RANDOM = 1 # random state initialization (RSI)
+
class PyBulletDeepMimicEnv(Env):
- def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
+ def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None,
+ init_strategy=InitializationStrategy.RANDOM):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
self._useStablePD = True
self._arg_parser = arg_parser
+ self._init_strategy = init_strategy
+ print("Initialization strategy: {:s}".format(init_strategy))
self.reset()
def reset(self):
@@ -77,9 +87,14 @@ class PyBulletDeepMimicEnv(Env):
time.sleep(timeStep)
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
- rnrange = 1000
- rn = random.randint(0, rnrange)
- startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
+
+ if self._init_strategy == InitializationStrategy.RANDOM:
+ rnrange = 1000
+ rn = random.randint(0, rnrange)
+ startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
+ elif self._init_strategy == InitializationStrategy.START:
+ startTime = 0
+
self.t = startTime
self._humanoid.setSimTime(startTime)
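
For illustration, a sketch of driving the low-level env directly with the new enum; the arg file name and its location under pybullet_data are assumptions:

    import pybullet_data
    from pybullet_utils.arg_parser import ArgParser
    from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import (
        PyBulletDeepMimicEnv, InitializationStrategy)

    arg_parser = ArgParser()
    # Assumed arg file path; adjust to your install.
    arg_parser.load_file(pybullet_data.getDataPath() + '/args/run_humanoid3d_walk_args.txt')

    env = PyBulletDeepMimicEnv(arg_parser, enable_draw=False,
                               init_strategy=InitializationStrategy.START)
    env.reset()  # the mocap clip now always restarts at t=0
    # InitializationStrategy.RANDOM (the default) keeps DeepMimic's random state
    # initialization (RSI), sampling a start time uniformly along the clip.
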
diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py
index 4a829db75..d4d3e3972 100644
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py
@@ -19,18 +19,27 @@ import pybullet as p2
import pybullet_data
from pybullet_utils import bullet_client as bc
from pkg_resources import parse_version
-from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
+from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv, InitializationStrategy
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
+from typing import Optional
+
logger = logging.getLogger(__name__)
class HumanoidDeepBulletEnv(gym.Env):
+ """Base Gym environment for DeepMimic."""
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
- def __init__(self, renders=False, arg_file=''):
-
+ def __init__(self, renders=False, arg_file='', test_mode=False,
+ rescale_actions=True,
+ rescale_observations=True):
+ """
+ Args:
+ test_mode (bool): in test mode, the `reset()` method will always set the mocap clip time
+ to 0.
+ """
self._arg_parser = ArgParser()
Logger.print2("===========================================================")
succ = False
@@ -39,15 +48,33 @@ class HumanoidDeepBulletEnv(gym.Env):
succ = self._arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
-
- self._p = None
+
+ self._p: Optional[bc.BulletClient] = None
self._time_step = 1./240.
- self._internal_env = None
+ self._internal_env: Optional[PyBulletDeepMimicEnv] = None
self._renders = renders
self._discrete_actions = False
- self._arg_file=arg_file
- self._render_height = 200
- self._render_width = 320
+ self._arg_file = arg_file
+ self._render_height = 400
+ self._render_width = 640
+ self._rescale_actions = rescale_actions
+ self._rescale_observations = rescale_observations
+ self.agent_id = -1
+
+ self._numSteps = None
+ self.test_mode = test_mode
+ if self.test_mode:
+ print("Environment running in TEST mode")
+
+ self.reset()
+
+ # Query the policy at 30Hz
+ self.policy_query_30 = True
+ if self.policy_query_30:
+ self._policy_step = 1./30
+ else:
+ self._policy_step = 1./240
+ self._num_env_steps = int(self._policy_step / self._time_step)
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 0.4 #2.4
@@ -94,8 +121,6 @@ class HumanoidDeepBulletEnv(gym.Env):
self.viewer = None
self._configure()
- # Query the policy at 30Hz
- self.policy_query_30 = True
def _configure(self, display=None):
self.display = display
@@ -105,42 +130,72 @@ class HumanoidDeepBulletEnv(gym.Env):
return [seed]
def step(self, action):
-
- #apply control action
- agent_id = -1
+ agent_id = self.agent_id
+
+ if self._rescale_actions:
+ # Rescale the action
+ mean = -self._action_offset
+ std = 1./self._action_scale
+ action = action * std + mean
+
+ # Record reward
+ reward = self._internal_env.calc_reward(agent_id)
+
+ # Apply control action
self._internal_env.set_action(agent_id, action)
-
- #step sim
- self._internal_env.update(self._time_step)
-
- if self.policy_query_30:
- # Step simulation until a new action is required
- while not self._internal_env.need_new_action(agent_id):
- self._p.stepSimulation()
- self._internal_env.t += self._time_step
- # print("t:", self._internal_env.t)
- # print("next update time:", self._internal_env.needs_update_time)
- # print("Performing update")
-
- #record state
+
+ start_time = self._internal_env.t
+
+ # step sim
+ for i in range(self._num_env_steps):
+ self._internal_env.update(self._time_step)
+
+ elapsed_time = self._internal_env.t - start_time
+
+ self._numSteps += 1
+
+ # Record state
self.state = self._internal_env.record_state(agent_id)
- #record reward
- reward = self._internal_env.calc_reward(agent_id)
-
- #record done
+ state = np.array(self.state)
+ if self._rescale_observations:
+ mean = -self._state_offset
+ std = 1./self._state_scale
+ state = (state - mean) / (std + 1e-8)
+
+ # Record done
done = self._internal_env.is_episode_end()
- return np.array(self.state), reward, done, {}
+ info = {}
+ return state, reward, done, info
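
In effect, the rescaling above maps a normalized policy action a to the simulator via a * (1/scale) - offset, and normalizes a raw state x via (x + offset) / (1/scale + 1e-8). A tiny numpy sketch with made-up offset/scale vectors:

    import numpy as np

    # Hypothetical per-dimension values, standing in for build_action_offset()/
    # build_action_scale() and build_state_offset()/build_state_scale().
    offset = np.array([0.0, -0.2])
    scale = np.array([1.0, 2.0])

    a_policy = np.array([0.5, 0.5])            # normalized action from the policy
    a_env = a_policy * (1.0 / scale) - offset  # std = 1/scale, mean = -offset
    print(a_env)                               # [0.5  0.45]

    x_raw = np.array([1.0, 3.0])               # raw state from record_state()
    x_norm = (x_raw + offset) / (1.0 / scale + 1e-8)
    print(x_norm)                              # [~1.0  ~5.6]
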
def reset(self):
- # print("-----------reset simulation---------------")
- if self._internal_env==None:
- self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders)
+ # use the initialization strategy
+ if self._internal_env is None:
+ if self.test_mode:
+ init_strat = InitializationStrategy.START
+ else:
+ init_strat = InitializationStrategy.RANDOM
+ self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders,
+ init_strategy=init_strat)
+
self._internal_env.reset()
self._p = self._internal_env._pybullet_client
- agent_id = -1 #unused here
- state = self._internal_env.record_state(agent_id)
+ agent_id = self.agent_id # unused here
+ self._state_offset = self._internal_env.build_state_offset(self.agent_id)
+ self._state_scale = self._internal_env.build_state_scale(self.agent_id)
+ self._action_offset = self._internal_env.build_action_offset(self.agent_id)
+ self._action_scale = self._internal_env.build_action_scale(self.agent_id)
+ self._numSteps = 0
+ # Record state
+ self.state = self._internal_env.record_state(agent_id)
+
+ # return state as ndarray
+ state = np.array(self.state)
+ if self._rescale_observations:
+ mean = -self._state_offset
+ std = 1./self._state_scale
+ state = (state - mean) / (std + 1e-8)
return state
def render(self, mode='human', close=False):
@@ -148,8 +203,15 @@ class HumanoidDeepBulletEnv(gym.Env):
self._renders = True
if mode != "rgb_array":
return np.array([])
- base_pos=[0,0,0]
- self._cam_dist = 2
+ human = self._internal_env._humanoid
+ base_pos, orn = self._p.getBasePositionAndOrientation(human._sim_model)
+ base_pos = np.asarray(base_pos)
+ # track the position
+ base_pos[1] += 0.3
+ rpy = self._p.getEulerFromQuaternion(orn) # rpy, in radians
+ rpy = 180 / np.pi * np.asarray(rpy) # convert rpy to degrees
+
+ self._cam_dist = 3
self._cam_pitch = 0.3
self._cam_yaw = 0
if (not self._p == None):
@@ -159,10 +221,9 @@ class HumanoidDeepBulletEnv(gym.Env):
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
- upAxisIndex=2)
+ upAxisIndex=1)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
- aspect=float(self._render_width) /
- self._render_height,
+ aspect=float(self._render_width) / self._render_height,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
@@ -171,6 +232,12 @@ class HumanoidDeepBulletEnv(gym.Env):
renderer=self._p.ER_BULLET_HARDWARE_OPENGL,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix)
+ # self._p.resetDebugVisualizerCamera(
+ # cameraDistance=2 * self._cam_dist,
+ # cameraYaw=self._cam_yaw,
+ # cameraPitch=self._cam_pitch,
+ # cameraTargetPosition=base_pos
+ # )
else:
px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
rgb_array = np.array(px, dtype=np.uint8)
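
The upAxisIndex change matters because the DeepMimic humanoid uses Y as the vertical axis. A standalone sketch of the same camera setup, with a hypothetical target position:

    import pybullet as p

    client = p.connect(p.DIRECT)
    base_pos = [0.0, 1.0, 0.0]  # hypothetical character position in a Y-up world
    view_matrix = p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=base_pos,
        distance=3,
        yaw=0,
        pitch=0.3,
        roll=0,
        upAxisIndex=1)  # Y is up, matching the DeepMimic humanoid
    proj_matrix = p.computeProjectionMatrixFOV(fov=60, aspect=640 / 400,
                                               nearVal=0.1, farVal=100.0)
    # In DIRECT mode this falls back to the software TinyRenderer.
    _, _, px, _, _ = p.getCameraImage(640, 400,
                                      viewMatrix=view_matrix,
                                      projectionMatrix=proj_matrix)
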