1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
"""An example to run the minitaur environment of alternating legs.
"""
import time
import numpy as np
import tensorflow as tf
import minitaur_alternating_legs_env
import minitaur_gym_env
from env_randomizers import minitaur_alternating_legs_env_randomizer as randomizer_lib
#FLAGS = flags.FLAGS
#flags.DEFINE_string("log_path", None, "The directory to write the log file.")
def hand_tuned_agent(observation, timestamp):
    """Compute balance corrections for the alternating-legs reference motion.

    A linear feedback balance controller with hand-tuned gains.

    Args:
        observation: The observation of the environment. Its first four entries
            are read as roll, pitch, and the rates of change of roll and pitch.
        timestamp: The simulated time since the simulation reset.

    Returns:
        A list of eight delta desired motor angles to be added to the reference
        motion of alternating legs for balance.
    """
    roll, pitch = observation[0], observation[1]
    roll_rate, pitch_rate = observation[2], observation[3]

    # Hand-tuned proportional (angle) and derivative (rate) gains. The design
    # idea: when the base tilts, the legs in the air swing further towards the
    # falling direction to catch the fall, and extend more so that they touch
    # the ground earlier.
    kp_roll, kp_pitch = 1.0, 1.0
    kd_roll, kd_pitch = 0.1, 0.1

    roll_fix = kp_roll * roll + kd_roll * roll_rate
    pitch_fix = kp_pitch * pitch + kd_pitch * pitch_rate

    # The index of the current step period selects which correction pattern
    # applies: even periods use the first pattern, odd periods the second.
    period_index = timestamp // minitaur_alternating_legs_env.STEP_PERIOD
    if not period_index % 2:
        return [
            0,
            -pitch_fix,
            -pitch_fix,
            0,
            0,
            -pitch_fix - roll_fix,
            pitch_fix + roll_fix,
            0,
        ]
    return [
        -pitch_fix,
        0,
        0,
        -pitch_fix,
        pitch_fix - roll_fix,
        0,
        0,
        -pitch_fix + roll_fix,
    ]
def hand_tuned_balance_example(log_path=None):
    """An example that the minitaur balances while alternating its legs.

    Builds the alternating-legs environment, runs a fixed number of episodes
    driven by ``hand_tuned_agent``, and logs each episode's total reward
    followed by the average reward over all episodes.

    Args:
        log_path: The directory that the log files are written to. If log_path
            is None, no logs will be written.
    """
    steps = 1000
    episodes = 5

    randomizer = randomizer_lib.MinitaurAlternatingLegsEnvRandomizer()
    environment = minitaur_alternating_legs_env.MinitaurAlternatingLegsEnv(
        urdf_version=minitaur_gym_env.DERPY_V0_URDF_VERSION,
        render=True,
        num_steps_to_log=steps,
        pd_latency=0.002,
        control_latency=0.02,
        remove_default_joint_damping=True,
        on_rack=False,
        env_randomizer=randomizer,
        log_path=log_path)

    # Seed NumPy's global random generator so repeated runs draw the same
    # random numbers.
    np.random.seed(100)

    avg_reward = 0
    # `range` rather than `xrange`: `xrange` does not exist on Python 3, and
    # for these small counts `range` behaves the same on Python 2.
    for i in range(episodes):
        sum_reward = 0
        observation = environment.reset()
        for _ in range(steps):
            # Sleep to prevent serial buffer overflow on microcontroller.
            time.sleep(0.002)
            action = hand_tuned_agent(
                observation, environment.minitaur.GetTimeSinceReset())
            observation, reward, done, _ = environment.step(action)
            sum_reward += reward
            # Stop the episode early once the environment reports it is done.
            if done:
                break
        tf.logging.info("reward {}: {}".format(i, sum_reward))
        avg_reward += sum_reward

    # `avg_reward` holds the sum of episode rewards until this final division.
    tf.logging.info("avg_reward: {}\n\n\n".format(avg_reward / episodes))
def main(unused_argv):
    """Run the hand-tuned balance example as the program entry point.

    Args:
        unused_argv: Command-line arguments handed over by the app runner;
            they are not used.
    """
    # The `FLAGS` definition near the top of this file is commented out, so a
    # bare `FLAGS.log_path` raises NameError. Look the object up defensively:
    # use its `log_path` when the flag definitions are restored, otherwise
    # fall back to None, which the example documents as "no logs written".
    flags_obj = globals().get("FLAGS")
    log_path = getattr(flags_obj, "log_path", None)
    hand_tuned_balance_example(log_path=log_path)
if __name__ == "__main__":
    # Emit INFO-level log records so the reward summaries are shown.
    tf.logging.set_verbosity(tf.logging.INFO)
    # Hand `main` to the app runner explicitly; when no function is given the
    # runner uses the `main` of the `__main__` module, i.e. this same function.
    tf.app.run(main=main)
|