I. Running the main turtlebot3_dqn programs
1. Laptop terminal 1: launch the Gazebo stage 1 world
source activate py27
cd ~/catkin_ws
source devel/setup.bash
export TURTLEBOT3_MODEL=burger
roslaunch turtlebot3_gazebo turtlebot3_stage_1.launch
2. Laptop terminal 2: launch the DQN training node
source activate py27
cd ~/catkin_ws
source devel/setup.bash
export TURTLEBOT3_MODEL=burger
roslaunch turtlebot3_dqn turtlebot3_dqn_stage_1.launch
II. Terminal 1: roslaunch turtlebot3_gazebo turtlebot3_stage_1.launch
1. turtlebot3_stage_1.launch
<launch>
<arg name="model" default="$(env TURTLEBOT3_MODEL)" doc="model type [burger, waffle, waffle_pi]"/>
<arg name="x_pos" default="0.0"/>
<arg name="y_pos" default="0.0"/>
<arg name="z_pos" default="0.0"/>
<include file="$(find gazebo_ros)/launch/empty_world.launch">
<arg name="world_name" value="$(find turtlebot3_gazebo)/worlds/turtlebot3_stage_1.world"/>
<arg name="paused" value="false"/>
<arg name="use_sim_time" value="0.1"/>
<arg name="gui" value="true"/>
<arg name="headless" value="false"/>
<arg name="debug" value="false"/>
</include>
<param name="robot_description" command="$(find xacro)/xacro.py $(find turtlebot3_description)/urdf/turtlebot3_$(arg model).urdf.xacro" />
<node name="spawn_urdf" pkg="gazebo_ros" type="spawn_model" args="-urdf -model turtlebot3_burger -x $(arg x_pos) -y $(arg y_pos) -z $(arg z_pos) -param robot_description" />
</launch>
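As an optional sanity check (not part of the original tutorial flow), once this launch file is up the gazebo_ros node exposes a /gazebo/get_model_state service that can confirm the burger model was actually spawned. A minimal Python sketch:
import rospy
from gazebo_msgs.srv import GetModelState

rospy.init_node('spawn_check')
rospy.wait_for_service('/gazebo/get_model_state')
get_state = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState)
# model name matches the -model argument passed to spawn_model above
resp = get_state(model_name='turtlebot3_burger', relative_entity_name='world')
print(resp.success, resp.pose.position)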
2. $(find turtlebot3_gazebo)/worlds/turtlebot3_stage_1.world
<sdf version='1.6'>
<world name='default'>
<!-- A global light source -->
<include>
<uri>model://sun</uri>
</include>
<!-- A ground plane -->
<include>
<uri>model://ground_plane</uri>
</include>
<physics type="ode">
<real_time_update_rate>1000.0</real_time_update_rate>
<max_step_size>0.001</max_step_size>
<real_time_factor>2</real_time_factor>
<ode>
<solver>
<type>quick</type>
<iters>150</iters>
<precon_iters>0</precon_iters>
<sor>1.400000</sor>
<use_dynamic_moi_rescaling>1</use_dynamic_moi_rescaling>
</solver>
<constraints>
<cfm>0.00001</cfm>
<erp>0.2</erp>
<contact_max_correcting_vel>2000.000000</contact_max_correcting_vel>
<contact_surface_layer>0.01000</contact_surface_layer>
</constraints>
</ode>
</physics>
<!-- Load world -->
<include>
<uri>model://turtlebot3_square</uri>
</include>
<scene>
<ambient>0.4 0.4 0.4 1</ambient>
<background>0.7 0.7 0.7 1</background>
<shadows>true</shadows>
</scene>
<gui fullscreen='0'>
<camera name='user_camera'>
<pose>0.0 0.0 17.0 0 1.5708 0</pose>
<view_controller>orbit</view_controller>
</camera>
</gui>
</world>
</sdf>
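A note on the <physics> block above: Gazebo steps the world at most real_time_update_rate times per wall-clock second, and each step advances max_step_size seconds of simulated time, so the achievable speed-up is capped by their product regardless of what <real_time_factor> asks for. Rough arithmetic for the values in this world file:
# Effective speed-up cap implied by the <physics> settings above.
max_step_size = 0.001            # seconds of sim time per physics step
real_time_update_rate = 1000.0   # physics steps attempted per wall-clock second
print(max_step_size * real_time_update_rate)  # 1.0 -> capped near real time
                                               # even though real_time_factor is 2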
3. $(find turtlebot3_description)/urdf/turtlebot3_$(arg model).urdf.xacro
turtlebot3_burger.urdf.xacro:
<?xml version="1.0" ?>
<robot name="turtlebot3_burger" xmlns:xacro="http://ros.org/wiki/xacro">
<xacro:include filename="$(find turtlebot3_description)/urdf/common_properties.xacro"/>
<xacro:include filename="$(find turtlebot3_description)/urdf/turtlebot3_burger.gazebo.xacro"/>
<link name="base_footprint"/>
<joint name="base_joint" type="fixed">
<parent link="base_footprint"/>
<child link="base_link"/>
<origin xyz="0.0 0.0 0.010" rpy="0 0 0"/>
</joint>
<link name="base_link">
<visual>
<origin xyz="-0.032 0 0.0" rpy="0 0 0"/>
<geometry>
<mesh filename="package://turtlebot3_description/meshes/bases/burger_base.stl" scale="0.001 0.001 0.001"/>
</geometry>
<material name="light_black"/>
</visual>
<collision>
<origin xyz="-0.032 0 0.070" rpy="0 0 0"/>
<geometry>
<box size="0.140 0.140 0.143"/>
</geometry>
</collision>
<inertial>
<origin xyz="0 0 0" rpy="0 0 0"/>
<mass value="8.2573504e-01"/>
<inertia ixx="2.2124416e-03" ixy="-1.2294101e-05" ixz="3.4938785e-05"
iyy="2.1193702e-03" iyz="-5.0120904e-06"
izz="2.0064271e-03" />
</inertial>
</link>
<joint name="wheel_left_joint" type="continuous">
<parent link="base_link"/>
<child link="wheel_left_link"/>
<origin xyz="0.0 0.08 0.023" rpy="-1.57 0 0"/>
<axis xyz="0 0 1"/>
</joint>
<link name="wheel_left_link">
<visual>
<origin xyz="0 0 0" rpy="1.57 0 0"/>
<geometry>
<mesh filename="package://turtlebot3_description/meshes/wheels/left_tire.stl" scale="0.001 0.001 0.001"/>
</geometry>
<material name="dark"/>
</visual>
<collision>
<origin xyz="0 0 0" rpy="0 0 0"/>
<geometry>
<cylinder length="0.018" radius="0.033"/>
</geometry>
</collision>
<inertial>
<origin xyz="0 0 0" />
<mass value="2.8498940e-02" />
<inertia ixx="1.1175580e-05" ixy="-4.2369783e-11" ixz="-5.9381719e-09"
iyy="1.1192413e-05" iyz="-1.4400107e-11"
izz="2.0712558e-05" />
</inertial>
</link>
<joint name="wheel_right_joint" type="continuous">
<parent link="base_link"/>
<child link="wheel_right_link"/>
<origin xyz="0.0 -0.080 0.023" rpy="-1.57 0 0"/>
<axis xyz="0 0 1"/>
</joint>
<link name="wheel_right_link">
<visual>
<origin xyz="0 0 0" rpy="1.57 0 0"/>
<geometry>
<mesh filename="package://turtlebot3_description/meshes/wheels/right_tire.stl" scale="0.001 0.001 0.001"/>
</geometry>
<material name="dark"/>
</visual>
<collision>
<origin xyz="0 0 0" rpy="0 0 0"/>
<geometry>
<cylinder length="0.018" radius="0.033"/>
</geometry>
</collision>
<inertial>
<origin xyz="0 0 0" />
<mass value="2.8498940e-02" />
<inertia ixx="1.1175580e-05" ixy="-4.2369783e-11" ixz="-5.9381719e-09"
iyy="1.1192413e-05" iyz="-1.4400107e-11"
izz="2.0712558e-05" />
</inertial>
</link>
<joint name="caster_back_joint" type="fixed">
<parent link="base_link"/>
<child link="caster_back_link"/>
<origin xyz="-0.081 0 -0.004" rpy="-1.57 0 0"/>
</joint>
<link name="caster_back_link">
<collision>
<origin xyz="0 0.001 0" rpy="0 0 0"/>
<geometry>
<box size="0.030 0.009 0.020"/>
</geometry>
</collision>
<inertial>
<origin xyz="0 0 0" />
<mass value="0.005" />
<inertia ixx="0.001" ixy="0.0" ixz="0.0"
iyy="0.001" iyz="0.0"
izz="0.001" />
</inertial>
</link>
<joint name="imu_joint" type="fixed">
<parent link="base_link"/>
<child link="imu_link"/>
<origin xyz="-0.032 0 0.068" rpy="0 0 0"/>
</joint>
<link name="imu_link"/>
<joint name="scan_joint" type="fixed">
<parent link="base_link"/>
<child link="base_scan"/>
<origin xyz="-0.032 0 0.172" rpy="0 0 0"/>
</joint>
<link name="base_scan">
<visual>
<origin xyz="0 0 0.0" rpy="0 0 0"/>
<geometry>
<mesh filename="package://turtlebot3_description/meshes/sensors/lds.stl" scale="0.001 0.001 0.001"/>
</geometry>
<material name="dark"/>
</visual>
<collision>
<origin xyz="0.015 0 -0.0065" rpy="0 0 0"/>
<geometry>
<cylinder length="0.0315" radius="0.055"/>
</geometry>
</collision>
<inertial>
<mass value="0.114" />
<origin xyz="0 0 0" />
<inertia ixx="0.001" ixy="0.0" ixz="0.0"
iyy="0.001" iyz="0.0"
izz="0.001" />
</inertial>
</link>
</robot>
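For reference, the wheel joints above sit at y = +/-0.08 m (0.16 m wheel separation) and the wheel collision cylinders have a 0.033 m radius. A small illustrative calculation (my own, not from the tutorial) of the wheel speeds implied by the cmd_vel values the DQN node publishes later (0.15 m/s linear, up to 1.5 rad/s angular):
# Differential-drive arithmetic using the geometry in this xacro.
wheel_separation = 0.16   # m, from the wheel joint origins above
wheel_radius = 0.033      # m, from the wheel collision cylinders above

v, w = 0.15, 1.5          # linear (m/s) and max angular (rad/s) commanded by the DQN node
v_left = v - w * wheel_separation / 2.0    # 0.03 m/s
v_right = v + w * wheel_separation / 2.0   # 0.27 m/s
print(v_left / wheel_radius, v_right / wheel_radius)  # wheel speeds in rad/s (~0.91, ~8.18)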
III. Terminal 2: roslaunch turtlebot3_dqn turtlebot3_dqn_stage_1.launch
1. turtlebot3_dqn_stage_1.launch
<launch>
<arg name="stage" default="1"/>
<param name="stage_number" value="$(arg stage)"/>
<node pkg="turtlebot3_dqn" type="turtlebot3_dqn_stage_1" name="turtlebot3_dqn_stage_1" output="screen" />
</launch>
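The launch file only sets the stage_number parameter and starts the node. A minimal sketch of reading it back from Python (the Respawn helper imported by the environment is what actually consumes it, if I recall the turtlebot3_machine_learning code correctly):
import rospy

rospy.init_node('stage_param_demo')
stage = rospy.get_param('stage_number', 1)   # falls back to 1 if the param is missing
print(stage)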
2. node pkg="turtlebot3_dqn" type="turtlebot3_dqn_stage_1"
turtlebot3_dqn_stage_1:
import rospy
import os
import json
import numpy as np
import random
import time
import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
from collections import deque
from std_msgs.msg import Float32MultiArray
from src.turtlebot3_dqn.environment_stage_1 import Env
from keras.models import Sequential, load_model
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout, Activation
EPISODES = 3000
class ReinforceAgent():
def __init__(self, state_size, action_size):
self.pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
self.dirPath = os.path.dirname(os.path.realpath(__file__))
self.dirPath = self.dirPath.replace('turtlebot3_dqn/nodes', 'turtlebot3_dqn/save_model/stage_1_')
self.result = Float32MultiArray()
self.load_model = True
self.load_episode = 30
self.state_size = state_size
self.action_size = action_size
self.episode_step = 6000
self.target_update = 2000
self.discount_factor = 0.99
self.learning_rate = 0.00025
self.epsilon = 1.0
self.epsilon_decay = 0.99
self.epsilon_min = 0.05
self.batch_size = 64
self.train_start = 64
self.memory = deque(maxlen=1000000)
self.model = self.buildModel()
self.target_model = self.buildModel()
self.updateTargetModel()
if self.load_model:
self.model.set_weights(load_model(self.dirPath+str(self.load_episode)+".h5").get_weights())
with open(self.dirPath+str(self.load_episode)+'.json') as outfile:
param = json.load(outfile)
self.epsilon = param.get('epsilon')
print param # fyo
print self.epsilon # fyo
def buildModel(self):
model = Sequential()
dropout = 0.2
model.add(Dense(64, input_shape=(self.state_size,), activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dense(64, activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dropout(dropout))
model.add(Dense(self.action_size, kernel_initializer='lecun_uniform'))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer=RMSprop(lr=self.learning_rate, rho=0.9, epsilon=1e-06))
model.summary()
return model
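For orientation, the network maps a 26-dimensional state to 5 Q-values. A standalone shape check built from the same layers as buildModel (illustrative only, no ROS needed):
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

state_size, action_size = 26, 5
model = Sequential()
model.add(Dense(64, input_shape=(state_size,), activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dense(64, activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dropout(0.2))
model.add(Dense(action_size, kernel_initializer='lecun_uniform'))
model.add(Activation('linear'))

q = model.predict(np.zeros((1, state_size)))
print(q.shape)  # (1, 5): one Q-value per discrete action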
def getQvalue(self, reward, next_target, done):
if done:
return reward
else:
return reward + self.discount_factor * np.amax(next_target)
def updateTargetModel(self):
self.target_model.set_weights(self.model.get_weights())
def getAction(self, state):
if np.random.rand() <= self.epsilon:
self.q_value = np.zeros(self.action_size)
return random.randrange(self.action_size)
else:
q_value = self.model.predict(state.reshape(1, len(state)))
self.q_value = q_value
return np.argmax(q_value[0])
def appendMemory(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def trainModel(self, target=False):
mini_batch = random.sample(self.memory, self.batch_size)
X_batch = np.empty((0, self.state_size), dtype=np.float64)
Y_batch = np.empty((0, self.action_size), dtype=np.float64)
for i in range(self.batch_size):
states = mini_batch[i][0]
actions = mini_batch[i][1]
rewards = mini_batch[i][2]
next_states = mini_batch[i][3]
dones = mini_batch[i][4]
q_value = self.model.predict(states.reshape(1, len(states)))
self.q_value = q_value
if target:
next_target = self.target_model.predict(next_states.reshape(1, len(next_states)))
else:
next_target = self.model.predict(next_states.reshape(1, len(next_states)))
next_q_value = self.getQvalue(rewards, next_target, dones)
X_batch = np.append(X_batch, np.array([states.copy()]), axis=0)
Y_sample = q_value.copy()
Y_sample[0][actions] = next_q_value
Y_batch = np.append(Y_batch, np.array([Y_sample[0]]), axis=0)
if dones:
X_batch = np.append(X_batch, np.array([next_states.copy()]), axis=0)
Y_batch = np.append(Y_batch, np.array([[rewards] * self.action_size]), axis=0)
self.model.fit(X_batch, Y_batch, batch_size=self.batch_size, epochs=1, verbose=0)
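To make the batch construction concrete, here is one training row with hypothetical numbers: the target vector Y equals the current prediction except at the taken action, where the Bellman target is written in, so the MSE loss only pushes on that one output:
import numpy as np

q_value = np.array([[0.10, 0.40, 0.25, 0.05, 0.20]])      # current prediction for state s
next_target = np.array([[0.30, 0.60, 0.10, 0.00, 0.20]])  # prediction for next state s'
action, reward, done = 1, 0.5, False
discount_factor = 0.99

bellman = reward if done else reward + discount_factor * np.amax(next_target)  # 0.5 + 0.99*0.6 = 1.094
y = q_value.copy()
y[0][action] = bellman   # only the taken action's entry changes
print(y)                 # approximately [[0.10, 1.094, 0.25, 0.05, 0.20]]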
if __name__ == '__main__':
rospy.init_node('turtlebot3_dqn_stage_1')
pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
pub_get_action = rospy.Publisher('get_action', Float32MultiArray, queue_size=5)
result = Float32MultiArray()
get_action = Float32MultiArray()
state_size = 26
action_size = 5
env = Env(action_size)
agent = ReinforceAgent(state_size, action_size)
scores, episodes = [], []
global_step = 0
start_time = time.time()
for e in range(agent.load_episode + 1, EPISODES):
done = False
state = env.reset()
score = 0
for t in range(agent.episode_step):
action = agent.getAction(state)
next_state, reward, done = env.step(action)
agent.appendMemory(state, action, reward, next_state, done)
if len(agent.memory) >= agent.train_start:
if global_step <= agent.target_update:
agent.trainModel()
else:
agent.trainModel(True)
score += reward
state = next_state
get_action.data = [action, score, reward]
pub_get_action.publish(get_action)
if e % 10 == 0:
agent.model.save(agent.dirPath + str(e) + '.h5')
with open(agent.dirPath + str(e) + '.json', 'w') as outfile:
json.dump(param_dictionary, outfile)
if t >= 500:
rospy.loginfo("Time out!!")
done = True
if done:
result.data = [score, np.max(agent.q_value)]
pub_result.publish(result)
agent.updateTargetModel()
scores.append(score)
episodes.append(e)
m, s = divmod(int(time.time() - start_time), 60)
h, m = divmod(m, 60)
rospy.loginfo('Ep: %d score: %.2f memory: %d epsilon: %.2f time: %d:%02d:%02d',
e, score, len(agent.memory), agent.epsilon, h, m, s)
param_keys = ['epsilon']
param_values = [agent.epsilon]
param_dictionary = dict(zip(param_keys, param_values))
break
global_step += 1
if global_step % agent.target_update == 0:
rospy.loginfo("UPDATE TARGET NETWORK")
if agent.epsilon > agent.epsilon_min:
agent.epsilon *= agent.epsilon_decay
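One more note on exploration: epsilon is multiplied by epsilon_decay = 0.99 once per episode (floored at epsilon_min = 0.05), so starting from 1.0 it takes roughly 300 episodes to reach the floor. Quick check:
import math
print(math.log(0.05) / math.log(0.99))  # ~298 episodes until epsilon hits epsilon_min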
3. from src.turtlebot3_dqn.environment_stage_1 import Env
environment_stage_1.py:
import rospy
import numpy as np
import math
from math import pi
from geometry_msgs.msg import Twist, Point, Pose
from sensor_msgs.msg import LaserScan
from nav_msgs.msg import Odometry
from std_srvs.srv import Empty
from tf.transformations import euler_from_quaternion, quaternion_from_euler
from respawnGoal import Respawn
class Env():
def __init__(self, action_size):
self.goal_x = 0
self.goal_y = 0
self.heading = 0
self.action_size = action_size
self.initGoal = True
self.get_goalbox = False
self.position = Pose()
self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5)
self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry)
self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty)
self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty)
self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty)
self.respawn_goal = Respawn()
def getGoalDistace(self):
goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2)
return goal_distance
def getOdometry(self, odom):
self.position = odom.pose.pose.position
orientation = odom.pose.pose.orientation
orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w]
_, _, yaw = euler_from_quaternion(orientation_list)
goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x)
heading = goal_angle - yaw
if heading > pi:
heading -= 2 * pi
elif heading < -pi:
heading += 2 * pi
self.heading = round(heading, 2)
def getState(self, scan):
scan_range = []
heading = self.heading
min_range = 0.13
done = False
for i in range(len(scan.ranges)):
if scan.ranges[i] == float('Inf'):
scan_range.append(3.5)
elif np.isnan(scan.ranges[i]):
scan_range.append(0)
else:
scan_range.append(scan.ranges[i])
if min_range > min(scan_range) > 0:
done = True
current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y),2)
if current_distance < 0.2:
self.get_goalbox = True
return scan_range + [heading, current_distance], done
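The returned state is the processed scan plus [heading, current_distance]. Assuming the simulated LDS publishes 24 ranges (the sensor xacro is not shown above, but that is how the TurtleBot3 machine-learning model is usually configured), the length works out to 26, matching state_size in turtlebot3_dqn_stage_1:
# Quick consistency check under the 24-sample assumption described above.
num_scan_samples = 24
state_size = num_scan_samples + 2   # + heading + current_distance
print(state_size)                   # 26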
def setReward(self, state, done, action):
yaw_reward = []
current_distance = state[-1]
heading = state[-2]
for i in range(5):
angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2
tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0])
yaw_reward.append(tr)
distance_rate = 2 ** (current_distance / self.goal_distance)
reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate)
if done:
rospy.loginfo("Collision!!")
reward = -200
self.pub_cmd_vel.publish(Twist())
if self.get_goalbox:
rospy.loginfo("Goal!!")
reward = 200
self.pub_cmd_vel.publish(Twist())
self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True)
self.goal_distance = self.getGoalDistace()
self.get_goalbox = False
return reward
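A worked example of the yaw-reward shaping above, with heading = 0 (goal straight ahead); the straight action (index 2) gets the largest factor:
import math

heading = 0.0
yaw_reward = []
for i in range(5):
    angle = -math.pi / 4 + heading + (math.pi / 8 * i) + math.pi / 2
    tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0])
    yaw_reward.append(tr)
print(yaw_reward)  # approximately [0.5, 0.75, 1.0, 0.75, 0.5]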
def step(self, action):
max_angular_vel = 1.5
ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5
vel_cmd = Twist()
vel_cmd.linear.x = 0.15
vel_cmd.angular.z = ang_vel
self.pub_cmd_vel.publish(vel_cmd)
print vel_cmd # fyo
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
state, done = self.getState(data)
reward = self.setReward(state, done, action)
return np.asarray(state), reward, done
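The action index is mapped linearly to angular velocity while linear.x stays fixed at 0.15 m/s. With action_size = 5 and max_angular_vel = 1.5 the five actions work out to:
action_size, max_angular_vel = 5, 1.5
for action in range(action_size):
    ang_vel = ((action_size - 1) / 2 - action) * max_angular_vel * 0.5
    print(action, ang_vel)
# 0 -> 1.5, 1 -> 0.75, 2 -> 0.0, 3 -> -0.75, 4 -> -1.5 (rad/s)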
def reset(self):
rospy.wait_for_service('gazebo/reset_simulation')
try:
self.reset_proxy()
except (rospy.ServiceException) as e:
print("gazebo/reset_simulation service call failed")
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
if self.initGoal:
self.goal_x, self.goal_y = self.respawn_goal.getPosition()
self.initGoal = False
self.goal_distance = self.getGoalDistace()
state, done = self.getState(data)
return np.asarray(state)