I made a direct environment for Franka lifting with two robots. Below are my config and some code snippets.
I am using MAPPO from skrl.
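For context, the per-agent spaces and the shared state space in my direct environment look roughly like the sketch below. This is a minimal, hypothetical config snippet assuming Isaac Lab's `DirectMARLEnvCfg` (class and import names may differ by Isaac Lab version); the widths are taken from the observation/state code further down, and `rb_1`/`rb_2` are the agent keys used throughout.

```python
# Minimal sketch (assumption, not my exact config): declaring the two agents and
# their spaces for a direct multi-agent environment. The widths follow the
# observation/state code below; adjust them if the actual tensor widths differ.
from isaaclab.envs import DirectMARLEnvCfg  # older versions: omni.isaac.lab.envs
from isaaclab.utils import configclass


@configclass
class TwoArmsLiftEnvCfg(DirectMARLEnvCfg):  # hypothetical name
    possible_agents = ["rb_1", "rb_2"]
    action_spaces = {"rb_1": 9, "rb_2": 9}         # 7 arm joints + 2 fingers each
    observation_spaces = {"rb_1": 33, "rb_2": 33}  # 9 + 9 + 3 + 3 + 9 per agent
    state_space = 2 * 34 + 4 * 7                   # 96, if each term in _get_states() has the commented width
```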
MAPPO parameters
seed: 42

models:
  separate: True
  policy:  # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ACTIONS
  value:  # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ONE

memory:
  class: RandomMemory
  memory_size: -1  # automatically determined (same as agent:rollouts)

agent:
  class: MAPPO
  rollouts: 24
  learning_epochs: 5
  mini_batches: 2
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 1.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  shared_state_preprocessor: RunningStandardScaler
  shared_state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.01
  value_loss_scale: 1.0
  kl_threshold: 0.0
  rewards_shaper_scale: 0.01
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "two_arms_lift_coll_mappo_cfg_2_test"
    experiment_name: "test_configuration"
    write_interval: auto
    checkpoint_interval: auto

# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 36000
  # timesteps: 72000
  environment_info: log
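For completeness, a rough sketch of how a config like the one above is consumed by skrl's Runner (not my exact launch script; `create_two_arm_lift_env` and the YAML file name are placeholders):

```python
# Sketch only: loading the MAPPO YAML above and running it with skrl's Runner.
# `create_two_arm_lift_env` is a placeholder for however the environment is built.
import yaml

from skrl.envs.wrappers.torch import wrap_env
from skrl.utils.runner.torch import Runner

env = create_two_arm_lift_env()         # placeholder: returns the direct multi-agent env instance
env = wrap_env(env)                     # wraps the environment for skrl (auto-detected)

with open("skrl_mappo_cfg.yaml") as f:  # hypothetical file name for the YAML above
    cfg = yaml.safe_load(f)

runner = Runner(env, cfg)               # instantiates models, memory, the MAPPO agent and the trainer
runner.run()                            # trains for trainer.timesteps (36000)
```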
Action pre-process and action apply
#######################
# Pre physics step
#######################
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
    self.actions = actions
    # make gripper actions binary
    robot_1_targets = self.robot_1_dof_targets + self.actions["rb_1"].clone()
    self.robot_1_dof_targets[self.robot_1_left_finger_link_idx] = torch.where(robot_1_targets[self.robot_1_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
    self.robot_1_dof_targets[self.robot_1_right_finger_link_idx] = torch.where(robot_1_targets[self.robot_1_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
    # make gripper actions binary
    robot_2_targets = self.robot_2_dof_targets + self.actions["rb_2"].clone()
    self.robot_2_dof_targets[self.robot_2_left_finger_link_idx] = torch.where(robot_2_targets[self.robot_2_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
    self.robot_2_dof_targets[self.robot_2_right_finger_link_idx] = torch.where(robot_2_targets[self.robot_2_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
    # update the joint targets
    self.robot_1_dof_targets[:] = torch.clamp(robot_1_targets, self.rb_1_dof_low_limits, self.rb_1_dof_up_limits)
    self.robot_2_dof_targets[:] = torch.clamp(robot_2_targets, self.rb_2_dof_low_limits, self.rb_2_dof_up_limits)

def _apply_action(self) -> None:
    self.robot_1.set_joint_position_target(self.robot_1_dof_targets)
    self.robot_2.set_joint_position_target(self.robot_2_dof_targets)
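Below is a small, standalone toy version of the same target update (made-up sizes and limits), mainly to show the tensor shapes involved: the targets are `(num_envs, num_dofs)`, so in this variant individual joints are addressed per column with `[:, idx]` and the finger columns are binarized after the clamp so the clamp does not overwrite them.

```python
# Toy, standalone illustration (made-up sizes/limits) of the delta-action target
# update on a (num_envs, num_dofs) tensor. In this variant the finger columns are
# binarized after the clamp and joints are indexed per column with [:, idx].
import torch

num_envs, num_dofs = 4, 9
finger_idx = [7, 8]                      # hypothetical finger joint columns
open_t = torch.tensor(0.04)              # hypothetical open translation
close_t = torch.tensor(0.0)              # hypothetical close translation
low = torch.full((num_dofs,), -2.0)      # hypothetical lower joint limits
high = torch.full((num_dofs,), 2.0)      # hypothetical upper joint limits

dof_targets = torch.zeros(num_envs, num_dofs)
actions = 0.1 * torch.randn(num_envs, num_dofs)

targets = torch.clamp(dof_targets + actions, low, high)  # accumulate deltas, keep inside limits
targets[:, finger_idx] = torch.where(targets[:, finger_idx] > 0, open_t, close_t)
dof_targets[:] = targets                                 # these become the position targets
```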
Observation and states
def _joint_pos_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
    """Difference between the current joint positions and the default joint positions."""
    return robot.data.joint_pos[:, joint_ids] - robot.data.default_joint_pos[:, joint_ids]

def _joint_vel_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
    """Difference between the current joint velocities and the default joint velocities."""
    return robot.data.joint_vel[:, joint_ids] - robot.data.default_joint_vel[:, joint_ids]

def _get_observations(self) -> dict[str, torch.Tensor]:
    self.robot_1_prev_actions = self.actions["rb_1"].clone()
    self.robot_2_prev_actions = self.actions["rb_2"].clone()
    observation = dict()
    self.joint_pos_rel_1 = self._joint_pos_rel(robot=self.robot_1, joint_ids=self.joint_ids)
    self.joint_pos_rel_2 = self._joint_pos_rel(robot=self.robot_2, joint_ids=self.joint_ids)
    self.joint_vel_rel_1 = self._joint_vel_rel(robot=self.robot_1, joint_ids=self.joint_ids)
    self.joint_vel_rel_2 = self._joint_vel_rel(robot=self.robot_2, joint_ids=self.joint_ids)
    self.object_A_pos_b = self._object_position_in_robot_root_frame(
        self._object_A.data.root_pos_w, self.robot_1.data.root_link_state_w)
    self.object_B_pos_b = self._object_position_in_robot_root_frame(
        self._object_B.data.root_pos_w, self.robot_2.data.root_link_state_w)
    # for the lifting environment
    obs_robot_1 = torch.cat(
        (
            self.joint_pos_rel_1,         # 9
            self.joint_vel_rel_1,         # 9
            self.object_A_pos_b,          # 3; 21
            self.target_1_pose_b[:, :3],  # 3; 24
            self.actions["rb_1"],         # 9; 33
        ),
        dim=-1,
    )
    obs_robot_2 = torch.cat(
        (
            self.joint_pos_rel_2,         # for joint positions reward
            self.joint_vel_rel_2,         # for joint velocities reward
            self.object_B_pos_b,          # target object position
            self.target_2_pose_b[:, :3],  # target object position
            self.actions["rb_2"],         # actions
        ),
        dim=-1,
    )
    observation["rb_1"] = obs_robot_1
    observation["rb_2"] = obs_robot_2
    return observation

def _get_states(self) -> torch.Tensor:
    states = torch.cat(
        (
            # --- Robot 1 ---
            self.joint_pos_rel_1,          # DOF positions (9)
            self.joint_vel_rel_1,          # DOF velocities (9)
            self.ee_1_pos_w,               # ee pos _w (3)
            self.ee_1_rot_w,               # ee rot _w (4)
            self.robot_1_dof_targets,      # applied actions (9); 34
            # --- Robot 2 ---
            self.joint_pos_rel_2,          # DOF positions (9); 43
            self.joint_vel_rel_2,          # DOF velocities (9); 52
            self.ee_2_pos_w,               # ee pos _w (3); 55
            self.ee_2_rot_w,               # ee rot _w (4); 59
            self.robot_2_dof_targets,      # applied actions (9); 68
            # --- object 1 ---
            self._object_A_pos_b,          # object A positions (3); 77
            self._object_A_rot_b,          # object A rotations (4); 81
            # --- object 2 ---
            self._object_B_pos_b,          # object B positions (3); 84
            self._object_B_rot_b,          # object B rotations (4); 88
            # --- target 1 ---
            self.target_1_pose_b[:, :3],   # target 1 positions (3); 91
            self.target_1_pose_b[:, 3:7],  # target 1 rotations (4); 95
            # --- target 2 ---
            self.target_2_pose_b[:, :3],   # target 2 positions (3); 98
            self.target_2_pose_b[:, 3:7],  # target 2 rotations (4); 102
        ),
        dim=-1,
    )
    return states
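A quick shape check along the lines of the sketch below (run inside the environment class; expected widths follow the inline comments above, so adjust them if the actual term widths differ) confirms that the per-agent observations and the shared state match the configured spaces.

```python
# Sketch of a shape sanity check inside the environment class. Expected widths
# follow the inline comments above; adjust them if the actual widths differ.
obs = self._get_observations()
state = self._get_states()

expected_obs = 9 + 9 + 3 + 3 + 9  # 33 per agent
for agent, tensor in obs.items():
    assert tensor.shape == (self.num_envs, expected_obs), (agent, tensor.shape)

expected_state = 2 * (9 + 9 + 3 + 4 + 9) + 4 * (3 + 4)  # 96
assert state.shape == (self.num_envs, expected_state), state.shape
```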
The reward functions are the same as in the manager-based environment.
Video
shake.shake.mp4