From 4949312a66a95c502838612fea4e13b5686f6006 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 12 Jan 2026 10:30:14 +0800 Subject: [PATCH 1/9] update --- configs/agents/rl/cart_pole/gym_config.json | 63 ++++++++ configs/agents/rl/cart_pole/train_config.json | 64 ++++++++ embodichain/lab/gym/envs/__init__.py | 2 + .../lab/gym/envs/tasks/rl/cart_pole.py | 151 ++++++++++++++++++ 4 files changed, 280 insertions(+) create mode 100644 configs/agents/rl/cart_pole/gym_config.json create mode 100644 configs/agents/rl/cart_pole/train_config.json create mode 100644 embodichain/lab/gym/envs/tasks/rl/cart_pole.py diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json new file mode 100644 index 00000000..9d2cfac8 --- /dev/null +++ b/configs/agents/rl/cart_pole/gym_config.json @@ -0,0 +1,63 @@ +{ + "id": "CartPoleRL", + "max_episodes": 2, + "env": { + "num_envs": 128, + "sim_steps_per_control": 4, + "events": { + }, + "observations": {}, + "extensions": { + "obs_mode": "state", + "action_scale": 0.1, + "episode_length": 100, + "slider_limit": 15, + "pole_limit": 6.283185307, + "veloctity_weight": 0.01 + } + }, + "robot": { + "uid": "Cart", + "urdf_cfg": { + "components": [ + { + "component_type": "arm", + "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole.urdf", + "transform": [ + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0] + ] + } + ] + }, + "init_pos": [0.0, 0.0, 0.0], + "init_rot": [0.0, 0.0, 0.0], + "init_qpos": [0.0, 0.6], + "drive_pros": { + "stiffness": { + "slider_to_cart": 1e2, + "cart_to_pole":1e-2 + }, + "damping": { + "slider_to_cart": 1e1, + "cart_to_pole":1e-3 + }, + "max_effort": { + "slider_to_cart": 1e3, + "cart_to_pole":1e-1 + } + }, + "control_parts": { + "arm": ["slider_to_cart"], + "hand": ["cart_to_pole"] + } + }, + "sensor": [], + "light": { + }, + "rigid_object": [], + "rigid_object_group": [], + "articulation": [] +} diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json new file mode 100644 index 00000000..7a077e9c --- /dev/null +++ b/configs/agents/rl/cart_pole/train_config.json @@ -0,0 +1,64 @@ +{ + "trainer": { + "exp_name": "push_cube_ppo", + "gym_config": "configs/agents/rl/cart_pole/gym_config.json", + "seed": 42, + "device": "cuda:0", + "headless": true, + "iterations": 1000, + "rollout_steps": 1024, + "eval_freq": 2, + "save_freq": 200, + "use_wandb": false, + "wandb_project_name": "embodychain-push_cube", + "events": { + "eval": { + "record_camera": { + "func": "record_camera_data_async", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "main_cam", + "resolution": [640, 480], + "eye": [-1.4, 1.4, 2.0], + "target": [0, 0, 0], + "up": [0, 0, 1], + "intrinsics": [600, 600, 320, 240], + "save_path": "./outputs/videos/eval" + } + } + } + } + }, + "policy": { + "name": "actor_critic", + "actor": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + }, + "critic": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + } + }, + "algorithm": { + "name": "ppo", + "cfg": { + "learning_rate": 0.0001, + "n_epochs": 10, + "batch_size": 8192, + "gamma": 0.99, + "gae_lambda": 0.95, + "clip_coef": 0.2, + "ent_coef": 0.01, + "vf_coef": 0.5, + "max_grad_norm": 0.5 + } + } +} \ No newline at end of file diff --git a/embodichain/lab/gym/envs/__init__.py b/embodichain/lab/gym/envs/__init__.py index 286d7824..b6fd9333 100644 --- a/embodichain/lab/gym/envs/__init__.py +++ b/embodichain/lab/gym/envs/__init__.py @@ -29,3 +29,5 @@ # Reinforcement learning environments from embodichain.lab.gym.envs.tasks.rl.push_cube import PushCubeEnv + +from embodichain.lab.gym.envs.tasks.rl.cart_pole import CartPoleEnv diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py new file mode 100644 index 00000000..7ed0ba64 --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py @@ -0,0 +1,151 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +import torch +import numpy as np +from typing import Dict, Any, Optional, Sequence +from gymnasium import spaces + +from embodichain.lab.gym.utils.registration import register_env +from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg +from embodichain.lab.sim.cfg import MarkerCfg +from embodichain.lab.sim.types import EnvObs, EnvAction +from embodichain.utils import logger + + +@register_env("CartPoleRL", max_episode_steps=50, override=True) +class CartPoleEnv(EmbodiedEnv): + """Push cube task for reinforcement learning. + + The task involves pushing a cube to a target goal position using a robotic arm. + The reward consists of reaching reward, placing reward, action penalty, and success bonus. + """ + + def __init__(self, cfg=None, **kwargs): + if cfg is None: + cfg = EmbodiedEnvCfg() + cfg.sim_cfg.arena_space = 35.0 + extensions = getattr(cfg, "extensions", {}) or {} + + # cfg.sim_cfg.enable_rt = True + + defaults = { + "qvel_weight": 0.01, + "action_scale": 0.02, + "episode_length": 100, + "slider_limit": 15, + "pole_limit": 6.283185307, + "veloctity_weight": 0.01, + } + for name, default in defaults.items(): + value = extensions.get(name, getattr(cfg, name, default)) + setattr(cfg, name, value) + setattr(self, name, getattr(cfg, name)) + + self.last_cube_goal_dist = None + + super().__init__(cfg, **kwargs) + + def _init_sim_state(self, **kwargs): + super()._init_sim_state(**kwargs) + self.single_action_space = spaces.Box( + low=-self.slider_limit, + high=self.slider_limit, + shape=(1,), + dtype=np.float32, + ) + if self.obs_mode == "state": + self.single_observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32 + ) + + def _initialize_episode( + self, env_ids: Optional[Sequence[int]] = None, **kwargs + ) -> None: + super()._initialize_episode(env_ids=env_ids, **kwargs) + + def _step_action(self, action: EnvAction) -> EnvAction: + + if self.obs_mode == "state": + delta_qpos = action[:, 0] + scaled_action = delta_qpos * self.action_scale + scaled_action = torch.clamp( + scaled_action, -self.slider_limit, self.slider_limit + ) + current_qpos = self.robot.get_qpos(name="arm") + target_qpos = current_qpos + scaled_action[:, None] + self.robot.set_qpos(qpos=target_qpos, name="arm") + return scaled_action + + def get_obs(self, **kwargs) -> EnvObs: + qpos = self.robot.get_qpos(name="hand") + qvel = self.robot.get_qvel(name="hand") + if self.obs_mode == "state": + return torch.cat([qpos, qvel], dim=1) + return { + "robot": {"qpos": qpos, "qvel": qvel}, + } + + def get_reward( + self, obs: EnvObs, action: EnvAction, info: Dict[str, Any] + ) -> torch.Tensor: + if self.obs_mode == "state": + qpos = obs[:, 0:1].reshape(-1) + qvel = obs[:, 1:2].reshape(-1) + else: + qpos = obs["robot"]["qpos"].reshape(-1) + qvel = obs["robot"]["qvel"].reshape(-1) + upward_reward = 1 - torch.abs(qpos) / self.pole_limit + velocity_reward = 1 - torch.tanh(torch.abs(qvel)) + reward = upward_reward + self.veloctity_weight * velocity_reward + if "rewards" not in info: + info["rewards"] = {} + info["rewards"]["upward_reward"] = upward_reward + info["rewards"]["velocity_reward"] = velocity_reward + return reward + + def get_info(self, **kwargs) -> Dict[str, Any]: + pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) + pole_qvel = self.robot.get_qvel(name="hand").reshape(-1) + upward_distance = torch.abs(pole_qpos) + is_success = torch.logical_and( + upward_distance < 0.05, torch.abs(pole_qvel) < 0.1 + ) + + info = { + "success": is_success, + "fail": torch.zeros( + self.cfg.num_envs, device=self.device, dtype=torch.bool + ), + "elapsed_steps": self._elapsed_steps, + } + info["metrics"] = { + "upward_distance": upward_distance, + } + return info + + def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor: + is_timeout = self._elapsed_steps >= self.episode_length + pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) + is_fallen = torch.abs(pole_qpos) > torch.pi * 0.25 + return is_timeout | is_fallen + + def evaluate(self, **kwargs) -> Dict[str, Any]: + info = self.get_info(**kwargs) + return { + "success": info["success"][0].item(), + "upward_distance": info["upward_distance"], + } From 7165debb53c9dbea6aa9db0c925586e93932217b Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 12 Jan 2026 12:12:34 +0800 Subject: [PATCH 2/9] update --- configs/agents/rl/cart_pole/gym_config.json | 8 ++++---- embodichain/lab/gym/envs/tasks/rl/cart_pole.py | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json index 9d2cfac8..e43a663a 100644 --- a/configs/agents/rl/cart_pole/gym_config.json +++ b/configs/agents/rl/cart_pole/gym_config.json @@ -34,18 +34,18 @@ }, "init_pos": [0.0, 0.0, 0.0], "init_rot": [0.0, 0.0, 0.0], - "init_qpos": [0.0, 0.6], + "init_qpos": [0.0, 1.0], "drive_pros": { "stiffness": { - "slider_to_cart": 1e2, + "slider_to_cart": 1e-1, "cart_to_pole":1e-2 }, "damping": { - "slider_to_cart": 1e1, + "slider_to_cart": 1e-2, "cart_to_pole":1e-3 }, "max_effort": { - "slider_to_cart": 1e3, + "slider_to_cart": 1e0, "cart_to_pole":1e-1 } }, diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py index 7ed0ba64..8d263c10 100644 --- a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py +++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py @@ -37,7 +37,7 @@ class CartPoleEnv(EmbodiedEnv): def __init__(self, cfg=None, **kwargs): if cfg is None: cfg = EmbodiedEnvCfg() - cfg.sim_cfg.arena_space = 35.0 + cfg.sim_cfg.arena_space = 12.0 extensions = getattr(cfg, "extensions", {}) or {} # cfg.sim_cfg.enable_rt = True @@ -78,7 +78,6 @@ def _initialize_episode( super()._initialize_episode(env_ids=env_ids, **kwargs) def _step_action(self, action: EnvAction) -> EnvAction: - if self.obs_mode == "state": delta_qpos = action[:, 0] scaled_action = delta_qpos * self.action_scale @@ -140,7 +139,7 @@ def get_info(self, **kwargs) -> Dict[str, Any]: def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor: is_timeout = self._elapsed_steps >= self.episode_length pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) - is_fallen = torch.abs(pole_qpos) > torch.pi * 0.25 + is_fallen = torch.abs(pole_qpos) > torch.pi * 0.5 return is_timeout | is_fallen def evaluate(self, **kwargs) -> Dict[str, Any]: From 89dc002b1b3c08f71c743658ed22d70023397b9c Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Jan 2026 16:26:42 +0800 Subject: [PATCH 3/9] update --- configs/agents/rl/cart_pole/gym_config.json | 139 ++++++++++-------- configs/agents/rl/cart_pole/train_config.json | 125 ++++++++-------- embodichain/lab/gym/envs/managers/rewards.py | 15 ++ .../lab/gym/envs/tasks/rl/cart_pole.py | 126 ++-------------- 4 files changed, 174 insertions(+), 231 deletions(-) diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json index e43a663a..b01746cf 100644 --- a/configs/agents/rl/cart_pole/gym_config.json +++ b/configs/agents/rl/cart_pole/gym_config.json @@ -1,63 +1,84 @@ -{ - "id": "CartPoleRL", - "max_episodes": 2, - "env": { - "num_envs": 128, - "sim_steps_per_control": 4, - "events": { - }, - "observations": {}, - "extensions": { - "obs_mode": "state", - "action_scale": 0.1, - "episode_length": 100, - "slider_limit": 15, - "pole_limit": 6.283185307, - "veloctity_weight": 0.01 - } - }, - "robot": { - "uid": "Cart", - "urdf_cfg": { - "components": [ - { - "component_type": "arm", - "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole.urdf", - "transform": [ - [1.0, 0.0, 0.0, 0.0], - [0.0, 1.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 1.0] - ] +{ + "id": "CartPoleRL", + "max_episodes": 5, + "env": { + "events": {}, + "observations": { + "robot_qpos": { + "func": "normalize_robot_joint_data", + "mode": "modify", + "name": "robot/qpos", + "params": { + "joint_ids": [0, 1] + } + } + }, + "rewards": { + "upward_reward": { + "func": "pole_upward", + "mode": "add", + "weight": 1.0, + "params": {} + }, + "velocity_penalty": { + "func": "joint_velocity_penalty", + "mode": "add", + "weight": 0.01, + "params": { + "robot_uid": "Cart", + "part_name": "hand" + } + } + }, + "extensions": { + "action_type": "delta_qpos", + "episode_length": 500, + "action_scale": 0.1, + "success_threshold": 0.1 } - ] }, - "init_pos": [0.0, 0.0, 0.0], - "init_rot": [0.0, 0.0, 0.0], - "init_qpos": [0.0, 1.0], - "drive_pros": { - "stiffness": { - "slider_to_cart": 1e-1, - "cart_to_pole":1e-2 - }, - "damping": { - "slider_to_cart": 1e-2, - "cart_to_pole":1e-3 - }, - "max_effort": { - "slider_to_cart": 1e0, - "cart_to_pole":1e-1 - } + "robot": { + "uid": "Cart", + "urdf_cfg": { + "components": [ + { + "component_type": "arm", + "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole2.urdf", + "transform": [ + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0] + ] + } + ] + }, + "init_pos": [0.0, 0.0, 0.0], + "init_rot": [0.0, 0.0, 0.0], + "init_qpos": [0.0, 0.3], + "drive_pros": { + "stiffness": { + "slider_to_cart": 1e1, + "cart_to_pole":1e-2 + }, + "damping": { + "slider_to_cart": 1e0, + "cart_to_pole":1e-3 + }, + "max_effort": { + "slider_to_cart": 1e2, + "cart_to_pole":1e-1 + } + }, + "control_parts": { + "arm": ["slider_to_cart"], + "hand": ["cart_to_pole"] + } }, - "control_parts": { - "arm": ["slider_to_cart"], - "hand": ["cart_to_pole"] - } - }, - "sensor": [], - "light": { - }, - "rigid_object": [], - "rigid_object_group": [], - "articulation": [] + "sensor": [], + "light": {}, + "background": [], + "rigid_object": [], + "rigid_object_group": [], + "articulation": [] } diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json index 7a077e9c..8481e4df 100644 --- a/configs/agents/rl/cart_pole/train_config.json +++ b/configs/agents/rl/cart_pole/train_config.json @@ -1,64 +1,67 @@ -{ - "trainer": { - "exp_name": "push_cube_ppo", - "gym_config": "configs/agents/rl/cart_pole/gym_config.json", - "seed": 42, - "device": "cuda:0", - "headless": true, - "iterations": 1000, - "rollout_steps": 1024, - "eval_freq": 2, - "save_freq": 200, - "use_wandb": false, - "wandb_project_name": "embodychain-push_cube", - "events": { - "eval": { - "record_camera": { - "func": "record_camera_data_async", - "mode": "interval", - "interval_step": 1, - "params": { - "name": "main_cam", - "resolution": [640, 480], - "eye": [-1.4, 1.4, 2.0], - "target": [0, 0, 0], - "up": [0, 0, 1], - "intrinsics": [600, 600, 320, 240], - "save_path": "./outputs/videos/eval" - } +{ + "trainer": { + "exp_name": "push_cube_ppo", + "gym_config": "configs/agents/rl/cart_pole/gym_config.json", + "seed": 42, + "device": "cuda:0", + "headless": true, + "enable_rt": false, + "gpu_id": 0, + "num_envs": 64, + "iterations": 1000, + "rollout_steps": 1024, + "eval_freq": 200, + "save_freq": 200, + "use_wandb": false, + "wandb_project_name": "embodychain-push_cube", + "events": { + "eval": { + "record_camera": { + "func": "record_camera_data_async", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "main_cam", + "resolution": [640, 480], + "eye": [-1.4, 1.4, 2.0], + "target": [0, 0, 0], + "up": [0, 0, 1], + "intrinsics": [600, 600, 320, 240], + "save_path": "./outputs/videos/eval" + } + } + } } - } - } - }, - "policy": { - "name": "actor_critic", - "actor": { - "type": "mlp", - "network_cfg": { - "hidden_sizes": [256, 256], - "activation": "relu" - } }, - "critic": { - "type": "mlp", - "network_cfg": { - "hidden_sizes": [256, 256], - "activation": "relu" - } - } - }, - "algorithm": { - "name": "ppo", - "cfg": { - "learning_rate": 0.0001, - "n_epochs": 10, - "batch_size": 8192, - "gamma": 0.99, - "gae_lambda": 0.95, - "clip_coef": 0.2, - "ent_coef": 0.01, - "vf_coef": 0.5, - "max_grad_norm": 0.5 + "policy": { + "name": "actor_critic", + "actor": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + }, + "critic": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + } + }, + "algorithm": { + "name": "ppo", + "cfg": { + "learning_rate": 0.0001, + "n_epochs": 10, + "batch_size": 8192, + "gamma": 0.99, + "gae_lambda": 0.95, + "clip_coef": 0.2, + "ent_coef": 0.01, + "vf_coef": 0.5, + "max_grad_norm": 0.5 + } } - } -} \ No newline at end of file +} diff --git a/embodichain/lab/gym/envs/managers/rewards.py b/embodichain/lab/gym/envs/managers/rewards.py index dad89b7f..9b7818a9 100644 --- a/embodichain/lab/gym/envs/managers/rewards.py +++ b/embodichain/lab/gym/envs/managers/rewards.py @@ -658,3 +658,18 @@ def incremental_distance_to_target( env._reward_states[state_key] = current_dist.clone() return reward + + +def pole_upward( + env: EmbodiedEnv, + obs: dict, + action: torch.Tensor | dict, + info: dict, + object_cfg: SceneEntityCfg = None, +): + robot = env.robot + pole_qpos = robot.get_qpos(name="hand").reshape(-1) # [num_envs, ] + + normalized_upward = torch.abs(pole_qpos) / torch.pi + reward = 1.0 - normalized_upward + return reward diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py index 8d263c10..72840fe3 100644 --- a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py +++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py @@ -15,19 +15,16 @@ # ---------------------------------------------------------------------------- import torch -import numpy as np -from typing import Dict, Any, Optional, Sequence -from gymnasium import spaces +from typing import Dict, Any, Tuple from embodichain.lab.gym.utils.registration import register_env -from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg -from embodichain.lab.sim.cfg import MarkerCfg -from embodichain.lab.sim.types import EnvObs, EnvAction -from embodichain.utils import logger +from embodichain.lab.gym.envs.rl_env import RLEnv +from embodichain.lab.gym.envs import EmbodiedEnvCfg +from embodichain.lab.sim.types import EnvObs @register_env("CartPoleRL", max_episode_steps=50, override=True) -class CartPoleEnv(EmbodiedEnv): +class CartPoleEnv(RLEnv): """Push cube task for reinforcement learning. The task involves pushing a cube to a target goal position using a robotic arm. @@ -37,114 +34,21 @@ class CartPoleEnv(EmbodiedEnv): def __init__(self, cfg=None, **kwargs): if cfg is None: cfg = EmbodiedEnvCfg() - cfg.sim_cfg.arena_space = 12.0 - extensions = getattr(cfg, "extensions", {}) or {} - - # cfg.sim_cfg.enable_rt = True - - defaults = { - "qvel_weight": 0.01, - "action_scale": 0.02, - "episode_length": 100, - "slider_limit": 15, - "pole_limit": 6.283185307, - "veloctity_weight": 0.01, - } - for name, default in defaults.items(): - value = extensions.get(name, getattr(cfg, name, default)) - setattr(cfg, name, value) - setattr(self, name, getattr(cfg, name)) - - self.last_cube_goal_dist = None - super().__init__(cfg, **kwargs) - def _init_sim_state(self, **kwargs): - super()._init_sim_state(**kwargs) - self.single_action_space = spaces.Box( - low=-self.slider_limit, - high=self.slider_limit, - shape=(1,), - dtype=np.float32, - ) - if self.obs_mode == "state": - self.single_observation_space = spaces.Box( - low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32 - ) - - def _initialize_episode( - self, env_ids: Optional[Sequence[int]] = None, **kwargs - ) -> None: - super()._initialize_episode(env_ids=env_ids, **kwargs) - - def _step_action(self, action: EnvAction) -> EnvAction: - if self.obs_mode == "state": - delta_qpos = action[:, 0] - scaled_action = delta_qpos * self.action_scale - scaled_action = torch.clamp( - scaled_action, -self.slider_limit, self.slider_limit - ) - current_qpos = self.robot.get_qpos(name="arm") - target_qpos = current_qpos + scaled_action[:, None] - self.robot.set_qpos(qpos=target_qpos, name="arm") - return scaled_action - - def get_obs(self, **kwargs) -> EnvObs: - qpos = self.robot.get_qpos(name="hand") - qvel = self.robot.get_qvel(name="hand") - if self.obs_mode == "state": - return torch.cat([qpos, qvel], dim=1) - return { - "robot": {"qpos": qpos, "qvel": qvel}, - } - - def get_reward( - self, obs: EnvObs, action: EnvAction, info: Dict[str, Any] - ) -> torch.Tensor: - if self.obs_mode == "state": - qpos = obs[:, 0:1].reshape(-1) - qvel = obs[:, 1:2].reshape(-1) - else: - qpos = obs["robot"]["qpos"].reshape(-1) - qvel = obs["robot"]["qvel"].reshape(-1) - upward_reward = 1 - torch.abs(qpos) / self.pole_limit - velocity_reward = 1 - torch.tanh(torch.abs(qvel)) - reward = upward_reward + self.veloctity_weight * velocity_reward - if "rewards" not in info: - info["rewards"] = {} - info["rewards"]["upward_reward"] = upward_reward - info["rewards"]["velocity_reward"] = velocity_reward - return reward - - def get_info(self, **kwargs) -> Dict[str, Any]: - pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) - pole_qvel = self.robot.get_qvel(name="hand").reshape(-1) - upward_distance = torch.abs(pole_qpos) - is_success = torch.logical_and( - upward_distance < 0.05, torch.abs(pole_qvel) < 0.1 - ) - - info = { - "success": is_success, - "fail": torch.zeros( - self.cfg.num_envs, device=self.device, dtype=torch.bool - ), - "elapsed_steps": self._elapsed_steps, - } - info["metrics"] = { - "upward_distance": upward_distance, - } - return info + def compute_task_state( + self, **kwargs + ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]: + qpos = self.robot.get_qpos(name="hand").reshape(-1) # [num_envs, ] + qvel = self.robot.get_qvel(name="hand").reshape(-1) # [num_envs, ] + upward_distance = torch.abs(qpos) + is_success = torch.logical_and(upward_distance < 0.05, torch.abs(qvel) < 0.1) + is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) + metrics = {"distance_to_goal": upward_distance} + return is_success, is_fail, metrics def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor: is_timeout = self._elapsed_steps >= self.episode_length pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) is_fallen = torch.abs(pole_qpos) > torch.pi * 0.5 return is_timeout | is_fallen - - def evaluate(self, **kwargs) -> Dict[str, Any]: - info = self.get_info(**kwargs) - return { - "success": info["success"][0].item(), - "upward_distance": info["upward_distance"], - } From ed67efb1cf349a9c12a3ae24a580fc1666e34cb8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Jan 2026 09:53:13 +0800 Subject: [PATCH 4/9] update --- configs/agents/rl/cart_pole/gym_config.json | 4 ++-- configs/agents/rl/cart_pole/train_config.json | 4 ++-- embodichain/lab/gym/envs/rl_env.py | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json index b01746cf..a5925292 100644 --- a/configs/agents/rl/cart_pole/gym_config.json +++ b/configs/agents/rl/cart_pole/gym_config.json @@ -53,9 +53,9 @@ } ] }, - "init_pos": [0.0, 0.0, 0.0], + "init_pos": [0.0, 0.0, 0.5], "init_rot": [0.0, 0.0, 0.0], - "init_qpos": [0.0, 0.3], + "init_qpos": [-0.2, 0.07], "drive_pros": { "stiffness": { "slider_to_cart": 1e1, diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json index 8481e4df..5f1b55cf 100644 --- a/configs/agents/rl/cart_pole/train_config.json +++ b/configs/agents/rl/cart_pole/train_config.json @@ -23,8 +23,8 @@ "params": { "name": "main_cam", "resolution": [640, 480], - "eye": [-1.4, 1.4, 2.0], - "target": [0, 0, 0], + "eye": [-1.4, 1.4, 2.5], + "target": [0, 0, 0.7], "up": [0, 0, 1], "intrinsics": [600, 600, 320, 240], "save_path": "./outputs/videos/eval" diff --git a/embodichain/lab/gym/envs/rl_env.py b/embodichain/lab/gym/envs/rl_env.py index 9ce6b5f1..a14adc67 100644 --- a/embodichain/lab/gym/envs/rl_env.py +++ b/embodichain/lab/gym/envs/rl_env.py @@ -244,8 +244,9 @@ def get_info(self, **kwargs) -> Dict[str, Any]: "success": success, "fail": fail, "elapsed_steps": self._elapsed_steps, - "goal_pose": self.goal_pose, } + if hasattr(self, "_goal_pose") and self._goal_pose is not None: + info["goal_pose"] = self._goal_pose info["metrics"] = metrics return info From 8177cfac93e82585f6778975b0a73c6a519e1b96 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Jan 2026 19:06:53 +0800 Subject: [PATCH 5/9] update --- .../rl/{ => basic}/cart_pole/gym_config.json | 2 +- .../{ => basic}/cart_pole/train_config.json | 2 +- embodichain/data/assets/robot_assets.py | 32 +++++++++++++++++++ embodichain/lab/gym/envs/__init__.py | 2 +- .../envs/tasks/rl/{ => basic}/cart_pole.py | 0 5 files changed, 35 insertions(+), 3 deletions(-) rename configs/agents/rl/{ => basic}/cart_pole/gym_config.json (96%) rename configs/agents/rl/{ => basic}/cart_pole/train_config.json (96%) rename embodichain/lab/gym/envs/tasks/rl/{ => basic}/cart_pole.py (100%) diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json similarity index 96% rename from configs/agents/rl/cart_pole/gym_config.json rename to configs/agents/rl/basic/cart_pole/gym_config.json index a5925292..1dd5c2f7 100644 --- a/configs/agents/rl/cart_pole/gym_config.json +++ b/configs/agents/rl/basic/cart_pole/gym_config.json @@ -43,7 +43,7 @@ "components": [ { "component_type": "arm", - "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole2.urdf", + "urdf_path": "CartPole/cart_pole.urdf", "transform": [ [1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json similarity index 96% rename from configs/agents/rl/cart_pole/train_config.json rename to configs/agents/rl/basic/cart_pole/train_config.json index 5f1b55cf..6b91fab4 100644 --- a/configs/agents/rl/cart_pole/train_config.json +++ b/configs/agents/rl/basic/cart_pole/train_config.json @@ -1,7 +1,7 @@ { "trainer": { "exp_name": "push_cube_ppo", - "gym_config": "configs/agents/rl/cart_pole/gym_config.json", + "gym_config": "configs/agents/rl/basic/cart_pole/gym_config.json", "seed": 42, "device": "cuda:0", "headless": true, diff --git a/embodichain/data/assets/robot_assets.py b/embodichain/data/assets/robot_assets.py index 5251621e..9d9860d6 100644 --- a/embodichain/data/assets/robot_assets.py +++ b/embodichain/data/assets/robot_assets.py @@ -469,3 +469,35 @@ def __init__(self, data_root: str = None): path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root super().__init__(prefix, data_descriptor, path) + + +class CartPole(EmbodiChainDataset): + """Dataset class for the CartPole. + + Directory structure: + cart_pole/ + cart_pole.urdf + cart.mtl + cart.obj + pole.mtl + pole.obj + slide_bar.mtl + slide_bar.obj + + Example usage: + >>> from embodichain.data.robot_dataset import CartPole + >>> dataset = CartPole() + or + >>> from embodichain.data import get_data_path + >>> print(get_data_path("CartPole/cart_pole.urdf")) + """ + + def __init__(self, data_root: str = None): + data_descriptor = o3d.data.DataDescriptor( + os.path.join(EMBODICHAIN_DOWNLOAD_PREFIX, robot_assets, "cart_pole.zip"), + "9d185eb18b19f9c95153e01943c5b0a2", + ) + prefix = "cart_pole" + path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root + + super().__init__(prefix, data_descriptor, path) diff --git a/embodichain/lab/gym/envs/__init__.py b/embodichain/lab/gym/envs/__init__.py index e7f57863..d4fa9148 100644 --- a/embodichain/lab/gym/envs/__init__.py +++ b/embodichain/lab/gym/envs/__init__.py @@ -37,6 +37,6 @@ # Reinforcement learning environments from embodichain.lab.gym.envs.tasks.rl.push_cube import PushCubeEnv -from embodichain.lab.gym.envs.tasks.rl.cart_pole import CartPoleEnv +from embodichain.lab.gym.envs.tasks.rl.basic.cart_pole import CartPoleEnv from embodichain.lab.gym.envs.tasks.special.simple_task import SimpleTaskEnv diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py similarity index 100% rename from embodichain/lab/gym/envs/tasks/rl/cart_pole.py rename to embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py From 3d01a8aba973c7a629e13d38757e5ef107de0d6e Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Jan 2026 19:25:45 +0800 Subject: [PATCH 6/9] update --- configs/agents/rl/basic/cart_pole/gym_config.json | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json index 1dd5c2f7..ce35f18f 100644 --- a/configs/agents/rl/basic/cart_pole/gym_config.json +++ b/configs/agents/rl/basic/cart_pole/gym_config.json @@ -43,13 +43,7 @@ "components": [ { "component_type": "arm", - "urdf_path": "CartPole/cart_pole.urdf", - "transform": [ - [1.0, 0.0, 0.0, 0.0], - [0.0, 1.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 1.0] - ] + "urdf_path": "CartPole/cart_pole.urdf" } ] }, From 1d467354904f004c5f62a034398dcc7a9138e550 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Jan 2026 19:42:45 +0800 Subject: [PATCH 7/9] update --- .../agents/rl/basic/cart_pole/gym_config.json | 6 ---- .../rl/basic/cart_pole/train_config.json | 2 +- embodichain/lab/gym/envs/managers/rewards.py | 15 ---------- .../lab/gym/envs/tasks/rl/basic/cart_pole.py | 30 +++++++++++++++++-- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json index ce35f18f..ab762402 100644 --- a/configs/agents/rl/basic/cart_pole/gym_config.json +++ b/configs/agents/rl/basic/cart_pole/gym_config.json @@ -14,12 +14,6 @@ } }, "rewards": { - "upward_reward": { - "func": "pole_upward", - "mode": "add", - "weight": 1.0, - "params": {} - }, "velocity_penalty": { "func": "joint_velocity_penalty", "mode": "add", diff --git a/configs/agents/rl/basic/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json index 6b91fab4..06ea85c0 100644 --- a/configs/agents/rl/basic/cart_pole/train_config.json +++ b/configs/agents/rl/basic/cart_pole/train_config.json @@ -13,7 +13,7 @@ "eval_freq": 200, "save_freq": 200, "use_wandb": false, - "wandb_project_name": "embodychain-push_cube", + "wandb_project_name": "embodychain-cart_pole", "events": { "eval": { "record_camera": { diff --git a/embodichain/lab/gym/envs/managers/rewards.py b/embodichain/lab/gym/envs/managers/rewards.py index 9b7818a9..dad89b7f 100644 --- a/embodichain/lab/gym/envs/managers/rewards.py +++ b/embodichain/lab/gym/envs/managers/rewards.py @@ -658,18 +658,3 @@ def incremental_distance_to_target( env._reward_states[state_key] = current_dist.clone() return reward - - -def pole_upward( - env: EmbodiedEnv, - obs: dict, - action: torch.Tensor | dict, - info: dict, - object_cfg: SceneEntityCfg = None, -): - robot = env.robot - pole_qpos = robot.get_qpos(name="hand").reshape(-1) # [num_envs, ] - - normalized_upward = torch.abs(pole_qpos) / torch.pi - reward = 1.0 - normalized_upward - return reward diff --git a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py index 72840fe3..90fe959d 100644 --- a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py +++ b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py @@ -25,10 +25,14 @@ @register_env("CartPoleRL", max_episode_steps=50, override=True) class CartPoleEnv(RLEnv): - """Push cube task for reinforcement learning. + """ + CartPole balancing task for reinforcement learning. - The task involves pushing a cube to a target goal position using a robotic arm. - The reward consists of reaching reward, placing reward, action penalty, and success bonus. + The agent controls a cart (robot hand joint) to keep a pole balanced near the upright + position by regulating its angle and angular velocity. Episodes are considered + successful when the pole remains close to vertical with low velocity, and they + terminate either when a maximum number of steps is reached or when the pole falls + beyond an allowed tilt threshold. """ def __init__(self, cfg=None, **kwargs): @@ -36,6 +40,26 @@ def __init__(self, cfg=None, **kwargs): cfg = EmbodiedEnvCfg() super().__init__(cfg, **kwargs) + def get_reward(self, obs, action, info): + """Get the reward for the current step (pole upward reward). + + Each SimulationManager env must implement its own get_reward function to define the reward function for the task, If the + env is considered for RL/IL training. + + Args: + obs: The observation from the environment. + action: The action applied to the robot agent. + info: The info dictionary. + + Returns: + The reward for the current step. + """ + pole_qpos = self.robot.get_qpos(name="hand").reshape(-1) # [num_envs, ] + + normalized_upward = torch.abs(pole_qpos) / torch.pi + reward = 1.0 - normalized_upward + return reward + def compute_task_state( self, **kwargs ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]: From 3116c3b283b5eb68765d283af09859c8876a01a5 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 30 Jan 2026 10:24:00 +0800 Subject: [PATCH 8/9] update --- configs/agents/rl/basic/cart_pole/gym_config.json | 2 +- embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json index ab762402..a343af16 100644 --- a/configs/agents/rl/basic/cart_pole/gym_config.json +++ b/configs/agents/rl/basic/cart_pole/gym_config.json @@ -17,7 +17,7 @@ "velocity_penalty": { "func": "joint_velocity_penalty", "mode": "add", - "weight": 0.01, + "weight": 0.005, "params": { "robot_uid": "Cart", "part_name": "hand" diff --git a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py index 90fe959d..c40c3fc6 100644 --- a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py +++ b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py @@ -66,7 +66,7 @@ def compute_task_state( qpos = self.robot.get_qpos(name="hand").reshape(-1) # [num_envs, ] qvel = self.robot.get_qvel(name="hand").reshape(-1) # [num_envs, ] upward_distance = torch.abs(qpos) - is_success = torch.logical_and(upward_distance < 0.05, torch.abs(qvel) < 0.1) + is_success = torch.logical_and(upward_distance < 0.02, torch.abs(qvel) < 0.05) is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) metrics = {"distance_to_goal": upward_distance} return is_success, is_fail, metrics From 51a850db74b394e603c0d2b75b0282faa6ffecf3 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 30 Jan 2026 10:41:03 +0800 Subject: [PATCH 9/9] update --- configs/agents/rl/basic/cart_pole/train_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/agents/rl/basic/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json index 06ea85c0..8412fe36 100644 --- a/configs/agents/rl/basic/cart_pole/train_config.json +++ b/configs/agents/rl/basic/cart_pole/train_config.json @@ -10,7 +10,7 @@ "num_envs": 64, "iterations": 1000, "rollout_steps": 1024, - "eval_freq": 200, + "eval_freq": 2, "save_freq": 200, "use_wandb": false, "wandb_project_name": "embodychain-cart_pole",