From 4949312a66a95c502838612fea4e13b5686f6006 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Mon, 12 Jan 2026 10:30:14 +0800
Subject: [PATCH 1/9] update

---
 configs/agents/rl/cart_pole/gym_config.json   |  63 ++++++++
 configs/agents/rl/cart_pole/train_config.json |  64 ++++++++
 embodichain/lab/gym/envs/__init__.py          |   2 +
 .../lab/gym/envs/tasks/rl/cart_pole.py        | 151 ++++++++++++++++++
 4 files changed, 280 insertions(+)
 create mode 100644 configs/agents/rl/cart_pole/gym_config.json
 create mode 100644 configs/agents/rl/cart_pole/train_config.json
 create mode 100644 embodichain/lab/gym/envs/tasks/rl/cart_pole.py

diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json
new file mode 100644
index 00000000..9d2cfac8
--- /dev/null
+++ b/configs/agents/rl/cart_pole/gym_config.json
@@ -0,0 +1,63 @@
+{
+  "id": "CartPoleRL",
+  "max_episodes": 2,
+  "env": {
+    "num_envs": 128,
+    "sim_steps_per_control": 4,
+    "events": {
+    },
+    "observations": {},
+    "extensions": {
+      "obs_mode": "state",
+      "action_scale": 0.1,
+      "episode_length": 100,
+      "slider_limit": 15,
+      "pole_limit": 6.283185307,
+      "veloctity_weight": 0.01
+    }
+  },
+  "robot": {
+    "uid": "Cart",
+    "urdf_cfg": {
+      "components": [
+        {
+          "component_type": "arm",
+          "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole.urdf",
+          "transform": [
+            [1.0, 0.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0]
+          ]
+        }
+      ]
+    },
+    "init_pos": [0.0, 0.0, 0.0],
+    "init_rot": [0.0, 0.0, 0.0],
+    "init_qpos": [0.0, 0.6],
+    "drive_pros": {
+      "stiffness": {
+        "slider_to_cart": 1e2,
+        "cart_to_pole":1e-2
+      },
+      "damping": {
+        "slider_to_cart": 1e1,
+        "cart_to_pole":1e-3
+      },
+      "max_effort": {
+        "slider_to_cart": 1e3,
+        "cart_to_pole":1e-1
+      }
+    },
+    "control_parts": {
+      "arm": ["slider_to_cart"],
+      "hand": ["cart_to_pole"]
+    }
+  },
+  "sensor": [],
+  "light": {
+  },
+  "rigid_object": [],
+  "rigid_object_group": [],
+  "articulation": []
+}
diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json
new file mode 100644
index 00000000..7a077e9c
--- /dev/null
+++ b/configs/agents/rl/cart_pole/train_config.json
@@ -0,0 +1,64 @@
+{
+  "trainer": {
+    "exp_name": "push_cube_ppo",
+    "gym_config": "configs/agents/rl/cart_pole/gym_config.json",
+    "seed": 42,
+    "device": "cuda:0",
+    "headless": true,
+    "iterations": 1000,
+    "rollout_steps": 1024,
+    "eval_freq": 2,
+    "save_freq": 200,
+    "use_wandb": false,
+    "wandb_project_name": "embodychain-push_cube",
+    "events": {
+      "eval": {
+        "record_camera": {
+          "func": "record_camera_data_async",
+          "mode": "interval",
+          "interval_step": 1,
+          "params": {
+            "name": "main_cam",
+            "resolution": [640, 480],
+            "eye": [-1.4, 1.4, 2.0],
+            "target": [0, 0, 0],
+            "up": [0, 0, 1],
+            "intrinsics": [600, 600, 320, 240],
+            "save_path": "./outputs/videos/eval"
+          }
+        }
+      }
+    }
+  },
+  "policy": {
+    "name": "actor_critic",
+    "actor": {
+      "type": "mlp",
+      "network_cfg": {
+        "hidden_sizes": [256, 256],
+        "activation": "relu"
+      }
+    },
+    "critic": {
+      "type": "mlp",
+      "network_cfg": {
+        "hidden_sizes": [256, 256],
+        "activation": "relu"
+      }
+    }
+  },
+  "algorithm": {
+    "name": "ppo",
+    "cfg": {
+      "learning_rate": 0.0001,
+      "n_epochs": 10,
+      "batch_size": 8192,
+      "gamma": 0.99,
+      "gae_lambda": 0.95,
+      "clip_coef": 0.2,
+      "ent_coef": 0.01,
+      "vf_coef": 0.5,
+      "max_grad_norm": 0.5
+    }
+  }
+}
\ No newline at end of file
diff --git a/embodichain/lab/gym/envs/__init__.py b/embodichain/lab/gym/envs/__init__.py
index 286d7824..b6fd9333 100644
--- a/embodichain/lab/gym/envs/__init__.py
+++ b/embodichain/lab/gym/envs/__init__.py
@@ -29,3 +29,5 @@
 
 # Reinforcement learning environments
 from embodichain.lab.gym.envs.tasks.rl.push_cube import PushCubeEnv
+
+from embodichain.lab.gym.envs.tasks.rl.cart_pole import CartPoleEnv
diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
new file mode 100644
index 00000000..7ed0ba64
--- /dev/null
+++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
@@ -0,0 +1,151 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2025 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import torch
+import numpy as np
+from typing import Dict, Any, Optional, Sequence
+from gymnasium import spaces
+
+from embodichain.lab.gym.utils.registration import register_env
+from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg
+from embodichain.lab.sim.cfg import MarkerCfg
+from embodichain.lab.sim.types import EnvObs, EnvAction
+from embodichain.utils import logger
+
+
+@register_env("CartPoleRL", max_episode_steps=50, override=True)
+class CartPoleEnv(EmbodiedEnv):
+    """Push cube task for reinforcement learning.
+
+    The task involves pushing a cube to a target goal position using a robotic arm.
+    The reward consists of reaching reward, placing reward, action penalty, and success bonus.
+    """
+
+    def __init__(self, cfg=None, **kwargs):
+        if cfg is None:
+            cfg = EmbodiedEnvCfg()
+        cfg.sim_cfg.arena_space = 35.0
+        extensions = getattr(cfg, "extensions", {}) or {}
+
+        # cfg.sim_cfg.enable_rt = True
+
+        defaults = {
+            "qvel_weight": 0.01,
+            "action_scale": 0.02,
+            "episode_length": 100,
+            "slider_limit": 15,
+            "pole_limit": 6.283185307,
+            "veloctity_weight": 0.01,
+        }
+        for name, default in defaults.items():
+            value = extensions.get(name, getattr(cfg, name, default))
+            setattr(cfg, name, value)
+            setattr(self, name, getattr(cfg, name))
+
+        self.last_cube_goal_dist = None
+
+        super().__init__(cfg, **kwargs)
+
+    def _init_sim_state(self, **kwargs):
+        super()._init_sim_state(**kwargs)
+        self.single_action_space = spaces.Box(
+            low=-self.slider_limit,
+            high=self.slider_limit,
+            shape=(1,),
+            dtype=np.float32,
+        )
+        if self.obs_mode == "state":
+            self.single_observation_space = spaces.Box(
+                low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32
+            )
+
+    def _initialize_episode(
+        self, env_ids: Optional[Sequence[int]] = None, **kwargs
+    ) -> None:
+        super()._initialize_episode(env_ids=env_ids, **kwargs)
+
+    def _step_action(self, action: EnvAction) -> EnvAction:
+
+        if self.obs_mode == "state":
+            delta_qpos = action[:, 0]
+        scaled_action = delta_qpos * self.action_scale
+        scaled_action = torch.clamp(
+            scaled_action, -self.slider_limit, self.slider_limit
+        )
+        current_qpos = self.robot.get_qpos(name="arm")
+        target_qpos = current_qpos + scaled_action[:, None]
+        self.robot.set_qpos(qpos=target_qpos, name="arm")
+        return scaled_action
+
+    def get_obs(self, **kwargs) -> EnvObs:
+        qpos = self.robot.get_qpos(name="hand")
+        qvel = self.robot.get_qvel(name="hand")
+        if self.obs_mode == "state":
+            return torch.cat([qpos, qvel], dim=1)
+        return {
+            "robot": {"qpos": qpos, "qvel": qvel},
+        }
+
+    def get_reward(
+        self, obs: EnvObs, action: EnvAction, info: Dict[str, Any]
+    ) -> torch.Tensor:
+        if self.obs_mode == "state":
+            qpos = obs[:, 0:1].reshape(-1)
+            qvel = obs[:, 1:2].reshape(-1)
+        else:
+            qpos = obs["robot"]["qpos"].reshape(-1)
+            qvel = obs["robot"]["qvel"].reshape(-1)
+        upward_reward = 1 - torch.abs(qpos) / self.pole_limit
+        velocity_reward = 1 - torch.tanh(torch.abs(qvel))
+        reward = upward_reward + self.veloctity_weight * velocity_reward
+        if "rewards" not in info:
+            info["rewards"] = {}
+        info["rewards"]["upward_reward"] = upward_reward
+        info["rewards"]["velocity_reward"] = velocity_reward
+        return reward
+
+    def get_info(self, **kwargs) -> Dict[str, Any]:
+        pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)
+        pole_qvel = self.robot.get_qvel(name="hand").reshape(-1)
+        upward_distance = torch.abs(pole_qpos)
+        is_success = torch.logical_and(
+            upward_distance < 0.05, torch.abs(pole_qvel) < 0.1
+        )
+
+        info = {
+            "success": is_success,
+            "fail": torch.zeros(
+                self.cfg.num_envs, device=self.device, dtype=torch.bool
+            ),
+            "elapsed_steps": self._elapsed_steps,
+        }
+        info["metrics"] = {
+            "upward_distance": upward_distance,
+        }
+        return info
+
+    def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor:
+        is_timeout = self._elapsed_steps >= self.episode_length
+        pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)
+        is_fallen = torch.abs(pole_qpos) > torch.pi * 0.25
+        return is_timeout | is_fallen
+
+    def evaluate(self, **kwargs) -> Dict[str, Any]:
+        info = self.get_info(**kwargs)
+        return {
+            "success": info["success"][0].item(),
+            "upward_distance": info["upward_distance"],
+        }

From 7165debb53c9dbea6aa9db0c925586e93932217b Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Mon, 12 Jan 2026 12:12:34 +0800
Subject: [PATCH 2/9] update

---
 configs/agents/rl/cart_pole/gym_config.json    | 8 ++++----
 embodichain/lab/gym/envs/tasks/rl/cart_pole.py | 5 ++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json
index 9d2cfac8..e43a663a 100644
--- a/configs/agents/rl/cart_pole/gym_config.json
+++ b/configs/agents/rl/cart_pole/gym_config.json
@@ -34,18 +34,18 @@
     },
     "init_pos": [0.0, 0.0, 0.0],
     "init_rot": [0.0, 0.0, 0.0],
-    "init_qpos": [0.0, 0.6],
+    "init_qpos": [0.0, 1.0],
     "drive_pros": {
       "stiffness": {
-        "slider_to_cart": 1e2,
+        "slider_to_cart": 1e-1,
         "cart_to_pole":1e-2
       },
       "damping": {
-        "slider_to_cart": 1e1,
+        "slider_to_cart": 1e-2,
         "cart_to_pole":1e-3
       },
       "max_effort": {
-        "slider_to_cart": 1e3,
+        "slider_to_cart": 1e0,
         "cart_to_pole":1e-1
       }
     },
diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
index 7ed0ba64..8d263c10 100644
--- a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
+++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
@@ -37,7 +37,7 @@ class CartPoleEnv(EmbodiedEnv):
     def __init__(self, cfg=None, **kwargs):
         if cfg is None:
             cfg = EmbodiedEnvCfg()
-        cfg.sim_cfg.arena_space = 35.0
+        cfg.sim_cfg.arena_space = 12.0
         extensions = getattr(cfg, "extensions", {}) or {}
 
         # cfg.sim_cfg.enable_rt = True
@@ -78,7 +78,6 @@ def _initialize_episode(
         super()._initialize_episode(env_ids=env_ids, **kwargs)
 
     def _step_action(self, action: EnvAction) -> EnvAction:
-
         if self.obs_mode == "state":
             delta_qpos = action[:, 0]
         scaled_action = delta_qpos * self.action_scale
@@ -140,7 +139,7 @@ def get_info(self, **kwargs) -> Dict[str, Any]:
     def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor:
         is_timeout = self._elapsed_steps >= self.episode_length
         pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)
-        is_fallen = torch.abs(pole_qpos) > torch.pi * 0.25
+        is_fallen = torch.abs(pole_qpos) > torch.pi * 0.5
         return is_timeout | is_fallen
 
     def evaluate(self, **kwargs) -> Dict[str, Any]:

From 89dc002b1b3c08f71c743658ed22d70023397b9c Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Wed, 28 Jan 2026 16:26:42 +0800
Subject: [PATCH 3/9] update

---
 configs/agents/rl/cart_pole/gym_config.json   | 139 ++++++++++--------
 configs/agents/rl/cart_pole/train_config.json | 125 ++++++++--------
 embodichain/lab/gym/envs/managers/rewards.py  |  15 ++
 .../lab/gym/envs/tasks/rl/cart_pole.py        | 126 ++--------------
 4 files changed, 174 insertions(+), 231 deletions(-)

diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json
index e43a663a..b01746cf 100644
--- a/configs/agents/rl/cart_pole/gym_config.json
+++ b/configs/agents/rl/cart_pole/gym_config.json
@@ -1,63 +1,84 @@
-{
-  "id": "CartPoleRL",
-  "max_episodes": 2,
-  "env": {
-    "num_envs": 128,
-    "sim_steps_per_control": 4,
-    "events": {
-    },
-    "observations": {},
-    "extensions": {
-      "obs_mode": "state",
-      "action_scale": 0.1,
-      "episode_length": 100,
-      "slider_limit": 15,
-      "pole_limit": 6.283185307,
-      "veloctity_weight": 0.01
-    }
-  },
-  "robot": {
-    "uid": "Cart",
-    "urdf_cfg": {
-      "components": [
-        {
-          "component_type": "arm",
-          "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole.urdf",
-          "transform": [
-            [1.0, 0.0, 0.0, 0.0],
-            [0.0, 1.0, 0.0, 0.0],
-            [0.0, 0.0, 1.0, 0.0],
-            [0.0, 0.0, 0.0, 1.0]
-          ]
+{ 
+    "id": "CartPoleRL",
+    "max_episodes": 5,
+    "env": {
+        "events": {},
+        "observations": {
+            "robot_qpos": {
+                "func": "normalize_robot_joint_data",
+                "mode": "modify",
+                "name": "robot/qpos",
+                "params": {
+                    "joint_ids": [0, 1]
+                }
+            }
+        },
+        "rewards": {
+            "upward_reward": {
+                "func": "pole_upward",
+                "mode": "add",
+                "weight": 1.0,
+                "params": {}
+            },
+            "velocity_penalty": {
+                "func": "joint_velocity_penalty",
+                "mode": "add",
+                "weight": 0.01,
+                "params": {
+                    "robot_uid": "Cart",
+                    "part_name": "hand"
+                }
+            }
+        },
+        "extensions": {
+            "action_type": "delta_qpos",
+            "episode_length": 500,
+            "action_scale": 0.1,
+            "success_threshold": 0.1
         }
-      ]
     },
-    "init_pos": [0.0, 0.0, 0.0],
-    "init_rot": [0.0, 0.0, 0.0],
-    "init_qpos": [0.0, 1.0],
-    "drive_pros": {
-      "stiffness": {
-        "slider_to_cart": 1e-1,
-        "cart_to_pole":1e-2
-      },
-      "damping": {
-        "slider_to_cart": 1e-2,
-        "cart_to_pole":1e-3
-      },
-      "max_effort": {
-        "slider_to_cart": 1e0,
-        "cart_to_pole":1e-1
-      }
+    "robot": {
+        "uid": "Cart",
+        "urdf_cfg": {
+            "components": [
+                {
+                    "component_type": "arm",
+                    "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole2.urdf",
+                    "transform": [
+                        [1.0, 0.0, 0.0, 0.0],
+                        [0.0, 1.0, 0.0, 0.0],
+                        [0.0, 0.0, 1.0, 0.0],
+                        [0.0, 0.0, 0.0, 1.0]
+                    ]
+                }
+            ]
+        },
+        "init_pos": [0.0, 0.0, 0.0],
+        "init_rot": [0.0, 0.0, 0.0],
+        "init_qpos": [0.0, 0.3],
+        "drive_pros": {
+            "stiffness": {
+                "slider_to_cart": 1e1,
+                "cart_to_pole":1e-2
+            },
+            "damping": {
+                "slider_to_cart": 1e0,
+                "cart_to_pole":1e-3
+            },
+            "max_effort": {
+                "slider_to_cart": 1e2,
+                "cart_to_pole":1e-1
+            }
+        },
+        "control_parts": {
+            "arm": ["slider_to_cart"],
+            "hand": ["cart_to_pole"]
+        }
     },
-    "control_parts": {
-      "arm": ["slider_to_cart"],
-      "hand": ["cart_to_pole"]
-    }
-  },
-  "sensor": [],
-  "light": {
-  },
-  "rigid_object": [],
-  "rigid_object_group": [],
-  "articulation": []
+    "sensor": [],
+    "light": {},
+    "background": [],
+    "rigid_object": [],
+    "rigid_object_group": [],
+    "articulation": []
 }
diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json
index 7a077e9c..8481e4df 100644
--- a/configs/agents/rl/cart_pole/train_config.json
+++ b/configs/agents/rl/cart_pole/train_config.json
@@ -1,64 +1,67 @@
-{
-  "trainer": {
-    "exp_name": "push_cube_ppo",
-    "gym_config": "configs/agents/rl/cart_pole/gym_config.json",
-    "seed": 42,
-    "device": "cuda:0",
-    "headless": true,
-    "iterations": 1000,
-    "rollout_steps": 1024,
-    "eval_freq": 2,
-    "save_freq": 200,
-    "use_wandb": false,
-    "wandb_project_name": "embodychain-push_cube",
-    "events": {
-      "eval": {
-        "record_camera": {
-          "func": "record_camera_data_async",
-          "mode": "interval",
-          "interval_step": 1,
-          "params": {
-            "name": "main_cam",
-            "resolution": [640, 480],
-            "eye": [-1.4, 1.4, 2.0],
-            "target": [0, 0, 0],
-            "up": [0, 0, 1],
-            "intrinsics": [600, 600, 320, 240],
-            "save_path": "./outputs/videos/eval"
-          }
+{ 
+    "trainer": {
+        "exp_name": "push_cube_ppo",
+        "gym_config": "configs/agents/rl/cart_pole/gym_config.json",
+        "seed": 42,
+        "device": "cuda:0",
+        "headless": true,
+        "enable_rt": false,
+        "gpu_id": 0,
+        "num_envs": 64,
+        "iterations": 1000,
+        "rollout_steps": 1024,
+        "eval_freq": 200,
+        "save_freq": 200,
+        "use_wandb": false,
+        "wandb_project_name": "embodychain-push_cube",
+        "events": {
+            "eval": {
+                "record_camera": {
+                    "func": "record_camera_data_async",
+                    "mode": "interval",
+                    "interval_step": 1,
+                    "params": {
+                        "name": "main_cam",
+                        "resolution": [640, 480],
+                        "eye": [-1.4, 1.4, 2.0],
+                        "target": [0, 0, 0],
+                        "up": [0, 0, 1],
+                        "intrinsics": [600, 600, 320, 240],
+                        "save_path": "./outputs/videos/eval"
+                    }
+                }
+            }
         }
-      }
-    }
-  },
-  "policy": {
-    "name": "actor_critic",
-    "actor": {
-      "type": "mlp",
-      "network_cfg": {
-        "hidden_sizes": [256, 256],
-        "activation": "relu"
-      }
     },
-    "critic": {
-      "type": "mlp",
-      "network_cfg": {
-        "hidden_sizes": [256, 256],
-        "activation": "relu"
-      }
-    }
-  },
-  "algorithm": {
-    "name": "ppo",
-    "cfg": {
-      "learning_rate": 0.0001,
-      "n_epochs": 10,
-      "batch_size": 8192,
-      "gamma": 0.99,
-      "gae_lambda": 0.95,
-      "clip_coef": 0.2,
-      "ent_coef": 0.01,
-      "vf_coef": 0.5,
-      "max_grad_norm": 0.5
+    "policy": {
+        "name": "actor_critic",
+        "actor": {
+            "type": "mlp",
+            "network_cfg": {
+                "hidden_sizes": [256, 256],
+                "activation": "relu"
+            }
+        },
+        "critic": {
+            "type": "mlp",
+            "network_cfg": {
+                "hidden_sizes": [256, 256],
+                "activation": "relu"
+            }
+        }
+    },
+    "algorithm": {
+        "name": "ppo",
+        "cfg": {
+            "learning_rate": 0.0001,
+            "n_epochs": 10,
+            "batch_size": 8192,
+            "gamma": 0.99,
+            "gae_lambda": 0.95,
+            "clip_coef": 0.2,
+            "ent_coef": 0.01,
+            "vf_coef": 0.5,
+            "max_grad_norm": 0.5
+        }
     }
-  }
-}
\ No newline at end of file
+}
diff --git a/embodichain/lab/gym/envs/managers/rewards.py b/embodichain/lab/gym/envs/managers/rewards.py
index dad89b7f..9b7818a9 100644
--- a/embodichain/lab/gym/envs/managers/rewards.py
+++ b/embodichain/lab/gym/envs/managers/rewards.py
@@ -658,3 +658,18 @@ def incremental_distance_to_target(
     env._reward_states[state_key] = current_dist.clone()
 
     return reward
+
+
+def pole_upward(
+    env: EmbodiedEnv,
+    obs: dict,
+    action: torch.Tensor | dict,
+    info: dict,
+    object_cfg: SceneEntityCfg = None,
+):
+    robot = env.robot
+    pole_qpos = robot.get_qpos(name="hand").reshape(-1)  # [num_envs, ]
+
+    normalized_upward = torch.abs(pole_qpos) / torch.pi
+    reward = 1.0 - normalized_upward
+    return reward
diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
index 8d263c10..72840fe3 100644
--- a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
+++ b/embodichain/lab/gym/envs/tasks/rl/cart_pole.py
@@ -15,19 +15,16 @@
 # ----------------------------------------------------------------------------
 
 import torch
-import numpy as np
-from typing import Dict, Any, Optional, Sequence
-from gymnasium import spaces
+from typing import Dict, Any, Tuple
 
 from embodichain.lab.gym.utils.registration import register_env
-from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg
-from embodichain.lab.sim.cfg import MarkerCfg
-from embodichain.lab.sim.types import EnvObs, EnvAction
-from embodichain.utils import logger
+from embodichain.lab.gym.envs.rl_env import RLEnv
+from embodichain.lab.gym.envs import EmbodiedEnvCfg
+from embodichain.lab.sim.types import EnvObs
 
 
 @register_env("CartPoleRL", max_episode_steps=50, override=True)
-class CartPoleEnv(EmbodiedEnv):
+class CartPoleEnv(RLEnv):
     """Push cube task for reinforcement learning.
 
     The task involves pushing a cube to a target goal position using a robotic arm.
@@ -37,114 +34,21 @@ class CartPoleEnv(EmbodiedEnv):
     def __init__(self, cfg=None, **kwargs):
         if cfg is None:
             cfg = EmbodiedEnvCfg()
-        cfg.sim_cfg.arena_space = 12.0
-        extensions = getattr(cfg, "extensions", {}) or {}
-
-        # cfg.sim_cfg.enable_rt = True
-
-        defaults = {
-            "qvel_weight": 0.01,
-            "action_scale": 0.02,
-            "episode_length": 100,
-            "slider_limit": 15,
-            "pole_limit": 6.283185307,
-            "veloctity_weight": 0.01,
-        }
-        for name, default in defaults.items():
-            value = extensions.get(name, getattr(cfg, name, default))
-            setattr(cfg, name, value)
-            setattr(self, name, getattr(cfg, name))
-
-        self.last_cube_goal_dist = None
-
         super().__init__(cfg, **kwargs)
 
-    def _init_sim_state(self, **kwargs):
-        super()._init_sim_state(**kwargs)
-        self.single_action_space = spaces.Box(
-            low=-self.slider_limit,
-            high=self.slider_limit,
-            shape=(1,),
-            dtype=np.float32,
-        )
-        if self.obs_mode == "state":
-            self.single_observation_space = spaces.Box(
-                low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32
-            )
-
-    def _initialize_episode(
-        self, env_ids: Optional[Sequence[int]] = None, **kwargs
-    ) -> None:
-        super()._initialize_episode(env_ids=env_ids, **kwargs)
-
-    def _step_action(self, action: EnvAction) -> EnvAction:
-        if self.obs_mode == "state":
-            delta_qpos = action[:, 0]
-        scaled_action = delta_qpos * self.action_scale
-        scaled_action = torch.clamp(
-            scaled_action, -self.slider_limit, self.slider_limit
-        )
-        current_qpos = self.robot.get_qpos(name="arm")
-        target_qpos = current_qpos + scaled_action[:, None]
-        self.robot.set_qpos(qpos=target_qpos, name="arm")
-        return scaled_action
-
-    def get_obs(self, **kwargs) -> EnvObs:
-        qpos = self.robot.get_qpos(name="hand")
-        qvel = self.robot.get_qvel(name="hand")
-        if self.obs_mode == "state":
-            return torch.cat([qpos, qvel], dim=1)
-        return {
-            "robot": {"qpos": qpos, "qvel": qvel},
-        }
-
-    def get_reward(
-        self, obs: EnvObs, action: EnvAction, info: Dict[str, Any]
-    ) -> torch.Tensor:
-        if self.obs_mode == "state":
-            qpos = obs[:, 0:1].reshape(-1)
-            qvel = obs[:, 1:2].reshape(-1)
-        else:
-            qpos = obs["robot"]["qpos"].reshape(-1)
-            qvel = obs["robot"]["qvel"].reshape(-1)
-        upward_reward = 1 - torch.abs(qpos) / self.pole_limit
-        velocity_reward = 1 - torch.tanh(torch.abs(qvel))
-        reward = upward_reward + self.veloctity_weight * velocity_reward
-        if "rewards" not in info:
-            info["rewards"] = {}
-        info["rewards"]["upward_reward"] = upward_reward
-        info["rewards"]["velocity_reward"] = velocity_reward
-        return reward
-
-    def get_info(self, **kwargs) -> Dict[str, Any]:
-        pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)
-        pole_qvel = self.robot.get_qvel(name="hand").reshape(-1)
-        upward_distance = torch.abs(pole_qpos)
-        is_success = torch.logical_and(
-            upward_distance < 0.05, torch.abs(pole_qvel) < 0.1
-        )
-
-        info = {
-            "success": is_success,
-            "fail": torch.zeros(
-                self.cfg.num_envs, device=self.device, dtype=torch.bool
-            ),
-            "elapsed_steps": self._elapsed_steps,
-        }
-        info["metrics"] = {
-            "upward_distance": upward_distance,
-        }
-        return info
+    def compute_task_state(
+        self, **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
+        qpos = self.robot.get_qpos(name="hand").reshape(-1)  # [num_envs, ]
+        qvel = self.robot.get_qvel(name="hand").reshape(-1)  # [num_envs, ]
+        upward_distance = torch.abs(qpos)
+        is_success = torch.logical_and(upward_distance < 0.05, torch.abs(qvel) < 0.1)
+        is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
+        metrics = {"distance_to_goal": upward_distance}
+        return is_success, is_fail, metrics
 
     def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor:
         is_timeout = self._elapsed_steps >= self.episode_length
         pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)
         is_fallen = torch.abs(pole_qpos) > torch.pi * 0.5
         return is_timeout | is_fallen
-
-    def evaluate(self, **kwargs) -> Dict[str, Any]:
-        info = self.get_info(**kwargs)
-        return {
-            "success": info["success"][0].item(),
-            "upward_distance": info["upward_distance"],
-        }

From ed67efb1cf349a9c12a3ae24a580fc1666e34cb8 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Thu, 29 Jan 2026 09:53:13 +0800
Subject: [PATCH 4/9] update

---
 configs/agents/rl/cart_pole/gym_config.json   | 4 ++--
 configs/agents/rl/cart_pole/train_config.json | 4 ++--
 embodichain/lab/gym/envs/rl_env.py            | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/cart_pole/gym_config.json
index b01746cf..a5925292 100644
--- a/configs/agents/rl/cart_pole/gym_config.json
+++ b/configs/agents/rl/cart_pole/gym_config.json
@@ -53,9 +53,9 @@
                 }
             ]
         },
-        "init_pos": [0.0, 0.0, 0.0],
+        "init_pos": [0.0, 0.0, 0.5],
         "init_rot": [0.0, 0.0, 0.0],
-        "init_qpos": [0.0, 0.3],
+        "init_qpos": [-0.2, 0.07],
         "drive_pros": {
             "stiffness": {
                 "slider_to_cart": 1e1,
diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/cart_pole/train_config.json
index 8481e4df..5f1b55cf 100644
--- a/configs/agents/rl/cart_pole/train_config.json
+++ b/configs/agents/rl/cart_pole/train_config.json
@@ -23,8 +23,8 @@
                     "params": {
                         "name": "main_cam",
                         "resolution": [640, 480],
-                        "eye": [-1.4, 1.4, 2.0],
-                        "target": [0, 0, 0],
+                        "eye": [-1.4, 1.4, 2.5],
+                        "target": [0, 0, 0.7],
                         "up": [0, 0, 1],
                         "intrinsics": [600, 600, 320, 240],
                         "save_path": "./outputs/videos/eval"
diff --git a/embodichain/lab/gym/envs/rl_env.py b/embodichain/lab/gym/envs/rl_env.py
index 9ce6b5f1..a14adc67 100644
--- a/embodichain/lab/gym/envs/rl_env.py
+++ b/embodichain/lab/gym/envs/rl_env.py
@@ -244,8 +244,9 @@ def get_info(self, **kwargs) -> Dict[str, Any]:
             "success": success,
             "fail": fail,
             "elapsed_steps": self._elapsed_steps,
-            "goal_pose": self.goal_pose,
         }
+        if hasattr(self, "_goal_pose") and self._goal_pose is not None:
+            info["goal_pose"] = self._goal_pose
         info["metrics"] = metrics
         return info
 

From 8177cfac93e82585f6778975b0a73c6a519e1b96 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Thu, 29 Jan 2026 19:06:53 +0800
Subject: [PATCH 5/9] update

---
 .../rl/{ => basic}/cart_pole/gym_config.json  |  2 +-
 .../{ => basic}/cart_pole/train_config.json   |  2 +-
 embodichain/data/assets/robot_assets.py       | 32 +++++++++++++++++++
 embodichain/lab/gym/envs/__init__.py          |  2 +-
 .../envs/tasks/rl/{ => basic}/cart_pole.py    |  0
 5 files changed, 35 insertions(+), 3 deletions(-)
 rename configs/agents/rl/{ => basic}/cart_pole/gym_config.json (96%)
 rename configs/agents/rl/{ => basic}/cart_pole/train_config.json (96%)
 rename embodichain/lab/gym/envs/tasks/rl/{ => basic}/cart_pole.py (100%)

diff --git a/configs/agents/rl/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json
similarity index 96%
rename from configs/agents/rl/cart_pole/gym_config.json
rename to configs/agents/rl/basic/cart_pole/gym_config.json
index a5925292..1dd5c2f7 100644
--- a/configs/agents/rl/cart_pole/gym_config.json
+++ b/configs/agents/rl/basic/cart_pole/gym_config.json
@@ -43,7 +43,7 @@
             "components": [
                 {
                     "component_type": "arm",
-                    "urdf_path": "/home/chenjian/Downloads/cartpole/cartpole2.urdf",
+                    "urdf_path": "CartPole/cart_pole.urdf",
                     "transform": [
                         [1.0, 0.0, 0.0, 0.0],
                         [0.0, 1.0, 0.0, 0.0],
diff --git a/configs/agents/rl/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json
similarity index 96%
rename from configs/agents/rl/cart_pole/train_config.json
rename to configs/agents/rl/basic/cart_pole/train_config.json
index 5f1b55cf..6b91fab4 100644
--- a/configs/agents/rl/cart_pole/train_config.json
+++ b/configs/agents/rl/basic/cart_pole/train_config.json
@@ -1,7 +1,7 @@
 { 
     "trainer": {
         "exp_name": "push_cube_ppo",
-        "gym_config": "configs/agents/rl/cart_pole/gym_config.json",
+        "gym_config": "configs/agents/rl/basic/cart_pole/gym_config.json",
         "seed": 42,
         "device": "cuda:0",
         "headless": true,
diff --git a/embodichain/data/assets/robot_assets.py b/embodichain/data/assets/robot_assets.py
index 5251621e..9d9860d6 100644
--- a/embodichain/data/assets/robot_assets.py
+++ b/embodichain/data/assets/robot_assets.py
@@ -469,3 +469,35 @@ def __init__(self, data_root: str = None):
         path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root
 
         super().__init__(prefix, data_descriptor, path)
+
+
+class CartPole(EmbodiChainDataset):
+    """Dataset class for the CartPole.
+
+    Directory structure:
+        cart_pole/
+            cart_pole.urdf
+            cart.mtl
+            cart.obj
+            pole.mtl
+            pole.obj
+            slide_bar.mtl
+            slide_bar.obj
+
+    Example usage:
+        >>> from embodichain.data.robot_dataset import CartPole
+        >>> dataset = CartPole()
+        or
+        >>> from embodichain.data import get_data_path
+        >>> print(get_data_path("CartPole/cart_pole.urdf"))
+    """
+
+    def __init__(self, data_root: str = None):
+        data_descriptor = o3d.data.DataDescriptor(
+            os.path.join(EMBODICHAIN_DOWNLOAD_PREFIX, robot_assets, "cart_pole.zip"),
+            "9d185eb18b19f9c95153e01943c5b0a2",
+        )
+        prefix = "cart_pole"
+        path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root
+
+        super().__init__(prefix, data_descriptor, path)
diff --git a/embodichain/lab/gym/envs/__init__.py b/embodichain/lab/gym/envs/__init__.py
index e7f57863..d4fa9148 100644
--- a/embodichain/lab/gym/envs/__init__.py
+++ b/embodichain/lab/gym/envs/__init__.py
@@ -37,6 +37,6 @@
 # Reinforcement learning environments
 from embodichain.lab.gym.envs.tasks.rl.push_cube import PushCubeEnv
 
-from embodichain.lab.gym.envs.tasks.rl.cart_pole import CartPoleEnv
+from embodichain.lab.gym.envs.tasks.rl.basic.cart_pole import CartPoleEnv
 
 from embodichain.lab.gym.envs.tasks.special.simple_task import SimpleTaskEnv
diff --git a/embodichain/lab/gym/envs/tasks/rl/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
similarity index 100%
rename from embodichain/lab/gym/envs/tasks/rl/cart_pole.py
rename to embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py

From 3d01a8aba973c7a629e13d38757e5ef107de0d6e Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Thu, 29 Jan 2026 19:25:45 +0800
Subject: [PATCH 6/9] update

---
 configs/agents/rl/basic/cart_pole/gym_config.json | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json
index 1dd5c2f7..ce35f18f 100644
--- a/configs/agents/rl/basic/cart_pole/gym_config.json
+++ b/configs/agents/rl/basic/cart_pole/gym_config.json
@@ -43,13 +43,7 @@
             "components": [
                 {
                     "component_type": "arm",
-                    "urdf_path": "CartPole/cart_pole.urdf",
-                    "transform": [
-                        [1.0, 0.0, 0.0, 0.0],
-                        [0.0, 1.0, 0.0, 0.0],
-                        [0.0, 0.0, 1.0, 0.0],
-                        [0.0, 0.0, 0.0, 1.0]
-                    ]
+                    "urdf_path": "CartPole/cart_pole.urdf"
                 }
             ]
         },

From 1d467354904f004c5f62a034398dcc7a9138e550 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Thu, 29 Jan 2026 19:42:45 +0800
Subject: [PATCH 7/9] update

---
 .../agents/rl/basic/cart_pole/gym_config.json |  6 ----
 .../rl/basic/cart_pole/train_config.json      |  2 +-
 embodichain/lab/gym/envs/managers/rewards.py  | 15 ----------
 .../lab/gym/envs/tasks/rl/basic/cart_pole.py  | 30 +++++++++++++++++--
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json
index ce35f18f..ab762402 100644
--- a/configs/agents/rl/basic/cart_pole/gym_config.json
+++ b/configs/agents/rl/basic/cart_pole/gym_config.json
@@ -14,12 +14,6 @@
             }
         },
         "rewards": {
-            "upward_reward": {
-                "func": "pole_upward",
-                "mode": "add",
-                "weight": 1.0,
-                "params": {}
-            },
             "velocity_penalty": {
                 "func": "joint_velocity_penalty",
                 "mode": "add",
diff --git a/configs/agents/rl/basic/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json
index 6b91fab4..06ea85c0 100644
--- a/configs/agents/rl/basic/cart_pole/train_config.json
+++ b/configs/agents/rl/basic/cart_pole/train_config.json
@@ -13,7 +13,7 @@
         "eval_freq": 200,
         "save_freq": 200,
         "use_wandb": false,
-        "wandb_project_name": "embodychain-push_cube",
+        "wandb_project_name": "embodychain-cart_pole",
         "events": {
             "eval": {
                 "record_camera": {
diff --git a/embodichain/lab/gym/envs/managers/rewards.py b/embodichain/lab/gym/envs/managers/rewards.py
index 9b7818a9..dad89b7f 100644
--- a/embodichain/lab/gym/envs/managers/rewards.py
+++ b/embodichain/lab/gym/envs/managers/rewards.py
@@ -658,18 +658,3 @@ def incremental_distance_to_target(
     env._reward_states[state_key] = current_dist.clone()
 
     return reward
-
-
-def pole_upward(
-    env: EmbodiedEnv,
-    obs: dict,
-    action: torch.Tensor | dict,
-    info: dict,
-    object_cfg: SceneEntityCfg = None,
-):
-    robot = env.robot
-    pole_qpos = robot.get_qpos(name="hand").reshape(-1)  # [num_envs, ]
-
-    normalized_upward = torch.abs(pole_qpos) / torch.pi
-    reward = 1.0 - normalized_upward
-    return reward
diff --git a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
index 72840fe3..90fe959d 100644
--- a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
+++ b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
@@ -25,10 +25,14 @@
 
 @register_env("CartPoleRL", max_episode_steps=50, override=True)
 class CartPoleEnv(RLEnv):
-    """Push cube task for reinforcement learning.
+    """
+    CartPole balancing task for reinforcement learning.
 
-    The task involves pushing a cube to a target goal position using a robotic arm.
-    The reward consists of reaching reward, placing reward, action penalty, and success bonus.
+    The agent controls a cart (robot hand joint) to keep a pole balanced near the upright
+    position by regulating its angle and angular velocity. Episodes are considered
+    successful when the pole remains close to vertical with low velocity, and they
+    terminate either when a maximum number of steps is reached or when the pole falls
+    beyond an allowed tilt threshold.
     """
 
     def __init__(self, cfg=None, **kwargs):
@@ -36,6 +40,26 @@ def __init__(self, cfg=None, **kwargs):
             cfg = EmbodiedEnvCfg()
         super().__init__(cfg, **kwargs)
 
+    def get_reward(self, obs, action, info):
+        """Get the reward for the current step (pole upward reward).
+
+        Each SimulationManager env must implement its own get_reward function to define the reward function for the task, If the
+        env is considered for RL/IL training.
+
+        Args:
+            obs: The observation from the environment.
+            action: The action applied to the robot agent.
+            info: The info dictionary.
+
+        Returns:
+            The reward for the current step.
+        """
+        pole_qpos = self.robot.get_qpos(name="hand").reshape(-1)  # [num_envs, ]
+
+        normalized_upward = torch.abs(pole_qpos) / torch.pi
+        reward = 1.0 - normalized_upward
+        return reward
+
     def compute_task_state(
         self, **kwargs
     ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:

From 3116c3b283b5eb68765d283af09859c8876a01a5 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Fri, 30 Jan 2026 10:24:00 +0800
Subject: [PATCH 8/9] update

---
 configs/agents/rl/basic/cart_pole/gym_config.json    | 2 +-
 embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/agents/rl/basic/cart_pole/gym_config.json b/configs/agents/rl/basic/cart_pole/gym_config.json
index ab762402..a343af16 100644
--- a/configs/agents/rl/basic/cart_pole/gym_config.json
+++ b/configs/agents/rl/basic/cart_pole/gym_config.json
@@ -17,7 +17,7 @@
             "velocity_penalty": {
                 "func": "joint_velocity_penalty",
                 "mode": "add",
-                "weight": 0.01,
+                "weight": 0.005,
                 "params": {
                     "robot_uid": "Cart",
                     "part_name": "hand"
diff --git a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
index 90fe959d..c40c3fc6 100644
--- a/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
+++ b/embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
@@ -66,7 +66,7 @@ def compute_task_state(
         qpos = self.robot.get_qpos(name="hand").reshape(-1)  # [num_envs, ]
         qvel = self.robot.get_qvel(name="hand").reshape(-1)  # [num_envs, ]
         upward_distance = torch.abs(qpos)
-        is_success = torch.logical_and(upward_distance < 0.05, torch.abs(qvel) < 0.1)
+        is_success = torch.logical_and(upward_distance < 0.02, torch.abs(qvel) < 0.05)
         is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
         metrics = {"distance_to_goal": upward_distance}
         return is_success, is_fail, metrics

From 51a850db74b394e603c0d2b75b0282faa6ffecf3 Mon Sep 17 00:00:00 2001
From: chenjian <chenjian@dexforce.com>
Date: Fri, 30 Jan 2026 10:41:03 +0800
Subject: [PATCH 9/9] update

---
 configs/agents/rl/basic/cart_pole/train_config.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/agents/rl/basic/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json
index 06ea85c0..8412fe36 100644
--- a/configs/agents/rl/basic/cart_pole/train_config.json
+++ b/configs/agents/rl/basic/cart_pole/train_config.json
@@ -10,7 +10,7 @@
         "num_envs": 64,
         "iterations": 1000,
         "rollout_steps": 1024,
-        "eval_freq": 200,
+        "eval_freq": 2,
         "save_freq": 200,
         "use_wandb": false,
         "wandb_project_name": "embodychain-cart_pole",