Merged
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ EmbodiChain/
│ ├── data/ # Assets, datasets, constants, enums
│ ├── lab/ # Simulation lab
│ │ ├── gym/ # OpenAI Gym-compatible environments
- │ │ │ ├── envs/ # BaseEnv, EmbodiedEnv, RLEnv
+ │ │ │ ├── envs/ # BaseEnv, EmbodiedEnv
│ │ │ │ ├── managers/ # Observation, event, reward, record, dataset managers
│ │ │ │ │ └── randomization/ # Physics, geometry, spatial, visual randomizers
│ │ │ │ ├── tasks/ # Task implementations (tableware, RL, special)
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -57,7 +57,7 @@ A `CLAUDE.md` file is present at the root of this repository. Claude Code reads

```
> Explain how the Functor/Manager pattern works in embodichain/lab/gym/envs/managers/
- > What is the difference between EmbodiedEnv and RLEnv?
+ > How does the Action Manager work with EmbodiedEnv for RL tasks?
> Show me an example of how a randomization functor is registered in a task config.
```

12 changes: 7 additions & 5 deletions configs/agents/rl/basic/cart_pole/gym_config.json
@@ -25,11 +25,13 @@
}
}
},
- "extensions": {
- "action_type": "delta_qpos",
- "action_scale": 0.1,
- "success_threshold": 0.1
- }
+ "actions": {
+ "delta_qpos": {
+ "func": "DeltaQposTerm",
+ "params": { "scale": 0.1 }
+ }
+ },
+ "extensions": {}
},
"robot": {
"uid": "Cart",
8 changes: 6 additions & 2 deletions configs/agents/rl/push_cube/gym_config.json
@@ -111,9 +111,13 @@
"params": {}
}
},
+ "actions": {
+ "delta_qpos": {
+ "func": "DeltaQposTerm",
+ "params": { "scale": 0.1 }
+ }
+ },
"extensions": {
- "action_type": "delta_qpos",
- "action_scale": 0.1,
"success_threshold": 0.1
}
},
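In both configs above, the ``func`` string presumably names a registered action-term class that is instantiated with ``params`` at load time. A self-contained sketch of that kind of resolution — the registry, class body, and ``process`` method here are illustrative stand-ins, not the actual EmbodiChain API:

```python
# Toy resolution of a JSON-style {"func", "params"} entry into an
# action-term object. Names mirror the config above; the behavior
# (scaled joint-position deltas) is an assumption for illustration.

class DeltaQposTerm:
    """Toy term: interprets actions as scaled joint-position deltas."""

    def __init__(self, scale=1.0):
        self.scale = scale

    def process(self, qpos, action):
        # Apply the configured scale to each joint increment.
        return [q + self.scale * a for q, a in zip(qpos, action)]


TERM_REGISTRY = {"DeltaQposTerm": DeltaQposTerm}


def build_term(entry):
    """Instantiate a term from a JSON-style config entry."""
    return TERM_REGISTRY[entry["func"]](**entry.get("params", {}))


term = build_term({"func": "DeltaQposTerm", "params": {"scale": 0.1}})
new_qpos = term.process([0.0, 1.0], [1.0, -1.0])
```

With ``scale: 0.1``, a unit action moves each joint by at most 0.1 — the same shape of behavior the configs above describe.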
@@ -17,10 +17,19 @@ embodichain.lab.gym.envs.managers
SceneEntityCfg
EventCfg
ObservationCfg
ActionTermCfg
Functor
ManagerBase
EventManager
ObservationManager
ActionManager
ActionTerm
DeltaQposTerm
QposTerm
QposNormalizedTerm
EefPoseTerm
QvelTerm
QfTerm

.. rubric:: Functions

@@ -61,6 +70,10 @@ Configuration Classes
:members:
:exclude-members: __init__, class_type

.. autoclass:: ActionTermCfg
:members:
:exclude-members: __init__, class_type

Base Classes
------------

@@ -87,6 +100,46 @@ Managers
:inherited-members:
:show-inheritance:

.. autoclass:: ActionManager
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: ActionTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: DeltaQposTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: QposTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: QposNormalizedTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: EefPoseTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: QvelTerm
:members:
:inherited-members:
:show-inheritance:

.. autoclass:: QfTerm
:members:
:inherited-members:
:show-inheritance:

Observation Functions
--------------------

64 changes: 43 additions & 21 deletions docs/source/overview/gym/env.md
@@ -5,7 +5,7 @@

The {class}`~envs.EmbodiedEnv` is the core environment class in EmbodiChain designed for complex Embodied AI tasks. It adopts a **configuration-driven** architecture, allowing users to define robots, sensors, objects, lighting, and automated behaviors (events) purely through configuration classes, minimizing the need for boilerplate code.

- For **Reinforcement Learning** tasks, EmbodiChain provides {class}`~envs.RLEnv`, a specialized subclass that extends {class}`~envs.EmbodiedEnv` with RL-specific utilities such as flexible action preprocessing, goal management, and standardized info structure.
+ For **Reinforcement Learning** tasks, EmbodiChain provides the **Action Manager** (configured via ``actions`` in {class}`~envs.EmbodiedEnvCfg`), which handles action preprocessing (scaling, IK, delta_qpos, etc.) in a modular, configurable way. RL tasks inherit from {class}`~envs.EmbodiedEnv` directly and use the Action Manager for action processing.

## Core Architecture

@@ -17,7 +17,7 @@ EmbodiChain provides a hierarchy of environment classes for different task types
* **Event Manager**: Domain randomization, scene setup, and dynamic asset swapping.
* **Observation Manager**: Flexible observation space extensions.
* **Dataset Manager**: Built-in support for demonstration data collection.
- * **{class}`~envs.RLEnv`**: Specialized environment for RL tasks, extending {class}`~envs.EmbodiedEnv` with action preprocessing, goal management, and standardized reward/info structure.
+ * **Action Manager**: Configurable action preprocessing for RL tasks (delta_qpos, eef_pose, qvel, etc.), integrated into {class}`~envs.EmbodiedEnv` when ``actions`` is configured.

## Configuration System

@@ -90,8 +90,11 @@ The {class}`~envs.EmbodiedEnvCfg` class exposes the following additional parame
* **dataset** (Union[object, None]):
Dataset collection settings. Defaults to None, in which case no dataset collection is performed. Please refer to the {class}`~envs.managers.DatasetManager` class for more details.

+ * **actions** (Union[object, None]):
+ Action Manager settings for RL tasks. When configured, preprocesses raw policy actions (e.g., delta_qpos, eef_pose) into robot control format. Replaces the legacy RLEnv. Defaults to None. See the {class}`~envs.managers.ActionManager` class for more details.

* **extensions** (Union[Dict[str, Any], None]):
- Task-specific extension parameters that are automatically bound to the environment instance. This allows passing custom parameters (e.g., ``action_type``, ``action_scale``) without modifying the base configuration class. These parameters are accessible as instance attributes after environment initialization. Defaults to None.
+ Task-specific extension parameters that are automatically bound to the environment instance. This allows passing custom parameters (e.g., ``success_threshold``) without modifying the base configuration class. For action configuration, use the ``actions`` field instead. These parameters are accessible as instance attributes after environment initialization. Defaults to None.

* **filter_visual_rand** (bool):
Whether to filter out visual randomization functors. Useful for debugging motion and physics issues when visual randomization interferes with the debugging process. Defaults to ``False``.
@@ -125,10 +128,10 @@ class MyTaskEnvCfg(EmbodiedEnvCfg):
observations = ... # Custom observation spec
dataset = ... # Data collection settings

- # 4. Task Extensions
- extensions = { # Task-specific parameters
- "action_type": "delta_qpos",
- "action_scale": 0.1,
+ # 4. Action Manager (for RL tasks)
+ actions = ... # Action preprocessing (e.g., DeltaQposTerm with scale)
+ extensions = { # Task-specific parameters (e.g., success_threshold)
+ "success_threshold": 0.1,
}
```
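The "automatically bound to the environment instance" behavior of ``extensions`` can be pictured with a minimal sketch — ``ToyEnv`` is an illustrative stand-in, not the real {class}`~envs.EmbodiedEnv` initialization code:

```python
# Sketch of extension-parameter binding: each key in the extensions
# dict becomes an instance attribute, readable by task logic later.

class ToyEnv:
    def __init__(self, extensions=None):
        for name, value in (extensions or {}).items():
            setattr(self, name, value)


env = ToyEnv(extensions={"success_threshold": 0.1})
# Task code can now read env.success_threshold directly.
```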

@@ -186,37 +189,56 @@ The dataset manager is called automatically during {meth}`~envs.Env.step()`, ens

## Reinforcement Learning Environment

- For RL tasks, EmbodiChain provides {class}`~envs.RLEnv`, a specialized base class that extends {class}`~envs.EmbodiedEnv` with RL-specific utilities:
+ For RL tasks, EmbodiChain uses the **Action Manager** integrated into {class}`~envs.EmbodiedEnv`:

- * **Action Preprocessing**: Flexible action transformation supporting delta_qpos, absolute qpos, joint velocity, joint force, and end-effector pose (with IK).
- * **Goal Management**: Built-in goal pose tracking and visualization with axis markers.
- * **Standardized Info Structure**: Template methods for computing task-specific success/failure conditions and metrics.
+ * **Action Preprocessing**: Configurable via ``actions`` in {class}`~envs.EmbodiedEnvCfg`. Supports DeltaQposTerm, QposTerm, QposNormalizedTerm, EefPoseTerm, QvelTerm, QfTerm.
+ * **Standardized Info Structure**: {class}`~envs.EmbodiedEnv` provides ``compute_task_state``, ``get_info``, and ``evaluate`` for task-specific success/failure and metrics.
* **Episode Management**: Configurable episode length and truncation logic.

- ### Configuration Extensions for RL
+ ### Action Manager Configuration

- RL environments use the ``extensions`` field to pass task-specific parameters:
+ Configure action preprocessing via the ``actions`` field:

```python
- extensions = {
- "action_type": "delta_qpos", # Action type: delta_qpos, qpos, qvel, qf, eef_pose
- "action_scale": 0.1, # Scaling factor applied to all actions
- "success_threshold": 0.1, # Task-specific success threshold (optional)
+ from embodichain.lab.gym.envs.managers import ActionTermCfg, DeltaQposTerm
+ from embodichain.utils import configclass
+
+ @configclass
+ class MyRLActionCfg:
+ delta_qpos: ActionTermCfg = ActionTermCfg(
+ func=DeltaQposTerm,
+ params={"scale": 0.1}
+ )
+
+ # In EmbodiedEnvCfg:
+ actions = MyRLActionCfg()
+ extensions = {"success_threshold": 0.1} # Task-specific parameters
```

In JSON config, use the ``actions`` section:

```json
"actions": {
"delta_qpos": {
"func": "DeltaQposTerm",
"params": { "scale": 0.1 }
}
}
```
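At runtime, the key of the action dict passed to ``env.step`` matches the configured term name (the RL algorithms in this PR build exactly such a ``{action_type: actions}`` dict). A self-contained sketch of that dispatch — ``ToyActionManager`` is illustrative only, not the real ``ActionManager``:

```python
# Toy dispatch: each named action in the dict is routed through its
# configured term. The single-term name doubles as the expected
# action-dict key, which is what action_type exposes here.

class ToyActionManager:
    def __init__(self, terms):
        # terms: mapping of term name -> callable(action) -> control
        self.terms = terms

    @property
    def action_type(self):
        # With a single configured term, expose its name.
        (name,) = self.terms.keys()
        return name

    def process(self, action_dict):
        return {name: self.terms[name](a) for name, a in action_dict.items()}


manager = ToyActionManager({"delta_qpos": lambda a: [0.1 * x for x in a]})
controls = manager.process({"delta_qpos": [1.0, -2.0]})
```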


## Creating a Custom Task

### For Reinforcement Learning Tasks

- Inherit from {class}`~envs.RLEnv` and implement the task-specific logic:
+ Inherit from {class}`~envs.EmbodiedEnv` and implement the task-specific logic. Configure the Action Manager via ``actions`` in your config:

```python
- from embodichain.lab.gym.envs import RLEnv, EmbodiedEnvCfg
+ from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg
from embodichain.lab.gym.utils.registration import register_env

@register_env("MyRLTask-v0")
- class MyRLTaskEnv(RLEnv):
+ class MyRLTaskEnv(EmbodiedEnv):
def __init__(self, cfg: MyTaskEnvCfg, **kwargs):
super().__init__(cfg, **kwargs)

@@ -272,7 +294,7 @@ For a complete example of a modular environment setup, please refer to the {ref}
- {ref}`tutorial_create_basic_env` - Creating basic environments
- {ref}`tutorial_modular_env` - Advanced modular environment setup
- {ref}`tutorial_rl` - Reinforcement learning training guide
- - {doc}`/api_reference/embodichain/embodichain.lab.gym.envs` - Complete API reference for EmbodiedEnv, RLEnv, and configurations
+ - {doc}`/api_reference/embodichain/embodichain.lab.gym.envs` - Complete API reference for EmbodiedEnv and configurations

```{toctree}
:maxdepth: 1
43 changes: 23 additions & 20 deletions docs/source/tutorial/rl.rst
@@ -78,11 +78,10 @@ The ``env`` section defines the task environment:
- **id**: Environment registry ID (e.g., "PushCubeRL")
- **cfg**: Environment-specific configuration parameters

- For RL environments (inheriting from ``RLEnv``), use the ``extensions`` field for RL-specific parameters:
+ For RL environments, use the ``actions`` field for action preprocessing and ``extensions`` for task-specific parameters:

- - **action_type**: Action type - "delta_qpos" (default), "qpos", "qvel", "qf", "eef_pose"
- - **action_scale**: Scaling factor applied to all actions (default: 1.0)
- - **success_threshold**: Task-specific success threshold (optional)
+ - **actions**: Action Manager config (e.g., DeltaQposTerm with scale)
+ - **extensions**: Task-specific parameters (e.g., success_threshold)

Example:

@@ -92,9 +91,13 @@ Example:
"id": "PushCubeRL",
"cfg": {
"num_envs": 4,
+ "actions": {
+ "delta_qpos": {
+ "func": "DeltaQposTerm",
+ "params": { "scale": 0.1 }
+ }
+ },
"extensions": {
- "action_type": "delta_qpos",
- "action_scale": 0.1,
"success_threshold": 0.1
}
}
@@ -354,16 +357,16 @@ Adding a New Environment

To add a new RL environment:

- 1. Create an environment class inheriting from ``RLEnv`` (which provides action preprocessing, goal management, and standardized info structure):
+ 1. Create an environment class inheriting from ``EmbodiedEnv`` (with Action Manager configured for action preprocessing and standardized info structure):

.. code-block:: python

- from embodichain.lab.gym.envs import RLEnv, EmbodiedEnvCfg
+ from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg
from embodichain.lab.gym.utils.registration import register_env
import torch

@register_env("MyTaskRL", override=True)
- class MyTaskEnv(RLEnv):
+ class MyTaskEnv(EmbodiedEnv):
def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs):
super().__init__(cfg, **kwargs)

Expand All @@ -375,37 +378,37 @@ To add a new RL environment:
return is_success, is_fail, metrics


- 1. Configure the environment in your JSON config with RL-specific extensions:
+ 2. Configure the environment in your JSON config with ``actions`` and ``extensions``:

.. code-block:: json

"env": {
"id": "MyTaskRL",
"cfg": {
"num_envs": 4,
+ "actions": {
+ "delta_qpos": {
+ "func": "DeltaQposTerm",
+ "params": { "scale": 0.1 }
+ }
+ },
"extensions": {
- "action_type": "delta_qpos",
- "action_scale": 0.1,
"success_threshold": 0.05
}
}
}

- The ``RLEnv`` base class provides:
+ The ``EmbodiedEnv`` with Action Manager provides:

- - **Action Preprocessing**: Automatically handles different action types (delta_qpos, qpos, qvel, qf, eef_pose)
- - **Action Scaling**: Applies ``action_scale`` to all actions
- - **Goal Management**: Built-in goal pose tracking and visualization
+ - **Action Preprocessing**: Configurable via ``actions`` (DeltaQposTerm, QposTerm, EefPoseTerm, etc.)
+ - **Standardized Info**: Implements ``get_info()`` using ``compute_task_state()`` template method

Best Practices
~~~~~~~~~~~~~~

- - **Use RLEnv for RL Tasks**: Always inherit from ``RLEnv`` for reinforcement learning tasks. It provides action preprocessing, goal management, and standardized info structure out of the box.
+ - **Use EmbodiedEnv with Action Manager for RL Tasks**: Inherit from ``EmbodiedEnv`` and configure ``actions`` in your config. The Action Manager handles action preprocessing (delta_qpos, qpos, qvel, qf, eef_pose) in a modular way.

- - **Action Type Configuration**: Configure ``action_type`` in the environment's ``extensions`` field. The default is "delta_qpos" (incremental joint positions). Other options: "qpos" (absolute), "qvel" (velocity), "qf" (force), "eef_pose" (end-effector pose with IK).
-
- - **Action Scaling**: Use ``action_scale`` in the environment's ``extensions`` field to scale actions. This is applied in ``RLEnv._preprocess_action()`` before robot control.
+ - **Action Configuration**: Use the ``actions`` field in your JSON config. Example: ``"delta_qpos": {"func": "DeltaQposTerm", "params": {"scale": 0.1}}``.

- **Device Management**: Device is single-sourced from ``runtime.cuda``. All components (trainer/algorithm/policy/env) share the same device.

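For intuition, the ``compute_task_state`` contract shown in the snippet above (returning ``is_success``, ``is_fail``, ``metrics``) might look like this for a distance-based task. The function body, thresholds, and metric names here are illustrative, not the actual EmbodiChain implementation:

```python
import math

# Illustrative distance-based task state mirroring the
# compute_task_state signature used in the tutorial snippet.

def compute_task_state(obj_pos, goal_pos, step, max_steps,
                       success_threshold=0.05):
    dist = math.dist(obj_pos, goal_pos)  # Euclidean distance to goal
    is_success = dist < success_threshold
    # Fail only when the episode runs out without succeeding.
    is_fail = (not is_success) and step >= max_steps
    return is_success, is_fail, {"distance_to_goal": dist}
```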
5 changes: 4 additions & 1 deletion embodichain/agents/rl/algo/grpo.py
@@ -164,7 +164,10 @@ def collect_rollout(

for _ in range(num_steps):
actions, log_prob, _ = policy.get_action(current_obs, deterministic=False)
- action_type = getattr(env, "action_type", "delta_qpos")
+ am = getattr(env, "action_manager", None)
+ action_type = (
+ am.action_type if am else getattr(env, "action_type", "delta_qpos")
+ )
action_dict = {action_type: actions}
next_obs, reward, terminated, truncated, env_info = env.step(action_dict)
done = (terminated | truncated).bool()
5 changes: 4 additions & 1 deletion embodichain/agents/rl/algo/ppo.py
@@ -97,7 +97,10 @@ def collect_rollout(
)

# Wrap action as dict for env processing
- action_type = getattr(env, "action_type", "delta_qpos")
+ am = getattr(env, "action_manager", None)
+ action_type = (
+ am.action_type if am else getattr(env, "action_type", "delta_qpos")
+ )
action_dict = {action_type: actions}

# Step environment
7 changes: 6 additions & 1 deletion embodichain/agents/rl/utils/trainer.py
@@ -259,7 +259,12 @@ def _eval_once(self, num_episodes: int = 5):
while not done_mask.all():
# Get deterministic actions from policy
actions, _, _ = self.policy.get_action(obs, deterministic=True)
- action_type = getattr(self.eval_env, "action_type", "delta_qpos")
+ am = getattr(self.eval_env, "action_manager", None)
+ action_type = (
+ am.action_type
+ if am
+ else getattr(self.eval_env, "action_type", "delta_qpos")
+ )
action_dict = {action_type: actions}

# Environment step