Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions configs/agents/rl/basic/cart_pole/train_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"trainer": {
"exp_name": "push_cube_ppo",
"exp_name": "cart_pole_ppo",
"gym_config": "configs/agents/rl/basic/cart_pole/gym_config.json",
"seed": 42,
"device": "cuda:0",
Expand All @@ -10,7 +10,7 @@
"num_envs": 64,
"iterations": 1000,
"rollout_steps": 1024,
"eval_freq": 2,
"eval_freq": 200,
"save_freq": 200,
"use_wandb": false,
"wandb_project_name": "embodychain-cart_pole",
Expand Down
4 changes: 3 additions & 1 deletion embodichain/lab/gym/envs/embodied_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,12 @@ def _extend_reward(
**kwargs,
) -> torch.Tensor:
if self.reward_manager:
rewards, reward_info = self.reward_manager.compute(
extra_rewards, reward_info = self.reward_manager.compute(
obs=obs, action=action, info=info
)
info["rewards"] = reward_info
# Add manager terms to base reward from get_reward() so task reward is kept
rewards = rewards + extra_rewards
return rewards

def _prepare_scene(self, **kwargs) -> None:
Expand Down
4 changes: 3 additions & 1 deletion embodichain/lab/gym/envs/tasks/rl/basic/cart_pole.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ def compute_task_state(
qpos = self.robot.get_qpos(name="hand").reshape(-1) # [num_envs, ]
qvel = self.robot.get_qvel(name="hand").reshape(-1) # [num_envs, ]
upward_distance = torch.abs(qpos)
is_success = torch.logical_and(upward_distance < 0.02, torch.abs(qvel) < 0.05)
balance = torch.logical_and(upward_distance < 0.02, torch.abs(qvel) < 0.05)
at_final_step = self._elapsed_steps >= self.episode_length - 1
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at_final_step is checked against self.episode_length - 1, but check_truncated() times out at self.episode_length, and BaseEnv.step() terminates immediately once info['success'] is set. As a result, the episode ends one step early. Success can also become impossible when using gymnasium.make + TimeLimitWrapper if max_episode_steps != episode_length — for example, CartPole is registered with max_episode_steps=50 while the cart_pole gym config sets episode_length=500, so the wrapper truncates the episode long before the final-step condition can ever hold. Consider deriving the final-step condition from the actual timeout condition (the same threshold used for truncation), and/or aligning episode_length with the environment's time limit, so that success is reachable in all supported entrypoints.

Suggested change
at_final_step = self._elapsed_steps >= self.episode_length - 1
at_final_step = self._elapsed_steps >= self.episode_length

Copilot uses AI. Check for mistakes.
is_success = torch.logical_and(at_final_step, balance)
is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
metrics = {"distance_to_goal": upward_distance}
return is_success, is_fail, metrics
Expand Down
Loading