From f4e50c85e6a01d8bd9718c15414d9d66c82fc642 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Thu, 13 Mar 2025 08:03:32 -0400
Subject: [PATCH 01/10] initializes files for direct env

---
 .../direct/franka_reach/__init__.py           | 25 +++++++++++++++++++
 .../direct/franka_reach/franka_reach_env.py   | 23 +++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
new file mode 100644
index 000000000000..f052e833c439
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+Franka-Reach environment.
+"""
+
+import gymnasium as gym
+
+from . import agents
+
+##
+# Register Gym environments.
+##
+
+gym.register(
+    id="Isaac-Franka-Reach-Direct-v0",
+    entry_point=f"{__name__}.franka_reach_env:FrankaReachEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.franka_reach_env:FrankaReachEnvCfg",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
new file mode 100644
index 000000000000..d12f2e9eddf1
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import torch
+
+from isaacsim.core.utils.stage import get_current_stage
+from isaacsim.core.utils.torch.transformations import tf_combine, tf_inverse, tf_vector
+from pxr import UsdGeom
+
+import isaaclab.sim as sim_utils
+from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg
+from isaaclab.assets import Articulation, ArticulationCfg
+from isaaclab.envs import DirectRLEnv, DirectRLEnvCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.terrains import TerrainImporterCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+from isaaclab.utils.math import sample_uniform
\ No newline at end of file

From 332e982febd092fd9dd85413ffe5e72539780d58 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Thu, 13 Mar 2025 09:25:15 -0400
Subject: [PATCH 02/10] completes env file and adds rl_games agent

---
 .../direct/franka_reach/agents/__init__.py    |   4 +
 .../franka_reach/agents/rl_games_ppo_cfg.yaml |  75 ++++++++
 .../direct/franka_reach/franka_reach_env.py   | 181 +++++++++++++++++-
 3 files changed, 258 insertions(+), 2 deletions(-)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/__init__.py
new file mode 100644
index 000000000000..e75ca2bc3f90
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml
new file mode 100644
index 000000000000..27e44c9c3195
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml
@@ -0,0 +1,75 @@
+params:
+  seed: 42
+
+  # environment wrapper clipping
+  env:
+    clip_actions: 1.0
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256, 128, 64]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+  load_checkpoint: False # flag which sets whether to load the checkpoint
+  load_path: '' # path to the checkpoint to load
+
+  config:
+    name: franka_reach_direct
+    env_name: rlgpu
+    device: 'cuda:0'
+    device_name: 'cuda:0'
+    multi_gpu: False
+    ppo: True
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    # value_bootstrap: True
+    num_actors: -1  # configured from the script (based on num_envs)
+    reward_shaper:
+      scale_value: 0.01
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 5e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    score_to_win: 100000000
+    max_epochs: 1500
+    save_best_after: 200
+    save_frequency: 100
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 16
+    minibatch_size: 8192
+    mini_epochs: 8
+    critic_coef: 4
+    clip_value: True
+    seq_length: 4
+    bounds_loss_coef: 0.0001
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
index d12f2e9eddf1..26a2a6c78c16 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -13,11 +13,188 @@
 
 import isaaclab.sim as sim_utils
 from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg
-from isaaclab.assets import Articulation, ArticulationCfg
+from isaaclab.assets import Articulation, ArticulationCfg, AssetBaseCfg
 from isaaclab.envs import DirectRLEnv, DirectRLEnvCfg
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.sim import SimulationCfg
 from isaaclab.terrains import TerrainImporterCfg
 from isaaclab.utils import configclass
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
-from isaaclab.utils.math import sample_uniform
\ No newline at end of file
+from isaaclab.utils.math import sample_uniform
+from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
+
+
+
+
+@configclass
+class FrankaReachEnvCfg(DirectRLEnvCfg):
+    # env
+    episode_length_s = 8.3333  # 500 timesteps
+    decimation = 2
+    action_space = 9
+    observation_space = 23
+    state_space = 0
+
+    # simulation
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 120,
+        render_interval=decimation,
+        physics_material=sim_utils.RigidBodyMaterialCfg(
+            friction_combine_mode="multiply",
+            restitution_combine_mode="multiply",
+            static_friction=1.0,
+            dynamic_friction=1.0,
+            restitution=0.0,
+        ),
+    )
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=3.0, replicate_physics=True)
+
+    # robot
+    robot = ArticulationCfg(
+        prim_path="/World/envs/env_.*/Robot",
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Franka/franka_instanceable.usd",
+            activate_contact_sensors=False,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                max_depenetration_velocity=5.0,
+            ),
+            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+                enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1
+            ),
+        ),
+        init_state=ArticulationCfg.InitialStateCfg(
+            joint_pos={
+                "panda_joint1": 1.157,
+                "panda_joint2": -1.066,
+                "panda_joint3": -0.155,
+                "panda_joint4": -2.239,
+                "panda_joint5": -1.841,
+                "panda_joint6": 1.003,
+                "panda_joint7": 0.469,
+                "panda_finger_joint.*": 0.035,
+            },
+            pos=(1.0, 0.0, 0.0),
+            rot=(0.0, 0.0, 0.0, 1.0),
+        ),
+        actuators={
+            "panda_shoulder": ImplicitActuatorCfg(
+                joint_names_expr=["panda_joint[1-4]"],
+                effort_limit=87.0,
+                velocity_limit=2.175,
+                stiffness=80.0,
+                damping=4.0,
+            ),
+            "panda_forearm": ImplicitActuatorCfg(
+                joint_names_expr=["panda_joint[5-7]"],
+                effort_limit=12.0,
+                velocity_limit=2.61,
+                stiffness=80.0,
+                damping=4.0,
+            ),
+            "panda_hand": ImplicitActuatorCfg(
+                joint_names_expr=["panda_finger_joint.*"],
+                effort_limit=200.0,
+                velocity_limit=0.2,
+                stiffness=2e3,
+                damping=1e2,
+            ),
+        },
+    )
+
+    # # table
+    # table = AssetBaseCfg(
+    #     prim_path="{ENV_REGEX_NS}/Table",
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd",
+    #     ),
+    #     init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)),
+    # )
+
+    # # # ground plane
+    # ground = AssetBaseCfg(
+    #     prim_path="/World/ground",
+    #     spawn=sim_utils.GroundPlaneCfg(),
+    #     init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
+    # )
+
+    # reset
+    max_target_dist = 0.2
+    initial_joint_pos_range = [-0.1, 0.1]
+
+    # reward scales
+    dist_reward_scale = 1.5
+    rot_reward_scale = 1.5
+    reach_reward_scale = 10.0
+    action_penalty_scale = 0.05
+    finger_reward_scale = 2.0
+
+    action_scale = 7.5
+    dof_velocity_scale = 0.1
+
+class FrankaReachEnv(DirectRLEnv):
+
+    cfg: FrankaReachEnvCfg
+
+    def __init__(self, cfg: FrankaReachEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
+        # self._setup_scene()
+
+    def _setup_scene(self):
+        """Initialize simulation scene."""
+        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg(), translation=(0.0, 0.0, -0.4))
+
+        # spawn a usd file of a table into the scene
+        cfg = sim_utils.UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd")
+        cfg.func(
+            "/World/envs/env_.*/Table", cfg, translation=(0.55, 0.0, 0.0), orientation=(0.70711, 0.0, 0.0, 0.70711)
+        )
+
+        self._robot = Articulation(self.cfg.robot)
+
+        self.scene.clone_environments(copy_from_source=False)
+
+        self.scene.articulations["robot"] = self._robot
+
+        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
+        light_cfg.func("/World/Light", light_cfg)
+
+    def _get_rewards(self) -> torch.Tensor:
+        target_pos = torch.tensor([0.5, 0.0, 0.5], device=self.device)
+        end_effector_pos = self.robot.data.body_pos_w[:, self.robot.find_bodies("panda_link7")[0][0]]
+        distance = torch.norm(end_effector_pos - target_pos, dim=-1)
+        dist_reward = self.cfg.dist_reward_scale / (1.0 + distance)
+        action_penalty = torch.sum(self.actions**2, dim=-1) * self.cfg.action_penalty_scale
+        return dist_reward - action_penalty
+
+    def _get_observations(self) -> dict:
+        dof_pos_scaled = (2.0 * (self.robot.data.joint_pos - self.robot.data.default_joint_pos))
+        obs = torch.cat((dof_pos_scaled, self.robot.data.joint_vel), dim=-1)
+        return {"policy": torch.clamp(obs, -5.0, 5.0)}
+
+    def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
+        target_pos = torch.tensor([0.5, 0.0, 0.5], device=self.device)
+        end_effector_pos = self.robot.data.body_pos_w[:, self.robot.find_bodies("panda_link7")[0][0]]
+        distance = torch.norm(end_effector_pos - target_pos, dim=-1)
+        terminated = distance < self.cfg.max_target_dist
+        truncated = self.episode_length_buf >= self.max_episode_length - 1
+        return terminated, truncated
+
+    def _reset_idx(self, env_ids: torch.Tensor | None):
+        if env_ids is None:
+            env_ids = self.robot._ALL_INDICES
+        super()._reset_idx(env_ids)
+        joint_pos = self.robot.data.default_joint_pos[env_ids] + sample_uniform(
+            self.cfg.initial_joint_pos_range[0],
+            self.cfg.initial_joint_pos_range[1],
+            joint_pos.shape,
+            self.device,
+        )
+        self.robot.write_joint_state_to_sim(joint_pos, torch.zeros_like(joint_pos), env_ids)
+
+    def _pre_physics_step(self, actions: torch.Tensor) -> None:
+        self.actions = actions.clone().clamp(-1.0, 1.0)
+
+    def _apply_action(self) -> None:
+        self.robot.set_joint_position_target(self.actions)

From bdeaf82b23e11b9045e6e4fcf33e0cca4efd2b8f Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 01:24:31 -0400
Subject: [PATCH 03/10] adds markers visualization and completes reach direct
 env

---
 .../direct/franka_reach/franka_reach_env.py   | 240 +++++++++++-------
 1 file changed, 154 insertions(+), 86 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
index 26a2a6c78c16..787bec7dd758 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -7,35 +7,32 @@
 
 import torch
 
-from isaacsim.core.utils.stage import get_current_stage
-from isaacsim.core.utils.torch.transformations import tf_combine, tf_inverse, tf_vector
-from pxr import UsdGeom
-
 import isaaclab.sim as sim_utils
 from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg
-from isaaclab.assets import Articulation, ArticulationCfg, AssetBaseCfg
+from isaaclab.assets import Articulation, ArticulationCfg
 from isaaclab.envs import DirectRLEnv, DirectRLEnvCfg
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.sim import SimulationCfg
-from isaaclab.terrains import TerrainImporterCfg
 from isaaclab.utils import configclass
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
 from isaaclab.utils.math import sample_uniform
 from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
-
-
+from isaaclab.markers.config import FRAME_MARKER_CFG
+from isaaclab.markers import VisualizationMarkers
 
 
 @configclass
 class FrankaReachEnvCfg(DirectRLEnvCfg):
-    # env
-    episode_length_s = 8.3333  # 500 timesteps
-    decimation = 2
+    """Configuration class for the Franka reaching environment."""
+
+    # Environment
+    episode_length_s = 5.0
+    decimation = 8
     action_space = 9
-    observation_space = 23
+    observation_space = 21
     state_space = 0
 
-    # simulation
+    # Simulation
     sim: SimulationCfg = SimulationCfg(
         dt=1 / 120,
         render_interval=decimation,
@@ -47,10 +44,15 @@ class FrankaReachEnvCfg(DirectRLEnvCfg):
             restitution=0.0,
         ),
     )
-    # scene
-    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=3.0, replicate_physics=True)
 
-    # robot
+    # Scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=4096,
+        env_spacing=3.0,
+        replicate_physics=True
+    )
+
+    # Robot
     robot = ArticulationCfg(
         prim_path="/World/envs/env_.*/Robot",
         spawn=sim_utils.UsdFileCfg(
@@ -61,7 +63,9 @@ class FrankaReachEnvCfg(DirectRLEnvCfg):
                 max_depenetration_velocity=5.0,
             ),
             articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-                enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1
+                enabled_self_collisions=False,
+                solver_position_iteration_count=12,
+                solver_velocity_iteration_count=1,
             ),
         ),
         init_state=ArticulationCfg.InitialStateCfg(
@@ -75,126 +79,190 @@ class FrankaReachEnvCfg(DirectRLEnvCfg):
                 "panda_joint7": 0.469,
                 "panda_finger_joint.*": 0.035,
             },
-            pos=(1.0, 0.0, 0.0),
-            rot=(0.0, 0.0, 0.0, 1.0),
+            pos=(0.0, 0.0, 0.0),
+            rot=(1.0, 0.0, 0.0, 0.0),
         ),
         actuators={
             "panda_shoulder": ImplicitActuatorCfg(
                 joint_names_expr=["panda_joint[1-4]"],
-                effort_limit=87.0,
-                velocity_limit=2.175,
+                effort_limit_sim=87.0,
+                velocity_limit_sim=2.175,
                 stiffness=80.0,
                 damping=4.0,
             ),
             "panda_forearm": ImplicitActuatorCfg(
                 joint_names_expr=["panda_joint[5-7]"],
-                effort_limit=12.0,
-                velocity_limit=2.61,
+                effort_limit_sim=12.0,
+                velocity_limit_sim=2.61,
                 stiffness=80.0,
                 damping=4.0,
             ),
             "panda_hand": ImplicitActuatorCfg(
                 joint_names_expr=["panda_finger_joint.*"],
-                effort_limit=200.0,
-                velocity_limit=0.2,
+                effort_limit_sim=200.0,
+                velocity_limit_sim=0.2,
                 stiffness=2e3,
                 damping=1e2,
             ),
         },
     )
 
-    # # table
-    # table = AssetBaseCfg(
-    #     prim_path="{ENV_REGEX_NS}/Table",
-    #     spawn=sim_utils.UsdFileCfg(
-    #         usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd",
-    #     ),
-    #     init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)),
-    # )
-
-    # # # ground plane
-    # ground = AssetBaseCfg(
-    #     prim_path="/World/ground",
-    #     spawn=sim_utils.GroundPlaneCfg(),
-    #     init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
-    # )
-
-    # reset
-    max_target_dist = 0.2
+    # Markers
+    markers = FRAME_MARKER_CFG.replace(prim_path="/Visuals/Markers")
+    markers.markers["frame"].scale = (0.1, 0.1, 0.1)
+
+    # Reset
     initial_joint_pos_range = [-0.1, 0.1]
 
-    # reward scales
+    # Reward scales
     dist_reward_scale = 1.5
-    rot_reward_scale = 1.5
-    reach_reward_scale = 10.0
     action_penalty_scale = 0.05
-    finger_reward_scale = 2.0
 
-    action_scale = 7.5
-    dof_velocity_scale = 0.1
 
 class FrankaReachEnv(DirectRLEnv):
+    """Environment class for the Franka robot reaching task."""
 
     cfg: FrankaReachEnvCfg
 
     def __init__(self, cfg: FrankaReachEnvCfg, render_mode: str | None = None, **kwargs):
         super().__init__(cfg, render_mode, **kwargs)
-        # self._setup_scene()
 
-    def _setup_scene(self):
-        """Initialize simulation scene."""
-        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg(), translation=(0.0, 0.0, -0.4))
+        self.robot_dof_lower_limits = self._robot.data.soft_joint_pos_limits[0, :, 0].to(device=self.device)
+        self.robot_dof_upper_limits = self._robot.data.soft_joint_pos_limits[0, :, 1].to(device=self.device)
 
-        # spawn a usd file of a table into the scene
-        cfg = sim_utils.UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd")
-        cfg.func(
-            "/World/envs/env_.*/Table", cfg, translation=(0.55, 0.0, 0.0), orientation=(0.70711, 0.0, 0.0, 0.70711)
-        )
+        self.target_pos = torch.tensor([0.5, 0.0, 0.3], device=self.device).repeat(self.num_envs, 1)
 
+    def _setup_scene(self) -> None:
+        """Set up the simulation scene with the robot, markers, table, and ground plane."""
         self._robot = Articulation(self.cfg.robot)
-
+        # Add markers
+        self.scene.target_markers = VisualizationMarkers(self.cfg.markers)
+        self.scene.ee_markers = VisualizationMarkers(self.cfg.markers)
+        # Add ground plane
+        spawn_ground_plane(
+            prim_path="/World/ground",
+            cfg=GroundPlaneCfg(),
+            translation=(0.0, 0.0, -1.05)
+        )
+        # Add table
+        table_cfg = sim_utils.UsdFileCfg(
+            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
+        )
+        table_cfg.func(
+            "/World/envs/env_.*/Table",
+            table_cfg,
+            translation=(0.55, 0.0, 0.0),
+            orientation=(0.70711, 0.0, 0.0, 0.70711),
+        )
+        # Clone and replicate environments
         self.scene.clone_environments(copy_from_source=False)
-
+        # Add articulation to the scene
         self.scene.articulations["robot"] = self._robot
-
+        # Add light
         light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
         light_cfg.func("/World/Light", light_cfg)
 
-    def _get_rewards(self) -> torch.Tensor:
-        target_pos = torch.tensor([0.5, 0.0, 0.5], device=self.device)
-        end_effector_pos = self.robot.data.body_pos_w[:, self.robot.find_bodies("panda_link7")[0][0]]
-        distance = torch.norm(end_effector_pos - target_pos, dim=-1)
-        dist_reward = self.cfg.dist_reward_scale / (1.0 + distance)
-        action_penalty = torch.sum(self.actions**2, dim=-1) * self.cfg.action_penalty_scale
-        return dist_reward - action_penalty
+    def _pre_physics_step(self, actions: torch.Tensor) -> None:
+        """
+        Process actions before stepping the physics and update markers 
+        for the end-effector and target.
+        """
+        self.actions = actions.clone().clamp(self.robot_dof_lower_limits, self.robot_dof_upper_limits)
+        # Convert local target to world coordinates for visualization
+        root_pos = self._robot.data.root_state_w[:, :3]
+        target_world = self.target_pos + root_pos
+        # Update marker positions
+        ee_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
+        self.scene.ee_markers.visualize(ee_pos)
+        self.scene.target_markers.visualize(target_world)
+
+    def _apply_action(self) -> None:
+        """Send position targets to the robot's joints."""
+        self._robot.set_joint_position_target(self.actions)
 
     def _get_observations(self) -> dict:
-        dof_pos_scaled = (2.0 * (self.robot.data.joint_pos - self.robot.data.default_joint_pos))
-        obs = torch.cat((dof_pos_scaled, self.robot.data.joint_vel), dim=-1)
+        """
+        Retrieve the robot's state for RL training:
+          - Joint positions (scaled around defaults)
+          - Joint velocities
+          - Relative position from the end-effector to the target
+        """
+        dof_pos_scaled = (
+            2.0
+            * (self._robot.data.joint_pos - self._robot.data.default_joint_pos)
+        )
+        end_effector_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
+        root_pos = self._robot.data.root_state_w[:, :3]
+        target = self.target_pos + root_pos  # Convert to local frame
+        to_target = target - end_effector_pos
+
+        obs = torch.cat(
+            (dof_pos_scaled, self._robot.data.joint_vel, to_target),
+            dim=-1,
+        )
         return {"policy": torch.clamp(obs, -5.0, 5.0)}
 
+    def _get_rewards(self) -> torch.Tensor:
+        """Calculate the step reward based on distance to the target and action penalty."""
+        root_pos = self._robot.data.root_state_w[:, :3]
+        target = self.target_pos + root_pos
+        curr_pos_w = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
+        # Distance from end-effector to target
+        d = torch.norm(curr_pos_w - target, p=2, dim=-1)
+        # Distance-based reward (higher for being close)
+        dist_reward = self.cfg.dist_reward_scale / (1.0 + d**2)
+        dist_reward *= dist_reward
+        dist_reward = torch.where(d <= 0.02, dist_reward * 2, dist_reward)
+
+        # Action penalty for smoothness
+        action_penalty = torch.sum(self.actions**2, dim=-1) * self.cfg.action_penalty_scale
+
+        return dist_reward - action_penalty
+
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
-        target_pos = torch.tensor([0.5, 0.0, 0.5], device=self.device)
-        end_effector_pos = self.robot.data.body_pos_w[:, self.robot.find_bodies("panda_link7")[0][0]]
-        distance = torch.norm(end_effector_pos - target_pos, dim=-1)
-        terminated = distance < self.cfg.max_target_dist
+        """Determine whether an episode should terminate or truncate."""
+        root_pos = self._robot.data.root_state_w[:, :3]
+        target = self.target_pos + root_pos
+        end_effector_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
+        distance = torch.norm(end_effector_pos - target, dim=-1)
+
+        # Terminate if close enough to the goal
+        terminated = distance < 0.02
+
+        # Truncate if max episode length is reached
         truncated = self.episode_length_buf >= self.max_episode_length - 1
+
         return terminated, truncated
 
-    def _reset_idx(self, env_ids: torch.Tensor | None):
+    def _reset_idx(self, env_ids: torch.Tensor | None) -> None:
+        """Reset environments with the provided indices."""
         if env_ids is None:
-            env_ids = self.robot._ALL_INDICES
+            env_ids = self._robot._ALL_INDICES
+
         super()._reset_idx(env_ids)
-        joint_pos = self.robot.data.default_joint_pos[env_ids] + sample_uniform(
-            self.cfg.initial_joint_pos_range[0],
-            self.cfg.initial_joint_pos_range[1],
-            joint_pos.shape,
-            self.device,
+
+        # Randomize joint positions
+        joint_pos = (
+            self._robot.data.default_joint_pos[env_ids]
+            + sample_uniform(
+                self.cfg.initial_joint_pos_range[0],
+                self.cfg.initial_joint_pos_range[1],
+                (len(env_ids), self._robot.num_joints),
+                self.device,
+            )
+        )
+        joint_pos = torch.clamp(
+            joint_pos,
+            self.robot_dof_lower_limits,
+            self.robot_dof_upper_limits
         )
-        self.robot.write_joint_state_to_sim(joint_pos, torch.zeros_like(joint_pos), env_ids)
+        joint_vel = torch.zeros_like(joint_pos)
 
-    def _pre_physics_step(self, actions: torch.Tensor) -> None:
-        self.actions = actions.clone().clamp(-1.0, 1.0)
+        # Set joint positions and velocities
+        self._robot.set_joint_position_target(joint_pos, env_ids=env_ids)
+        self._robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
 
-    def _apply_action(self) -> None:
-        self.robot.set_joint_position_target(self.actions)
+        # Randomize target position
+        self.target_pos[env_ids, 0] = sample_uniform(0.35, 0.65, (len(env_ids),), self.device)
+        self.target_pos[env_ids, 1] = sample_uniform(-0.2, 0.2, (len(env_ids),), self.device)
+        self.target_pos[env_ids, 2] = sample_uniform(0.15, 0.5, (len(env_ids),), self.device)

From 900c12e91c44bebb0e7c7fd08f948aa13146807c Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 01:27:03 -0400
Subject: [PATCH 04/10] adds rsl_rl agent

---
 .../direct/franka_reach/__init__.py           |  1 +
 .../franka_reach/agents/rsl_rl_ppo_cfg.py     | 37 +++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rsl_rl_ppo_cfg.py

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
index f052e833c439..3abf89fb2dc1 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
@@ -21,5 +21,6 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.franka_reach_env:FrankaReachEnvCfg",
         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
     },
 )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 000000000000..b92a00c5db97
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 16
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "franka_reach_direct"
+    empirical_normalization = True
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=8,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )

From 3500c7f7dee30bed3c3c0974bd2f159ebf386ba7 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 01:31:17 -0400
Subject: [PATCH 05/10] formats code

---
 .../direct/franka_reach/franka_reach_env.py   | 44 ++++++-------------
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
index 787bec7dd758..4350b1d22de2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -11,14 +11,14 @@
 from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg
 from isaaclab.assets import Articulation, ArticulationCfg
 from isaaclab.envs import DirectRLEnv, DirectRLEnvCfg
+from isaaclab.markers import VisualizationMarkers
+from isaaclab.markers.config import FRAME_MARKER_CFG
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.sim import SimulationCfg
+from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
 from isaaclab.utils import configclass
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
 from isaaclab.utils.math import sample_uniform
-from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
-from isaaclab.markers.config import FRAME_MARKER_CFG
-from isaaclab.markers import VisualizationMarkers
 
 
 @configclass
@@ -46,11 +46,7 @@ class FrankaReachEnvCfg(DirectRLEnvCfg):
     )
 
     # Scene
-    scene: InteractiveSceneCfg = InteractiveSceneCfg(
-        num_envs=4096,
-        env_spacing=3.0,
-        replicate_physics=True
-    )
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=3.0, replicate_physics=True)
 
     # Robot
     robot = ArticulationCfg(
@@ -139,11 +135,7 @@ def _setup_scene(self) -> None:
         self.scene.target_markers = VisualizationMarkers(self.cfg.markers)
         self.scene.ee_markers = VisualizationMarkers(self.cfg.markers)
         # Add ground plane
-        spawn_ground_plane(
-            prim_path="/World/ground",
-            cfg=GroundPlaneCfg(),
-            translation=(0.0, 0.0, -1.05)
-        )
+        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg(), translation=(0.0, 0.0, -1.05))
         # Add table
         table_cfg = sim_utils.UsdFileCfg(
             usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
@@ -164,7 +156,7 @@ def _setup_scene(self) -> None:
 
     def _pre_physics_step(self, actions: torch.Tensor) -> None:
         """
-        Process actions before stepping the physics and update markers 
+        Process actions before stepping the physics and update markers
         for the end-effector and target.
         """
         self.actions = actions.clone().clamp(self.robot_dof_lower_limits, self.robot_dof_upper_limits)
@@ -187,10 +179,7 @@ def _get_observations(self) -> dict:
           - Joint velocities
           - Relative position from the end-effector to the target
         """
-        dof_pos_scaled = (
-            2.0
-            * (self._robot.data.joint_pos - self._robot.data.default_joint_pos)
-        )
+        dof_pos_scaled = 2.0 * (self._robot.data.joint_pos - self._robot.data.default_joint_pos)
         end_effector_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
         root_pos = self._robot.data.root_state_w[:, :3]
         target = self.target_pos + root_pos  # Convert to local frame
@@ -242,20 +231,13 @@ def _reset_idx(self, env_ids: torch.Tensor | None) -> None:
         super()._reset_idx(env_ids)
 
         # Randomize joint positions
-        joint_pos = (
-            self._robot.data.default_joint_pos[env_ids]
-            + sample_uniform(
-                self.cfg.initial_joint_pos_range[0],
-                self.cfg.initial_joint_pos_range[1],
-                (len(env_ids), self._robot.num_joints),
-                self.device,
-            )
-        )
-        joint_pos = torch.clamp(
-            joint_pos,
-            self.robot_dof_lower_limits,
-            self.robot_dof_upper_limits
+        joint_pos = self._robot.data.default_joint_pos[env_ids] + sample_uniform(
+            self.cfg.initial_joint_pos_range[0],
+            self.cfg.initial_joint_pos_range[1],
+            (len(env_ids), self._robot.num_joints),
+            self.device,
         )
+        joint_pos = torch.clamp(joint_pos, self.robot_dof_lower_limits, self.robot_dof_upper_limits)
         joint_vel = torch.zeros_like(joint_pos)
 
         # Set joint positions and velocities

From bbb02bcbb369e1b446f7ff842b02be51998ffdf0 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 01:53:48 -0400
Subject: [PATCH 06/10] adds log

---
 .../isaaclab_tasks/direct/franka_reach/franka_reach_env.py  | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
index 4350b1d22de2..b872b6053c70 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -206,6 +206,12 @@ def _get_rewards(self) -> torch.Tensor:
         # Action penalty for smoothness
         action_penalty = torch.sum(self.actions**2, dim=-1) * self.cfg.action_penalty_scale
 
+        self.extras["log"] = {
+            "distance": (d).mean(),
+            "dist_reward": (dist_reward).mean(),
+            "action_penalty": (action_penalty).mean(),
+        }
+
         return dist_reward - action_penalty
 
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:

From be82a39c78c69a5fc53d034d99a47c0feb94a277 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 02:06:10 -0400
Subject: [PATCH 07/10] updates documentation

---
 docs/source/overview/environments.rst | 57 ++++++++++++++-------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/docs/source/overview/environments.rst b/docs/source/overview/environments.rst
index cf4a5eb7e5a5..721aa38bdd14 100644
--- a/docs/source/overview/environments.rst
+++ b/docs/source/overview/environments.rst
@@ -100,33 +100,35 @@ for the lift-cube environment:
 .. table::
     :widths: 33 37 30
 
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | World              | Environment ID          | Description                                                                 |
-    +====================+=========================+=============================================================================+
-    | |reach-franka|     | |reach-franka-link|     | Move the end-effector to a sampled target pose with the Franka robot        |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |reach-ur10|       | |reach-ur10-link|       | Move the end-effector to a sampled target pose with the UR10 robot          |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |lift-cube|        | |lift-cube-link|        | Pick a cube and bring it to a sampled target position with the Franka robot |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |stack-cube|       | |stack-cube-link|       | Stack three cubes (bottom to top: blue, red, green) with the Franka robot   |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |cabi-franka|      | |cabi-franka-link|      | Grasp the handle of a cabinet's drawer and open it with the Franka robot    |
-    |                    |                         |                                                                             |
-    |                    | |franka-direct-link|    |                                                                             |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |cube-allegro|     | |cube-allegro-link|     | In-hand reorientation of a cube using Allegro hand                          |
-    |                    |                         |                                                                             |
-    |                    | |allegro-direct-link|   |                                                                             |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |cube-shadow|      | |cube-shadow-link|      | In-hand reorientation of a cube using Shadow hand                           |
-    |                    |                         |                                                                             |
-    |                    | |cube-shadow-ff-link|   |                                                                             |
-    |                    |                         |                                                                             |
-    |                    | |cube-shadow-lstm-link| |                                                                             |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
-    | |cube-shadow|      | |cube-shadow-vis-link|  | In-hand reorientation of a cube using Shadow hand using perceptive inputs   |
-    +--------------------+-------------------------+-----------------------------------------------------------------------------+
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | World              | Environment ID              | Description                                                                 |
+    +====================+=============================+=============================================================================+
+    | |reach-franka|     | |reach-franka-link|         | Move the end-effector to a sampled target pose with the Franka robot        |
+    |                    |                             |                                                                             |
+    |                    | |reach-franka-direct-link|  |                                                                             |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |reach-ur10|       | |reach-ur10-link|           | Move the end-effector to a sampled target pose with the UR10 robot          |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |lift-cube|        | |lift-cube-link|            | Pick a cube and bring it to a sampled target position with the Franka robot |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |stack-cube|       | |stack-cube-link|           | Stack three cubes (bottom to top: blue, red, green) with the Franka robot   |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |cabi-franka|      | |cabi-franka-link|          | Grasp the handle of a cabinet's drawer and open it with the Franka robot    |
+    |                    |                             |                                                                             |
+    |                    | |franka-direct-link|        |                                                                             |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |cube-allegro|     | |cube-allegro-link|         | In-hand reorientation of a cube using Allegro hand                          |
+    |                    |                             |                                                                             |
+    |                    | |allegro-direct-link|       |                                                                             |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |cube-shadow|      | |cube-shadow-link|          | In-hand reorientation of a cube using Shadow hand                           |
+    |                    |                             |                                                                             |
+    |                    | |cube-shadow-ff-link|       |                                                                             |
+    |                    |                             |                                                                             |
+    |                    | |cube-shadow-lstm-link|     |                                                                             |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
+    | |cube-shadow|      | |cube-shadow-vis-link|      | In-hand reorientation of a cube using Shadow hand using perceptive inputs   |
+    +--------------------+-----------------------------+-----------------------------------------------------------------------------+
 
 .. |reach-franka| image:: ../_static/tasks/manipulation/franka_reach.jpg
 .. |reach-ur10| image:: ../_static/tasks/manipulation/ur10_reach.jpg
@@ -137,6 +139,7 @@ for the lift-cube environment:
 .. |stack-cube| image:: ../_static/tasks/manipulation/franka_stack.jpg
 
 .. |reach-franka-link| replace:: `Isaac-Reach-Franka-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py>`__
+.. |reach-franka-direct-link| replace:: `Isaac-Franka-Reach-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py>`__
 .. |reach-ur10-link| replace:: `Isaac-Reach-UR10-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py>`__
 .. |lift-cube-link| replace:: `Isaac-Lift-Cube-Franka-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/joint_pos_env_cfg.py>`__
 .. |lift-cube-ik-abs-link| replace:: `Isaac-Lift-Cube-Franka-IK-Abs-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/ik_abs_env_cfg.py>`__

From 6104e0662e507014b933ce1c6d2d4be1a0fd61e7 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 03:15:42 -0400
Subject: [PATCH 08/10] removes rl_games agent

---
 .../direct/franka_reach/__init__.py           |  1 -
 .../franka_reach/agents/rl_games_ppo_cfg.yaml | 75 -------------------
 2 files changed, 76 deletions(-)
 delete mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
index 3abf89fb2dc1..ab6d0ecda19f 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/__init__.py
@@ -20,7 +20,6 @@
     disable_env_checker=True,
     kwargs={
         "env_cfg_entry_point": f"{__name__}.franka_reach_env:FrankaReachEnvCfg",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
     },
 )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml
deleted file mode 100644
index 27e44c9c3195..000000000000
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/agents/rl_games_ppo_cfg.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-params:
-  seed: 42
-
-  # environment wrapper clipping
-  env:
-    clip_actions: 1.0
-
-  algo:
-    name: a2c_continuous
-
-  model:
-    name: continuous_a2c_logstd
-
-  network:
-    name: actor_critic
-    separate: False
-    space:
-      continuous:
-        mu_activation: None
-        sigma_activation: None
-
-        mu_init:
-          name: default
-        sigma_init:
-          name: const_initializer
-          val: 0
-        fixed_sigma: True
-    mlp:
-      units: [256, 128, 64]
-      activation: elu
-      d2rl: False
-
-      initializer:
-        name: default
-      regularizer:
-        name: None
-
-  load_checkpoint: False # flag which sets whether to load the checkpoint
-  load_path: '' # path to the checkpoint to load
-
-  config:
-    name: franka_reach_direct
-    env_name: rlgpu
-    device: 'cuda:0'
-    device_name: 'cuda:0'
-    multi_gpu: False
-    ppo: True
-    mixed_precision: False
-    normalize_input: True
-    normalize_value: True
-    # value_bootstrap: True
-    num_actors: -1  # configured from the script (based on num_envs)
-    reward_shaper:
-      scale_value: 0.01
-    normalize_advantage: True
-    gamma: 0.99
-    tau: 0.95
-    learning_rate: 5e-4
-    lr_schedule: adaptive
-    kl_threshold: 0.008
-    score_to_win: 100000000
-    max_epochs: 1500
-    save_best_after: 200
-    save_frequency: 100
-    grad_norm: 1.0
-    entropy_coef: 0.0
-    truncate_grads: True
-    e_clip: 0.2
-    horizon_length: 16
-    minibatch_size: 8192
-    mini_epochs: 8
-    critic_coef: 4
-    clip_value: True
-    seq_length: 4
-    bounds_loss_coef: 0.0001

From face79237dddcc5a14c7eb1e0687a777922d29fa Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 03:16:13 -0400
Subject: [PATCH 09/10] tweaks termination parameters

---
 .../isaaclab_tasks/direct/franka_reach/franka_reach_env.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
index b872b6053c70..d06008cb1a45 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py
@@ -222,7 +222,7 @@ def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
         distance = torch.norm(end_effector_pos - target, dim=-1)
 
         # Terminate if close enough to the goal
-        terminated = distance < 0.02
+        terminated = distance < 0.01
 
         # Truncate if max episode length is reached
         truncated = self.episode_length_buf >= self.max_episode_length - 1

From 5390120149fe0276981c79b45e9b18111c11b6b2 Mon Sep 17 00:00:00 2001
From: louis <le.lay.louis@gmail.com>
Date: Sun, 16 Mar 2025 03:23:06 -0400
Subject: [PATCH 10/10] Updates isaaclab_tasks CHANGELOG.rst and extension.toml

---
 source/isaaclab_tasks/config/extension.toml | 2 +-
 source/isaaclab_tasks/docs/CHANGELOG.rst    | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml
index 86d9d5572854..e74a43d977c8 100644
--- a/source/isaaclab_tasks/config/extension.toml
+++ b/source/isaaclab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.10.25"
+version = "0.10.26"
 
 # Description
 title = "Isaac Lab Environments"
diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst
index a63ee07e1777..82baf107dff4 100644
--- a/source/isaaclab_tasks/docs/CHANGELOG.rst
+++ b/source/isaaclab_tasks/docs/CHANGELOG.rst
@@ -1,6 +1,14 @@
 Changelog
 ---------
 
+0.10.26 (2025-03-16)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added the ``Isaac-Franka-Reach-Direct-v0`` environment, a direct-workflow RL environment that implements the Franka reach task.
+
 0.10.25 (2025-03-10)
 ~~~~~~~~~~~~~~~~~~~~