Skip to content
3 changes: 3 additions & 0 deletions docs/source/overview/environments.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ for the lift-cube environment:
| World | Environment ID | Description |
+======================+===========================+=============================================================================+
| |reach-franka| | |reach-franka-link| | Move the end-effector to a sampled target pose with the Franka robot |
| | | |
| | |reach-franka-direct-link|| |
+----------------------+---------------------------+-----------------------------------------------------------------------------+
| |reach-ur10| | |reach-ur10-link| | Move the end-effector to a sampled target pose with the UR10 robot |
+----------------------+---------------------------+-----------------------------------------------------------------------------+
Expand Down Expand Up @@ -161,6 +163,7 @@ for the lift-cube environment:
.. |galbot_stack| image:: ../_static/tasks/manipulation/galbot_stack_cube.jpg

.. |reach-franka-link| replace:: `Isaac-Reach-Franka-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py>`__
.. |reach-franka-direct-link| replace:: `Isaac-Franka-Reach-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/direct/franka_reach/franka_reach_env.py>`__
.. |reach-ur10-link| replace:: `Isaac-Reach-UR10-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py>`__
.. |deploy-reach-ur10e-link| replace:: `Isaac-Deploy-Reach-UR10e-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/reach/config/ur_10e/joint_pos_env_cfg.py>`__
.. |lift-cube-link| replace:: `Isaac-Lift-Cube-Franka-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/joint_pos_env_cfg.py>`__
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_tasks/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.10.51"
version = "0.10.52"

# Description
title = "Isaac Lab Environments"
Expand Down
8 changes: 8 additions & 0 deletions source/isaaclab_tasks/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Changelog
---------

0.10.52 (2025-09-09)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added ``Isaac-Franka-Reach-Direct-v0`` environment as a direct RL env that implements the Franka Reach task.

0.10.51 (2025-09-08)
~~~~~~~~~~~~~~~~~~~~

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Franka-Reach environment.
"""

import gymnasium as gym

from . import agents

##
# Register Gym environments.
##

gym.register(
id="Isaac-Franka-Reach-Direct-v0",
entry_point=f"{__name__}.franka_reach_env:FrankaReachEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.franka_reach_env:FrankaReachEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 1500
save_interval = 50
experiment_name = "franka_reach_direct"
empirical_normalization = True
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0,
num_learning_epochs=8,
num_mini_batches=8,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.008,
max_grad_norm=1.0,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import torch

import isaaclab.sim as sim_utils
from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg
from isaaclab.assets import Articulation, ArticulationCfg
from isaaclab.envs import DirectRLEnv, DirectRLEnvCfg
from isaaclab.markers import VisualizationMarkers
from isaaclab.markers.config import FRAME_MARKER_CFG
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils import configclass
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
from isaaclab.utils.math import sample_uniform


@configclass
class FrankaReachEnvCfg(DirectRLEnvCfg):
"""Configuration class for the Franka reaching environment."""

# Environment
episode_length_s = 5.0
decimation = 8
action_space = 9
observation_space = 21
state_space = 0

# Simulation
sim: SimulationCfg = SimulationCfg(
dt=1 / 120,
render_interval=decimation,
physics_material=sim_utils.RigidBodyMaterialCfg(
friction_combine_mode="multiply",
restitution_combine_mode="multiply",
static_friction=1.0,
dynamic_friction=1.0,
restitution=0.0,
),
)

# Scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=3.0, replicate_physics=True)

# Robot
robot = ArticulationCfg(
prim_path="/World/envs/env_.*/Robot",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Franka/franka_instanceable.usd",
activate_contact_sensors=False,
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=5.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=12,
solver_velocity_iteration_count=1,
),
),
init_state=ArticulationCfg.InitialStateCfg(
joint_pos={
"panda_joint1": 1.157,
"panda_joint2": -1.066,
"panda_joint3": -0.155,
"panda_joint4": -2.239,
"panda_joint5": -1.841,
"panda_joint6": 1.003,
"panda_joint7": 0.469,
"panda_finger_joint.*": 0.035,
},
pos=(0.0, 0.0, 0.0),
rot=(1.0, 0.0, 0.0, 0.0),
),
actuators={
"panda_shoulder": ImplicitActuatorCfg(
joint_names_expr=["panda_joint[1-4]"],
effort_limit_sim=87.0,
velocity_limit_sim=2.175,
stiffness=80.0,
damping=4.0,
),
"panda_forearm": ImplicitActuatorCfg(
joint_names_expr=["panda_joint[5-7]"],
effort_limit_sim=12.0,
velocity_limit_sim=2.61,
stiffness=80.0,
damping=4.0,
),
"panda_hand": ImplicitActuatorCfg(
joint_names_expr=["panda_finger_joint.*"],
effort_limit_sim=200.0,
velocity_limit_sim=0.2,
stiffness=2e3,
damping=1e2,
),
},
)

# Markers
markers = FRAME_MARKER_CFG.replace(prim_path="/Visuals/Markers")
markers.markers["frame"].scale = (0.1, 0.1, 0.1)

# Reset
initial_joint_pos_range = [-0.1, 0.1]

# Reward scales
dist_reward_scale = 1.5
action_penalty_scale = 0.05


class FrankaReachEnv(DirectRLEnv):
"""Environment class for the Franka robot reaching task."""

cfg: FrankaReachEnvCfg

def __init__(self, cfg: FrankaReachEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)

self.robot_dof_lower_limits = self._robot.data.soft_joint_pos_limits[0, :, 0].to(device=self.device)
self.robot_dof_upper_limits = self._robot.data.soft_joint_pos_limits[0, :, 1].to(device=self.device)

self.target_pos = torch.tensor([0.5, 0.0, 0.3], device=self.device).repeat(self.num_envs, 1)

def _setup_scene(self) -> None:
"""Set up the simulation scene with the robot, markers, table, and ground plane."""
self._robot = Articulation(self.cfg.robot)
# Add markers
self.scene.target_markers = VisualizationMarkers(self.cfg.markers)
self.scene.ee_markers = VisualizationMarkers(self.cfg.markers)
# Add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg(), translation=(0.0, 0.0, -1.05))
# Add table
table_cfg = sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
)
table_cfg.func(
"/World/envs/env_.*/Table",
table_cfg,
translation=(0.55, 0.0, 0.0),
orientation=(0.70711, 0.0, 0.0, 0.70711),
)
# Clone and replicate environments
self.scene.clone_environments(copy_from_source=False)
# Add articulation to the scene
self.scene.articulations["robot"] = self._robot
# Add light
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)

def _pre_physics_step(self, actions: torch.Tensor) -> None:
"""
Process actions before stepping the physics and update markers
for the end-effector and target.
"""
self.actions = actions.clone().clamp(self.robot_dof_lower_limits, self.robot_dof_upper_limits)
# Convert local target to world coordinates for visualization
root_pos = self._robot.data.root_state_w[:, :3]
target_world = self.target_pos + root_pos
# Update marker positions
ee_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
self.scene.ee_markers.visualize(ee_pos)
self.scene.target_markers.visualize(target_world)

def _apply_action(self) -> None:
"""Send position targets to the robot's joints."""
self._robot.set_joint_position_target(self.actions)

def _get_observations(self) -> dict:
"""
Retrieve the robot's state for RL training:
- Joint positions (scaled around defaults)
- Joint velocities
- Relative position from the end-effector to the target
"""
dof_pos_scaled = 2.0 * (self._robot.data.joint_pos - self._robot.data.default_joint_pos)
end_effector_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
root_pos = self._robot.data.root_state_w[:, :3]
target = self.target_pos + root_pos # Convert to local frame
to_target = target - end_effector_pos

obs = torch.cat(
(dof_pos_scaled, self._robot.data.joint_vel, to_target),
dim=-1,
)
return {"policy": torch.clamp(obs, -5.0, 5.0)}

def _get_rewards(self) -> torch.Tensor:
"""Calculate the step reward based on distance to the target and action penalty."""
root_pos = self._robot.data.root_state_w[:, :3]
target = self.target_pos + root_pos
curr_pos_w = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
# Distance from end-effector to target
d = torch.norm(curr_pos_w - target, p=2, dim=-1)
# Distance-based reward (higher for being close)
dist_reward = self.cfg.dist_reward_scale / (1.0 + d**2)
dist_reward *= dist_reward
dist_reward = torch.where(d <= 0.02, dist_reward * 2, dist_reward)

# Action penalty for smoothness
action_penalty = torch.sum(self.actions**2, dim=-1) * self.cfg.action_penalty_scale

self.extras["log"] = {
"distance": (d).mean(),
"dist_reward": (dist_reward).mean(),
"action_penalty": (action_penalty).mean(),
}

return dist_reward - action_penalty

def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
"""Determine whether an episode should terminate or truncate."""
root_pos = self._robot.data.root_state_w[:, :3]
target = self.target_pos + root_pos
end_effector_pos = self._robot.data.body_pos_w[:, self._robot.find_bodies("panda_hand")[0][0]]
distance = torch.norm(end_effector_pos - target, dim=-1)

# Terminate if close enough to the goal
terminated = distance < 0.01

# Truncate if max episode length is reached
truncated = self.episode_length_buf >= self.max_episode_length - 1

return terminated, truncated

def _reset_idx(self, env_ids: torch.Tensor | None) -> None:
"""Reset environments with the provided indices."""
if env_ids is None:
env_ids = self._robot._ALL_INDICES

super()._reset_idx(env_ids)

# Randomize joint positions
joint_pos = self._robot.data.default_joint_pos[env_ids] + sample_uniform(
self.cfg.initial_joint_pos_range[0],
self.cfg.initial_joint_pos_range[1],
(len(env_ids), self._robot.num_joints),
self.device,
)
joint_pos = torch.clamp(joint_pos, self.robot_dof_lower_limits, self.robot_dof_upper_limits)
joint_vel = torch.zeros_like(joint_pos)

# Set joint positions and velocities
self._robot.set_joint_position_target(joint_pos, env_ids=env_ids)
self._robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)

# Randomize target position
self.target_pos[env_ids, 0] = sample_uniform(0.35, 0.65, (len(env_ids),), self.device)
self.target_pos[env_ids, 1] = sample_uniform(-0.2, 0.2, (len(env_ids),), self.device)
self.target_pos[env_ids, 2] = sample_uniform(0.15, 0.5, (len(env_ids),), self.device)