diff --git a/.github/unittest/linux_libs/scripts_safety_gymnasium/install.sh b/.github/unittest/linux_libs/scripts_safety_gymnasium/install.sh new file mode 100755 index 00000000000..d078a95e8e7 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_safety_gymnasium/install.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. + +set -e + +# Ensure uv is in PATH +export PATH="$HOME/.local/bin:$PATH" + +# Activate the virtual environment +source ./env/bin/activate + +if [ "${CU_VERSION:-}" == cpu ] ; then + version="cpu" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" +fi + +# submodules +git submodule sync && git submodule update --init --recursive + +printf "Installing PyTorch with cu128" +if [[ "$TORCH_VERSION" == "nightly" ]]; then + if [ "${CU_VERSION:-}" == cpu ] ; then + uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + else + uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128 -U + fi +elif [[ "$TORCH_VERSION" == "stable" ]]; then + if [ "${CU_VERSION:-}" == cpu ] ; then + uv pip install torch --index-url https://download.pytorch.org/whl/cpu -U + else + uv pip install torch --index-url https://download.pytorch.org/whl/cu128 -U + fi +else + printf "Failed to install pytorch" + exit 1 +fi + +# Ensure tensordict and torchrl dependencies are installed +# (since we use --no-deps for tensordict and torchrl) +uv pip install numpy pyvers packaging cloudpickle + +# Install build dependencies for torchrl (needed with --no-build-isolation) +uv pip install setuptools wheel setuptools_scm ninja "pybind11[global]" + +# install tensordict +if [[ "$RELEASE" == 0 ]]; then + uv pip install --no-deps git+https://github.com/pytorch/tensordict.git +else + uv pip install --no-deps tensordict +fi + +# smoke test +python -c "import functorch;import tensordict" + +printf "* Installing torchrl\n" +python -m pip install -e . --no-build-isolation --no-deps + +# smoke test +python -c "import torchrl" diff --git a/.github/unittest/linux_libs/scripts_safety_gymnasium/post_process.sh b/.github/unittest/linux_libs/scripts_safety_gymnasium/post_process.sh new file mode 100755 index 00000000000..b143bf9fe69 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_safety_gymnasium/post_process.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e + +# Activate the virtual environment +source ./env/bin/activate diff --git a/.github/unittest/linux_libs/scripts_safety_gymnasium/requirements.txt b/.github/unittest/linux_libs/scripts_safety_gymnasium/requirements.txt new file mode 100644 index 00000000000..e3e8e971633 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_safety_gymnasium/requirements.txt @@ -0,0 +1,18 @@ +# Core dependencies for tensordict/torchrl (installed with --no-deps) +numpy +pyvers +packaging +cloudpickle + +# Test dependencies +pytest +pytest-xdist +pytest-instafail +pytest-error-for-skips +coverage + +# Safety-Gymnasium and friends +safety-gymnasium +gymnasium +mujoco +imageio diff --git a/.github/unittest/linux_libs/scripts_safety_gymnasium/run_test.sh b/.github/unittest/linux_libs/scripts_safety_gymnasium/run_test.sh new file mode 100755 index 00000000000..e8aec8ee903 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_safety_gymnasium/run_test.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -e + +# Activate the virtual environment +source ./env/bin/activate + +apt-get update && apt-get install -y git wget cmake + +export PYTORCH_TEST_WITH_SLOW='1' +export LAZY_LEGACY_OP=False +python -m torch.utils.collect_env +# Avoid error: "fatal: unsafe repository" +git config --global --add safe.directory '*' + +root_dir="$(git rev-parse --show-toplevel)" +env_dir="${root_dir}/env" +lib_dir="${env_dir}/lib" + +deactivate 2>/dev/null || true && source ./env/bin/activate + +# this workflow only tests the libs +python -c "import safety_gymnasium" + +python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/libs --instafail -v --durations 200 --capture no -k TestSafetyGymnasium --error-for-skips --runslow + +coverage combine -q +coverage xml -i diff --git a/.github/unittest/linux_libs/scripts_safety_gymnasium/setup_env.sh b/.github/unittest/linux_libs/scripts_safety_gymnasium/setup_env.sh new file mode 100755 index 00000000000..0f1210f3a54 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_safety_gymnasium/setup_env.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. + +set -e +set -v + +apt-get update && apt-get upgrade -y && apt-get install -y git cmake +# Avoid error: "fatal: unsafe repository" +git config --global --add safe.directory '*' +apt-get install -y wget \ + gcc \ + g++ \ + unzip \ + curl \ + patchelf \ + libosmesa6-dev \ + libgl1-mesa-glx \ + libglfw3 \ + swig3.0 \ + libglew-dev \ + libglvnd0 \ + libgl1 \ + libglx0 \ + libegl1 \ + libgles2 \ + libglib2.0-0 + +# Upgrade specific package +apt-get upgrade -y libstdc++6 + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +# Install uv if not already installed +if ! command -v uv &> /dev/null; then + printf "* Installing uv\n" + curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$PATH" +fi + +# Create virtual environment using uv +printf "python: ${PYTHON_VERSION}\n" +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment with uv\n" + uv venv "${env_dir}" --python "${PYTHON_VERSION}" +fi + +# Activate the virtual environment +source "${env_dir}/bin/activate" + +# Upgrade pip +uv pip install --upgrade pip + +# Install dependencies from requirements.txt +printf "* Installing dependencies (except PyTorch)\n" +if [ -f "${this_dir}/requirements.txt" ]; then + uv pip install -r "${this_dir}/requirements.txt" +fi diff --git a/.github/workflows/test-linux-libs.yml b/.github/workflows/test-linux-libs.yml index 9eb25699f02..1a246200820 100644 --- a/.github/workflows/test-linux-libs.yml +++ b/.github/workflows/test-linux-libs.yml @@ -625,6 +625,46 @@ jobs: bash .github/unittest/linux_libs/scripts_procgen/run_test.sh bash .github/unittest/linux_libs/scripts_procgen/post_process.sh + unittests-safety-gymnasium: + strategy: + matrix: + python_version: ["3.10"] + cuda_arch_version: ["12.8"] + if: ${{ github.event_name == 'push' || github.event_name == 'workflow_call' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'Environments') || contains(github.event.pull_request.labels.*.name, 'Environments/safety_gymnasium') }} + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + repository: pytorch/rl + runner: "linux.g5.4xlarge.nvidia.gpu" + gpu-arch-type: cuda + gpu-arch-version: "12.8" + docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04" + timeout: 120 + script: | + if [[ "${{ github.ref }}" =~ release/* ]]; then + export RELEASE=1 + export TORCH_VERSION=stable + else + export RELEASE=0 + export TORCH_VERSION=nightly + fi + + set -euo pipefail + export PYTHON_VERSION="3.10" + export CU_VERSION="12.8" + export TAR_OPTIONS="--no-same-owner" + export UPLOAD_CHANNEL="nightly" + export TF_CPP_MIN_LOG_LEVEL=0 + export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 + + nvidia-smi + + bash .github/unittest/linux_libs/scripts_safety_gymnasium/setup_env.sh + bash .github/unittest/linux_libs/scripts_safety_gymnasium/install.sh + PYTHON=./env/bin/python bash .github/unittest/helpers/assert_torch_version.sh "$TORCH_VERSION" + bash .github/unittest/linux_libs/scripts_safety_gymnasium/run_test.sh + bash .github/unittest/linux_libs/scripts_safety_gymnasium/post_process.sh + unittests-robohive: strategy: matrix: diff --git a/docs/source/reference/envs_libraries.rst b/docs/source/reference/envs_libraries.rst index a545e1cf40e..16611706297 100644 --- a/docs/source/reference/envs_libraries.rst +++ b/docs/source/reference/envs_libraries.rst @@ -107,6 +107,8 @@ Available wrappers PettingZooWrapper ProcgenWrapper RoboHiveEnv + SafetyGymnasiumEnv + SafetyGymnasiumWrapper SMACv2Env SMACv2Wrapper UnityMLAgentsEnv diff --git a/test/libs/test_safety_gymnasium.py b/test/libs/test_safety_gymnasium.py new file mode 100644 index 00000000000..bc79b9301a5 --- /dev/null +++ b/test/libs/test_safety_gymnasium.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +from __future__ import annotations + +import pytest +import torch + +from torchrl.envs.libs.safety_gymnasium import ( + _has_safety_gymnasium, + SafetyGymnasiumEnv, + SafetyGymnasiumWrapper, +) +from torchrl.envs.utils import check_env_specs + + +@pytest.mark.skipif(not _has_safety_gymnasium, reason="safety-gymnasium not installed") +class TestSafetyGymnasium: + def test_wrapper_specs(self): + import safety_gymnasium + + base = safety_gymnasium.make("SafetyPointGoal1-v0") + env = SafetyGymnasiumWrapper(base) + check_env_specs(env) + assert "cost" in env.observation_spec.keys() + + def test_env_from_name_specs(self): + env = SafetyGymnasiumEnv(env_name="SafetyPointGoal1-v0") + check_env_specs(env) + assert "cost" in env.observation_spec.keys() + + def test_rollout_exposes_cost(self): + env = SafetyGymnasiumEnv(env_name="SafetyPointGoal1-v0") + env.set_seed(0) + td = env.rollout(5) + assert ("next", "cost") in td.keys(True) + assert td["next", "cost"].dtype == torch.float64 + assert td["next", "cost"].shape == td["next", "reward"].shape[:-1] + + def test_cost_fires_on_hazard_contact(self): + # SafetyCarPush2-v0 has dense hazards; under random actions we expect + # at least one positive cost in a long rollout. Without this signal + # being plumbed through, every cost would be zero. + env = SafetyGymnasiumEnv(env_name="SafetyCarPush2-v0") + env.set_seed(0) + td = env.rollout(2000, break_when_any_done=False) + assert (td["next", "cost"] > 0).any(), ( + "Expected at least one nonzero cost over 2000 random steps; " + "cost signal may not be plumbed correctly." + ) diff --git a/torchrl/envs/libs/__init__.py b/torchrl/envs/libs/__init__.py index 49577c60694..7eb774046ff 100644 --- a/torchrl/envs/libs/__init__.py +++ b/torchrl/envs/libs/__init__.py @@ -26,6 +26,7 @@ from .pettingzoo import PettingZooEnv, PettingZooWrapper from .procgen import ProcgenEnv, ProcgenWrapper from .robohive import RoboHiveEnv +from .safety_gymnasium import SafetyGymnasiumEnv, SafetyGymnasiumWrapper from .smacv2 import SMACv2Env, SMACv2Wrapper from .unity_mlagents import UnityMLAgentsEnv, UnityMLAgentsWrapper from .vmas import VmasEnv, VmasWrapper @@ -59,6 +60,8 @@ "ProcgenEnv", "ProcgenWrapper", "RoboHiveEnv", + "SafetyGymnasiumEnv", + "SafetyGymnasiumWrapper", "SMACv2Env", "SMACv2Wrapper", "UnityMLAgentsEnv", diff --git a/torchrl/envs/libs/safety_gymnasium.py b/torchrl/envs/libs/safety_gymnasium.py new file mode 100644 index 00000000000..bca9906bc5d --- /dev/null +++ b/torchrl/envs/libs/safety_gymnasium.py @@ -0,0 +1,162 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +from __future__ import annotations + +import importlib.util +from types import ModuleType + +import torch + +from torchrl.data.tensor_specs import Unbounded +from torchrl.envs.libs.gym import GymEnv, GymWrapper, set_gym_backend +from torchrl.envs.utils import _classproperty + +_has_safety_gymnasium = importlib.util.find_spec("safety_gymnasium") is not None + + +def _list_safety_gymnasium_envs() -> list[str]: + """Discover task ids exposed by safety-gymnasium. + + safety-gymnasium registers many id variants (``*Gymnasium``, + ``*Vision*``, ``*Debug``, ``*FadingEasy*``, ...). We surface the + canonical 6-tuple-step ids and skip the ``Gymnasium`` variants because + those return the standard 5-tuple and would not match this wrapper's + ``_output_transform``. + """ + if not _has_safety_gymnasium: + return [] + import gymnasium + import safety_gymnasium # noqa: F401 -- import side-effect: register envs + + return sorted( + env_id + for env_id in gymnasium.envs.registry + if env_id.startswith("Safety") and "Gymnasium" not in env_id + ) + + +class _SafetyGymCostMixin: + """Expose safety-gymnasium's per-step ``cost`` signal as a top-level observation key. + + safety-gymnasium's ``step`` returns a 6-tuple + ``(obs, reward, cost, terminated, truncated, info)``. We collapse the + extra ``cost`` element into a stashed attribute, then write it onto + the step/reset tensordict so it travels with the observation rather + than through the info-dict-reader machinery. + """ + + def _post_init_cost(self) -> None: + self.observation_spec["cost"] = Unbounded(shape=(), dtype=torch.float64) + self._last_cost = torch.zeros((), dtype=torch.float64) + + def _output_transform(self, step_outputs_tuple): + observations, reward, cost, terminated, truncated, info = step_outputs_tuple + self._last_cost = torch.as_tensor(cost, dtype=torch.float64) + return ( + observations, + reward, + terminated, + truncated, + terminated | truncated, + info, + ) + + def _step(self, tensordict): + out = super()._step(tensordict) + out.set("cost", self._last_cost) + return out + + def _reset(self, tensordict=None, **kwargs): + out = super()._reset(tensordict, **kwargs) + out.set("cost", torch.zeros_like(self._last_cost)) + return out + + +class SafetyGymnasiumWrapper(_SafetyGymCostMixin, GymWrapper): + """Safety-Gymnasium environment wrapper. + + Safety-Gymnasium (https://github.com/PKU-Alignment/safety-gymnasium) is + the actively-maintained successor to OpenAI's Safety-Gym. It provides + constrained-RL benchmarks where each step emits a parallel ``cost`` + signal alongside the standard reward, allowing agents to optimize + reward subject to a safety budget. + + The underlying ``step`` API returns a 6-tuple. This wrapper folds + ``cost`` into the output tensordict as a top-level key alongside + ``reward``. + + Args: + env (safety_gymnasium.Env): the environment to wrap. + + Examples: + >>> import safety_gymnasium # doctest: +SKIP + >>> from torchrl.envs.libs.safety_gymnasium import SafetyGymnasiumWrapper + >>> base = safety_gymnasium.make("SafetyPointGoal1-v0") # doctest: +SKIP + >>> env = SafetyGymnasiumWrapper(base) # doctest: +SKIP + >>> td = env.rollout(3) # doctest: +SKIP + >>> assert ("next", "cost") in td.keys(True) # doctest: +SKIP + + """ + + git_url = "https://github.com/PKU-Alignment/safety-gymnasium" + libname = "safety-gymnasium" + + _make_specs = set_gym_backend("gymnasium")(GymEnv._make_specs) + + def __init__(self, env=None, **kwargs): + super().__init__(env=env, **kwargs) + self._post_init_cost() + + @_classproperty + def available_envs(cls): + return _list_safety_gymnasium_envs() + + +class SafetyGymnasiumEnv(_SafetyGymCostMixin, GymEnv): + """Safety-Gymnasium environment built from an env id. + + See :class:`SafetyGymnasiumWrapper` for behavior details. The + constructor builds the environment via + ``safety_gymnasium.make(env_name)`` and applies the same + cost-extraction pipeline. + + Args: + env_name (str): the safety-gymnasium task id, e.g. + ``"SafetyPointGoal1-v0"``. + + Examples: + >>> from torchrl.envs.libs.safety_gymnasium import SafetyGymnasiumEnv + >>> env = SafetyGymnasiumEnv(env_name="SafetyPointGoal1-v0") # doctest: +SKIP + >>> td = env.rollout(3) # doctest: +SKIP + >>> assert ("next", "cost") in td.keys(True) # doctest: +SKIP + + """ + + git_url = "https://github.com/PKU-Alignment/safety-gymnasium" + libname = "safety-gymnasium" + + @_classproperty + def available_envs(cls): + return _list_safety_gymnasium_envs() + + @property + def lib(self) -> ModuleType: + if _has_safety_gymnasium: + import safety_gymnasium + + return safety_gymnasium + try: + import safety_gymnasium # noqa: F401 + except ImportError as err: + raise ImportError( + "safety-gymnasium not found, install with " + "`pip install safety-gymnasium`" + ) from err + + _make_specs = set_gym_backend("gymnasium")(GymEnv._make_specs) + + def __init__(self, env_name=None, **kwargs): + super().__init__(env_name=env_name, **kwargs) + self._post_init_cost()