From 6885663e4fc182e405d9b486d92bbe897c8b4551 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Thu, 7 May 2026 11:27:48 +0000
Subject: [PATCH 01/26] Implement Fabric-aware get/set_local_poses via
 indexedfabricarray
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the earlier Python-based parent-loop implementation (which was
correct but ~3× slower than USD for local poses) with a fully GPU-side
Fabric path that follows the bareya/pbarejko/camera-update prototype:

* Three persistent ``PrimSelection`` instances differing only in
  per-attribute access mode — one each for {trans_ro, world_rw, local_rw}.
* Path-based view → fabric index mapping computed once from
  ``selection.GetPaths()`` and stored as a Warp ``int32`` array.  No
  custom prim attributes are written to the stage.
* All transform reads and writes go through ``wp.indexedfabricarray``,
  so the kernels just dereference ``ifa[view_index]`` instead of taking
  a separate mapping argument.
* Stage-level ``IFabricHierarchy`` cache and dirty-stages set so multiple
  ``FabricFrameView`` instances on the same stage share state.

World ↔ local consistency is preserved through Warp kernels that run on
the affected write paths:

* ``set_local_poses`` writes ``omni:fabric:localMatrix`` directly via the
  compose kernel, then a second kernel recomputes child worldMatrix from
  ``parent_world * child_local`` so the next ``get_world_poses`` read is
  consistent.  ``IFabricHierarchy.update_world_xforms()`` is *not* used
  for this — in practice it re-reads USD's authored xformOps and would
  overwrite the matrices we just authored.
* ``set_world_poses`` mirrors the above, recomputing
  ``child_local = inv(parent_world) * child_world`` after the write.

Two new public Warp kernels in ``isaaclab.utils.warp.fabric``:

* ``decompose_indexed_fabric_transforms`` /
  ``compose_indexed_fabric_transforms`` — indexed-array variants of the
  existing decompose/compose kernels.
* ``update_indexed_local_matrix_from_world`` /
  ``update_indexed_world_matrix_from_local`` — propagate one direction
  using a parent indexed fabric array.  Implemented directly in storage
  convention (``local = world * inv(parent)``,
  ``world = local * parent``) — the four transposes the math-convention
  form would imply cancel out under ``(A·B)^T = B^T·A^T``.

Benchmark (1024 prims, 50 iterations, RTX A6000):

  Operation             USD (ms)   Fabric (ms)   Speedup
  Get World Poses        12.33      0.044         282×
  Set World Poses        27.98      0.117         240×
  Interleaved Set→Get    41.34      0.160         258×
  Get Local Poses         6.04      0.037         162×
  Set Local Poses         8.54      0.053         162×

Local-pose ops went from ~3× slower than USD (in the earlier
torch-based parent-loop implementation) to ~160× faster, with the
new Fabric-side localMatrix authoring keeping ``test_set_world_updates_local``
passing without an xfail override.

Tests: 41 passed in the Fabric backend's contract + new coverage for
the Fabric-native ``set_local_poses``, ``get_scales``, and topology
rebuild paths.

Drops the no-longer-needed ``cd571d482`` Python-loop attempt.
---
 .../benchmarks/benchmark_view_comparison.py   |  60 +-
 .../changelog.d/fix-fabric-local-matrix.rst   |  18 +
 source/isaaclab/isaaclab/utils/warp/fabric.py | 172 +++++
 .../changelog.d/fix-fabric-local-matrix.rst   |  24 +
 .../sim/views/fabric_frame_view.py            | 687 ++++++++++++------
 .../test/sim/test_views_xform_prim_fabric.py  | 138 ++--
 6 files changed, 837 insertions(+), 262 deletions(-)
 create mode 100644 source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
 create mode 100644 source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst

diff --git a/scripts/benchmarks/benchmark_view_comparison.py b/scripts/benchmarks/benchmark_view_comparison.py
index a637f687803e..80051f555d78 100644
--- a/scripts/benchmarks/benchmark_view_comparison.py
+++ b/scripts/benchmarks/benchmark_view_comparison.py
@@ -271,26 +271,71 @@ def _run_pose_benchmarks(
     positions: wp.array,
     orientations: wp.array,
 ):
-    """Shared benchmark loop for get/set world poses on any FrameView."""
+    """Shared benchmark loop for get/set {world,local} poses on any FrameView."""
+
+    # FrameView getters now return ProxyArray; older callers worked with wp.array
+    # directly. Support both transparently.
+    def _as_wp(a):
+        return a.warp if hasattr(a, "warp") else a
+
+    positions_wp = _as_wp(positions)
+    orientations_wp = _as_wp(orientations)
+
     start_time = time.perf_counter()
     for _ in range(num_iterations):
         view.get_world_poses()
     timing_results["get_world_poses"] = (time.perf_counter() - start_time) / num_iterations
 
-    new_positions = wp.clone(positions)
+    new_positions = wp.clone(positions_wp)
     new_positions_t = wp.to_torch(new_positions)
     new_positions_t[:, 2] += 0.5
     expected_positions = new_positions_t.clone()
 
     start_time = time.perf_counter()
     for _ in range(num_iterations):
-        view.set_world_poses(new_positions, orientations)
+        view.set_world_poses(new_positions, orientations_wp)
     timing_results["set_world_poses"] = (time.perf_counter() - start_time) / num_iterations
 
+    # Interleaved set→get on world poses — the realistic write/read pattern for
+    # downstream consumers (e.g. cameras updating their pose then immediately
+    # querying it).
+    start_time = time.perf_counter()
+    for _ in range(num_iterations):
+        view.set_world_poses(new_positions, orientations_wp)
+        view.get_world_poses()
+    timing_results["interleaved_world"] = (time.perf_counter() - start_time) / num_iterations
+
+    # Local poses — Fabric-aware path on FabricFrameView, USD path otherwise.
+    if hasattr(view, "get_local_poses"):
+        start_time = time.perf_counter()
+        for _ in range(num_iterations):
+            view.get_local_poses()
+        timing_results["get_local_poses"] = (time.perf_counter() - start_time) / num_iterations
+
+    if hasattr(view, "set_local_poses"):
+        local_pos, local_ori = view.get_local_poses()
+        local_pos_t = (
+            local_pos.torch
+            if hasattr(local_pos, "torch")
+            else (wp.to_torch(local_pos) if isinstance(local_pos, wp.array) else local_pos)
+        )
+        local_ori_t = (
+            local_ori.torch
+            if hasattr(local_ori, "torch")
+            else (wp.to_torch(local_ori) if isinstance(local_ori, wp.array) else local_ori)
+        )
+        new_local_pos = wp.from_torch(local_pos_t.clone().contiguous())
+        new_local_ori = wp.from_torch(local_ori_t.clone().contiguous())
+
+        start_time = time.perf_counter()
+        for _ in range(num_iterations):
+            view.set_local_poses(translations=new_local_pos, orientations=new_local_ori)
+        timing_results["set_local_poses"] = (time.perf_counter() - start_time) / num_iterations
+
     ret_pos, ret_quat = view.get_world_poses()
-    ret_pos_t = wp.to_torch(ret_pos)
-    ret_quat_t = wp.to_torch(ret_quat)
-    ori_t = wp.to_torch(orientations)
+    ret_pos_t = ret_pos.torch if hasattr(ret_pos, "torch") else wp.to_torch(ret_pos)
+    ret_quat_t = ret_quat.torch if hasattr(ret_quat, "torch") else wp.to_torch(ret_quat)
+    ori_t = wp.to_torch(orientations_wp)
 
     pos_ok = torch.allclose(ret_pos_t, expected_positions, atol=1e-4, rtol=0)
     quat_ok = torch.allclose(ret_quat_t, ori_t, atol=1e-4, rtol=0)
@@ -327,6 +372,9 @@ def print_results(results_dict: dict[str, dict[str, float]], num_prims: int, num
         ("Initialization", "init"),
         ("Get World Poses", "get_world_poses"),
         ("Set World Poses", "set_world_poses"),
+        ("Interleaved Set->Get", "interleaved_world"),
+        ("Get Local Poses", "get_local_poses"),
+        ("Set Local Poses", "set_local_poses"),
     ]
 
     for op_name, op_key in operations:
diff --git a/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst b/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
new file mode 100644
index 000000000000..e3c6cc22ba5c
--- /dev/null
+++ b/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
@@ -0,0 +1,18 @@
+Added
+^^^^^
+
+* Added :func:`~isaaclab.utils.warp.fabric.decompose_indexed_fabric_transforms`
+  and :func:`~isaaclab.utils.warp.fabric.compose_indexed_fabric_transforms`
+  Warp kernels.  They mirror the existing
+  ``decompose_fabric_transformation_matrix_to_warp_arrays`` /
+  ``compose_fabric_transformation_matrix_from_warp_arrays`` kernels but
+  operate on :class:`wp.indexedfabricarray`, so the view-to-fabric mapping
+  is baked into the array and the kernel just dereferences
+  ``ifa[view_index]`` instead of taking a separate ``mapping`` argument.
+* Added :func:`~isaaclab.utils.warp.fabric.update_indexed_local_matrix_from_world`
+  and :func:`~isaaclab.utils.warp.fabric.update_indexed_world_matrix_from_local`
+  Warp kernels that propagate ``local = world * inv(parent)`` and
+  ``world = local * parent`` directly on Fabric storage matrices (no
+  explicit transposes).  Used by
+  :class:`~isaaclab_physx.sim.views.FabricFrameView` to keep child world and
+  local matrices consistent across writes without round-tripping through USD.
diff --git a/source/isaaclab/isaaclab/utils/warp/fabric.py b/source/isaaclab/isaaclab/utils/warp/fabric.py
index a48f773f4991..bcf65fc02ae9 100644
--- a/source/isaaclab/isaaclab/utils/warp/fabric.py
+++ b/source/isaaclab/isaaclab/utils/warp/fabric.py
@@ -18,12 +18,14 @@
 if TYPE_CHECKING:
     FabricArrayUInt32 = Any
     FabricArrayMat44d = Any
+    IndexedFabricArrayMat44d = Any
     ArrayUInt32 = Any
     ArrayUInt32_1d = Any
     ArrayFloat32_2d = Any
 else:
     FabricArrayUInt32 = wp.fabricarray(dtype=wp.uint32)
     FabricArrayMat44d = wp.fabricarray(dtype=wp.mat44d)
+    IndexedFabricArrayMat44d = wp.indexedfabricarray(dtype=wp.mat44d)
     ArrayUInt32 = wp.array(ndim=1, dtype=wp.uint32)
     ArrayUInt32_1d = wp.array(dtype=wp.uint32)
     ArrayFloat32_2d = wp.array(ndim=2, dtype=wp.float32)
@@ -163,6 +165,176 @@ def compose_fabric_transformation_matrix_from_warp_arrays(
     )
 
 
+@wp.kernel(enable_backward=False)
+def decompose_indexed_fabric_transforms(
+    fabric_matrices: IndexedFabricArrayMat44d,
+    array_positions: ArrayFloat32_2d,
+    array_orientations: ArrayFloat32_2d,
+    array_scales: ArrayFloat32_2d,
+    indices: ArrayUInt32,
+):
+    """Decompose indexed Fabric transformation matrices into position, orientation, and scale.
+
+    Like :func:`decompose_fabric_transformation_matrix_to_warp_arrays` but operates on a
+    :class:`wp.indexedfabricarray` that already encodes the view-to-fabric mapping, removing
+    the need for a separate ``mapping`` array.
+
+    Args:
+        fabric_matrices: Indexed fabric array containing 4x4 transformation matrices.
+        array_positions: Output array for positions [m], shape (N, 3).
+        array_orientations: Output array for quaternions in xyzw format, shape (N, 4).
+        array_scales: Output array for scales, shape (N, 3).
+        indices: View indices to process (subset selection).
+    """
+    output_index = wp.tid()
+    view_index = indices[output_index]
+
+    position, rotation, scale = _decompose_transformation_matrix(wp.mat44f(fabric_matrices[view_index]))
+
+    if array_positions.shape[0] > 0:
+        array_positions[output_index, 0] = position[0]
+        array_positions[output_index, 1] = position[1]
+        array_positions[output_index, 2] = position[2]
+    if array_orientations.shape[0] > 0:
+        array_orientations[output_index, 0] = rotation[0]
+        array_orientations[output_index, 1] = rotation[1]
+        array_orientations[output_index, 2] = rotation[2]
+        array_orientations[output_index, 3] = rotation[3]
+    if array_scales.shape[0] > 0:
+        array_scales[output_index, 0] = scale[0]
+        array_scales[output_index, 1] = scale[1]
+        array_scales[output_index, 2] = scale[2]
+
+
+@wp.kernel(enable_backward=False)
+def compose_indexed_fabric_transforms(
+    fabric_matrices: IndexedFabricArrayMat44d,
+    array_positions: ArrayFloat32_2d,
+    array_orientations: ArrayFloat32_2d,
+    array_scales: ArrayFloat32_2d,
+    broadcast_positions: bool,
+    broadcast_orientations: bool,
+    broadcast_scales: bool,
+    indices: ArrayUInt32,
+):
+    """Compose indexed Fabric transformation matrices from position, orientation, and scale.
+
+    Like :func:`compose_fabric_transformation_matrix_from_warp_arrays` but operates on a
+    :class:`wp.indexedfabricarray` that already encodes the view-to-fabric mapping, removing
+    the need for a separate ``mapping`` array.
+
+    Args:
+        fabric_matrices: Indexed fabric array containing 4x4 transformation matrices to update.
+        array_positions: Input array for positions [m], shape (N, 3).
+        array_orientations: Input array for quaternions in xyzw format, shape (N, 4).
+        array_scales: Input array for scales, shape (N, 3).
+        broadcast_positions: If True, use first position for all prims.
+        broadcast_orientations: If True, use first orientation for all prims.
+        broadcast_scales: If True, use first scale for all prims.
+        indices: View indices to process (subset selection).
+    """
+    i = wp.tid()
+    view_index = indices[i]
+    position, rotation, scale = _decompose_transformation_matrix(wp.mat44f(fabric_matrices[view_index]))
+
+    if array_positions.shape[0] > 0:
+        if broadcast_positions:
+            index = 0
+        else:
+            index = i
+        position[0] = array_positions[index, 0]
+        position[1] = array_positions[index, 1]
+        position[2] = array_positions[index, 2]
+    if array_orientations.shape[0] > 0:
+        if broadcast_orientations:
+            index = 0
+        else:
+            index = i
+        rotation[0] = array_orientations[index, 0]
+        rotation[1] = array_orientations[index, 1]
+        rotation[2] = array_orientations[index, 2]
+        rotation[3] = array_orientations[index, 3]
+    if array_scales.shape[0] > 0:
+        if broadcast_scales:
+            index = 0
+        else:
+            index = i
+        scale[0] = array_scales[index, 0]
+        scale[1] = array_scales[index, 1]
+        scale[2] = array_scales[index, 2]
+
+    fabric_matrices[view_index] = wp.mat44d(  # type: ignore[arg-type]
+        wp.transpose(wp.transform_compose(position, rotation, scale))  # type: ignore[arg-type]
+    )
+
+
+@wp.kernel(enable_backward=False)
+def update_indexed_local_matrix_from_world(
+    child_world_matrices: IndexedFabricArrayMat44d,
+    parent_world_matrices: IndexedFabricArrayMat44d,
+    child_local_matrices: IndexedFabricArrayMat44d,
+    indices: ArrayUInt32,
+):
+    """Recompute child localMatrix from (parent worldMatrix, child worldMatrix).
+
+    Computes ``child_local = inv(parent_world) * child_world`` per prim and writes the
+    result back to the child's :data:`omni:fabric:localMatrix` so that subsequent
+    ``get_local_poses`` calls see consistent values after a world-pose write.
+
+    All three indexed arrays are expected to be indexed by the same per-view indices
+    (i.e. ``view_to_child_fabric``, ``view_to_parent_fabric``, ``view_to_child_fabric``)
+    so the kernel only needs the view-side indices.
+
+    Storage convention: Fabric matrices are stored as the transpose of the standard
+    column-major math convention.  Math is ``local = inv(parent) * world``; under
+    the transpose identity ``(A * B)^T = B^T * A^T`` (and ``inv(A^T) = inv(A)^T``)
+    that is equivalent to storage-side ``local^T = world^T * inv(parent^T)``, so we
+    can compute it directly on the stored matrices without explicit transposes.
+
+    Args:
+        child_world_matrices: Indexed fabric array of child world matrices (read).
+        parent_world_matrices: Indexed fabric array of parent world matrices (read).
+        child_local_matrices: Indexed fabric array of child local matrices (written).
+        indices: View indices to process.
+    """
+    i = wp.tid()
+    view_index = indices[i]
+    child_world = wp.mat44f(child_world_matrices[view_index])
+    parent_world = wp.mat44f(parent_world_matrices[view_index])
+    child_local_matrices[view_index] = wp.mat44d(child_world * wp.inverse(parent_world))  # type: ignore[arg-type]
+
+
+@wp.kernel(enable_backward=False)
+def update_indexed_world_matrix_from_local(
+    child_local_matrices: IndexedFabricArrayMat44d,
+    parent_world_matrices: IndexedFabricArrayMat44d,
+    child_world_matrices: IndexedFabricArrayMat44d,
+    indices: ArrayUInt32,
+):
+    """Recompute child worldMatrix from (parent worldMatrix, child localMatrix).
+
+    Computes ``child_world = parent_world * child_local`` per prim and writes the
+    result back to the child's :data:`omni:fabric:worldMatrix`.  Used after a
+    ``set_local_poses`` write so that subsequent ``get_world_poses`` calls see
+    consistent values.  Mirror of :func:`update_indexed_local_matrix_from_world`.
+
+    Args:
+        child_local_matrices: Indexed fabric array of child local matrices (read).
+        parent_world_matrices: Indexed fabric array of parent world matrices (read).
+        child_world_matrices: Indexed fabric array of child world matrices (written).
+        indices: View indices to process.
+
+    Storage convention: same as :func:`update_indexed_local_matrix_from_world`.
+    Math is ``world = parent * local``; under the transpose identity that becomes
+    storage-side ``world^T = local^T * parent^T``, no explicit transposes needed.
+    """
+    i = wp.tid()
+    view_index = indices[i]
+    child_local = wp.mat44f(child_local_matrices[view_index])
+    parent_world = wp.mat44f(parent_world_matrices[view_index])
+    child_world_matrices[view_index] = wp.mat44d(child_local * parent_world)  # type: ignore[arg-type]
+
+
 @wp.func
 def _decompose_transformation_matrix(m: Any):  # -> tuple[wp.vec3f, wp.quatf, wp.vec3f]
     """Decompose a 4x4 transformation matrix into position, orientation, and scale.
diff --git a/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst b/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst
new file mode 100644
index 000000000000..43f2eaae0ee7
--- /dev/null
+++ b/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst
@@ -0,0 +1,24 @@
+Fixed
+^^^^^
+
+* Fixed :meth:`~isaaclab_physx.sim.views.FabricFrameView.get_local_poses`
+  returning stale USD values after Fabric world-pose writes.  Local poses
+  are now read directly from Fabric's ``omni:fabric:localMatrix`` via
+  :class:`wp.indexedfabricarray`, and are kept consistent with worldMatrix
+  through Warp kernels that propagate either direction on writes.
+
+Changed
+^^^^^^^
+
+* Reworked :class:`~isaaclab_physx.sim.views.FabricFrameView` to use three
+  persistent ``PrimSelection`` instances (one per access mode), path-based
+  view → fabric index mapping (no custom prim attributes), and Warp kernels
+  that operate on :class:`wp.indexedfabricarray` so the kernels just index
+  ``ifa[view_index]`` instead of taking a separate mapping array.
+* :meth:`~isaaclab_physx.sim.views.FabricFrameView.set_local_poses` now
+  writes ``omni:fabric:localMatrix`` directly through Fabric.  The next
+  ``get_world_poses`` runs a Warp kernel that recomputes
+  ``child_world = parent_world * child_local``.  Symmetrically,
+  ``set_world_poses`` runs a kernel that recomputes
+  ``child_local = inv(parent_world) * child_world`` so subsequent
+  ``get_local_poses`` calls return consistent values.
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 1bcff86d57ac..633c0436d061 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -3,7 +3,27 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
-"""PhysX FrameView with Fabric GPU acceleration."""
+"""PhysX FrameView with Fabric GPU acceleration.
+
+Design follows the prototype in
+``https://github.com/bareya/IsaacLab/tree/pbarejko/camera-update``:
+
+* Three persistent ``PrimSelection`` instances differing only in per-attribute access
+  mode (one for each of {trans_ro, world_rw, local_rw}).
+* ``omni:fabric:localMatrix`` is read/written directly — no software composition of
+  ``inv(parent_world) * child_world`` for ``get_local_poses``/``set_local_poses``.
+* View → Fabric index mapping is an integer Warp array computed from the selection's
+  ordered path list (``selection.GetPaths()``); no custom attributes are written to
+  prims.
+* Read/write happens via ``wp.indexedfabricarray``, so the view-to-fabric mapping is
+  baked into the array itself and the kernels just dereference ``ifa[view_index]``.
+* World ↔ local consistency is maintained:
+    - After ``set_local_poses``: stage is marked dirty; the next world read calls
+      ``IFabricHierarchy.update_world_xforms()`` to propagate local → world.
+    - After ``set_world_poses``: a Warp kernel recomputes child localMatrix from
+      parent worldMatrix on the fly using a parent indexed fabric array, so the
+      next ``get_local_poses`` returns consistent values.
+"""
 
 from __future__ import annotations
 
@@ -12,7 +32,7 @@
 import torch
 import warp as wp
 
-from pxr import Usd
+from pxr import Usd, UsdGeom
 
 import isaaclab.sim as sim_utils
 from isaaclab.app.settings_manager import SettingsManager
@@ -23,19 +43,18 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: extend this to ``cuda:N`` once we wire up multi-GPU support for the view.
-# Recent Kit / USDRT releases do support multi-GPU ``SelectPrims``, but the
-# rest of the FabricFrameView wiring (selections, indexed arrays, etc.) still
-# assumes a single device — to be tackled in a follow-up.
+# TODO: USDRT SelectPrims still pins to cuda:0 even when ``device='cuda:1'`` is
+# requested.  When that limitation is lifted (see feat/frame-view-enable-mgpu),
+# this allowlist can drop the explicit cuda:0 entry.
 _fabric_supported_devices = ("cpu", "cuda", "cuda:0")
 
 
 def _to_float32_2d(a: wp.array | torch.Tensor) -> wp.array | torch.Tensor:
-    """Ensure array is compatible with Fabric kernels (2-D float32).
+    """Ensure ``wp.array`` inputs are 2-D ``float32`` for the Fabric kernels.
 
-    For ``wp.array`` with vec dtypes (``vec3f``, ``vec4f``), uses
-    :meth:`wp.array.view` for zero-copy reinterpretation.
-    ``torch.Tensor`` and already-correct 2-D float32 arrays pass through.
+    For ``wp.array`` with vec dtypes (``vec3f``, ``vec4f``) this uses
+    :meth:`wp.array.view` for zero-copy reinterpretation.  ``torch.Tensor``
+    and already-correct 2-D float32 arrays pass through.
     """
     if not isinstance(a, wp.array):
         return a
@@ -49,23 +68,26 @@ def _to_float32_2d(a: wp.array | torch.Tensor) -> wp.array | torch.Tensor:
 class FabricFrameView(BaseFrameView):
     """FrameView with Fabric GPU acceleration for the PhysX backend.
 
-    Uses composition: holds a :class:`UsdFrameView` internally for USD
-    fallback and non-accelerated operations (local poses, visibility, scales
-    when Fabric is disabled).
+    Holds a :class:`UsdFrameView` internally for the disabled-Fabric fallback path
+    (when ``/physics/fabricEnabled`` is false or the device is unsupported).
 
-    When Fabric is enabled, world-pose and scale operations use Warp kernels
-    operating on ``omni:fabric:worldMatrix``.  All other operations delegate
-    to the internal USD view.
+    When Fabric is enabled, all transform reads and writes go through Warp kernels
+    operating on Fabric ``omni:fabric:worldMatrix`` and ``omni:fabric:localMatrix``
+    via :class:`wp.indexedfabricarray`.  No prim attributes are added.
 
-    After every Fabric write (``set_world_poses``, ``set_scales``),
-    :meth:`PrepareForReuse` is called on the ``PrimSelection`` to notify
-    the FSD renderer that Fabric data has changed and to detect topology
-    changes that require rebuilding internal mappings.  Read operations
-    do not call PrepareForReuse to avoid unnecessary renderer invalidation.
-
-    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept ``wp.array``.
+    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept
+    ``wp.array``.
     """
 
+    _WORLD_MATRIX_NAME = "omni:fabric:worldMatrix"
+    _LOCAL_MATRIX_NAME = "omni:fabric:localMatrix"
+
+    # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
+    # share one IFabricHierarchy handle; the dirty-stages set tracks which stages
+    # need ``update_world_xforms()`` before a world read.
+    _hierarchy_cache: dict[int, object] = {}
+    _dirty_stages: set[int] = set()
+
     def __init__(
         self,
         prim_path: str,
@@ -74,18 +96,6 @@ def __init__(
         stage: Usd.Stage | None = None,
         **kwargs,
     ):
-        """Initialize the view.
-
-        Args:
-            prim_path: USD prim-path pattern to match.
-            device: Device for Warp arrays (``"cpu"`` or ``"cuda:0"``).
-            validate_xform_ops: Whether to validate prim xform-ops.
-            stage: USD stage; defaults to the current sim context's stage.
-            **kwargs: Additional keyword arguments (ignored). Matches the signature of
-                :class:`~isaaclab.sim.views.UsdFrameView` so that the top-level
-                :class:`~isaaclab.sim.views.FrameView` factory can forward backend-agnostic
-                kwargs without each backend having to know about every option.
-        """
         self._usd_view = UsdFrameView(prim_path, device=device, validate_xform_ops=validate_xform_ops, stage=stage)
         self._device = device
 
@@ -101,14 +111,27 @@ def __init__(
             )
             self._use_fabric = False
 
+        # Fabric state — all populated lazily in :meth:`_initialize_fabric`.
         self._fabric_initialized = False
-        self._fabric_usd_sync_done = False
-        self._fabric_selection = None
-        self._fabric_to_view: wp.array | None = None
-        self._view_to_fabric: wp.array | None = None
-        self._default_view_indices: wp.array | None = None
+        self._stage_id: int | None = None
+        self._stage = None
         self._fabric_hierarchy = None
-        self._view_index_attr = f"isaaclab:view_index:{abs(hash(self))}"
+        # Selections.
+        self._trans_sel_ro = None
+        self._world_sel_rw = None
+        self._local_sel_rw = None
+        # Index arrays (view-side indices and view→fabric mappings).
+        self._view_indices: wp.array | None = None
+        self._fabric_indices: wp.array | None = None
+        self._parent_fabric_indices: wp.array | None = None
+        # Indexed fabric arrays.
+        self._world_ifa_ro = None
+        self._local_ifa_ro = None
+        self._world_ifa_rw = None
+        self._local_ifa_rw = None
+        self._parent_world_ifa_ro = None
+        # Cached output buffers.
+        self._fabric_dummy_buffer: wp.array | None = None
 
     # ------------------------------------------------------------------
     # Delegated properties
@@ -142,7 +165,7 @@ def set_visibility(self, visibility, indices=None):
         self._usd_view.set_visibility(visibility, indices)
 
     # ------------------------------------------------------------------
-    # World poses — Fabric-accelerated or USD fallback
+    # World poses
     # ------------------------------------------------------------------
 
     def set_world_poses(self, positions=None, orientations=None, indices=None):
@@ -153,39 +176,33 @@ def set_world_poses(self, positions=None, orientations=None, indices=None):
         if not self._fabric_initialized:
             self._initialize_fabric()
 
-        self._prepare_for_reuse()
+        # If a prior set_local_poses left worldMatrix stale, propagate local → world first.
+        self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
-        count = indices_wp.shape[0]
-
-        dummy = wp.zeros((0, 3), dtype=wp.float32, device=self._device)
-        positions_wp = _to_float32_2d(positions) if positions is not None else dummy
-        orientations_wp = (
-            _to_float32_2d(orientations)
-            if orientations is not None
-            else wp.zeros((0, 4), dtype=wp.float32, device=self._device)
-        )
+        positions_wp = _to_float32_2d(positions) if positions is not None else self._fabric_dummy_buffer
+        orientations_wp = _to_float32_2d(orientations) if orientations is not None else self._fabric_dummy_buffer
 
         wp.launch(
-            kernel=fabric_utils.compose_fabric_transformation_matrix_from_warp_arrays,
-            dim=count,
+            kernel=fabric_utils.compose_indexed_fabric_transforms,
+            dim=indices_wp.shape[0],
             inputs=[
-                self._fabric_world_matrices,
+                self._get_world_rw_array(),
                 positions_wp,
                 orientations_wp,
-                dummy,
+                self._fabric_dummy_buffer,
                 False,
                 False,
                 False,
                 indices_wp,
-                self._view_to_fabric,
             ],
-            device=self._fabric_device,
+            device=self._device,
         )
         wp.synchronize()
 
-        self._fabric_hierarchy.update_world_xforms()
-        self._fabric_usd_sync_done = True
+        # World was just written — recompute child localMatrix from parent worldMatrix
+        # so the next get_local_poses returns consistent values.
+        self._sync_local_from_world(indices_wp)
 
     def get_world_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray, ProxyArray]:
         if not self._use_fabric:
@@ -193,8 +210,9 @@ def get_world_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray,
 
         if not self._fabric_initialized:
             self._initialize_fabric()
-        if not self._fabric_usd_sync_done:
-            self._sync_fabric_from_usd_once()
+
+        # If a prior set_local_poses left worldMatrix stale, propagate local → world first.
+        self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
         count = indices_wp.shape[0]
@@ -208,17 +226,16 @@ def get_world_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray,
             orientations_wp = wp.zeros((count, 4), dtype=wp.float32, device=self._device)
 
         wp.launch(
-            kernel=fabric_utils.decompose_fabric_transformation_matrix_to_warp_arrays,
+            kernel=fabric_utils.decompose_indexed_fabric_transforms,
             dim=count,
             inputs=[
-                self._fabric_world_matrices,
+                self._get_world_ro_array(),
                 positions_wp,
                 orientations_wp,
                 self._fabric_dummy_buffer,
                 indices_wp,
-                self._view_to_fabric,
             ],
-            device=self._fabric_device,
+            device=self._device,
         )
 
         if use_cached:
@@ -227,17 +244,79 @@ def get_world_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray,
         return ProxyArray(positions_wp), ProxyArray(orientations_wp)
 
     # ------------------------------------------------------------------
-    # Local poses — USD fallback (Fabric only accelerates world poses)
+    # Local poses
     # ------------------------------------------------------------------
 
     def set_local_poses(self, translations=None, orientations=None, indices=None):
-        self._usd_view.set_local_poses(translations, orientations, indices)
+        if not self._use_fabric:
+            self._usd_view.set_local_poses(translations, orientations, indices)
+            return
+
+        if not self._fabric_initialized:
+            self._initialize_fabric()
+
+        indices_wp = self._resolve_indices_wp(indices)
+        translations_wp = _to_float32_2d(translations) if translations is not None else self._fabric_dummy_buffer
+        orientations_wp = _to_float32_2d(orientations) if orientations is not None else self._fabric_dummy_buffer
+
+        wp.launch(
+            kernel=fabric_utils.compose_indexed_fabric_transforms,
+            dim=indices_wp.shape[0],
+            inputs=[
+                self._get_local_rw_array(),
+                translations_wp,
+                orientations_wp,
+                self._fabric_dummy_buffer,
+                False,
+                False,
+                False,
+                indices_wp,
+            ],
+            device=self._device,
+        )
+        wp.synchronize()
+
+        # Mark the stage dirty so the next world read calls update_world_xforms().
+        FabricFrameView._dirty_stages.add(self._stage_id)
 
     def get_local_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray, ProxyArray]:
-        return self._usd_view.get_local_poses(indices)
+        if not self._use_fabric:
+            return self._usd_view.get_local_poses(indices)
+
+        if not self._fabric_initialized:
+            self._initialize_fabric()
+
+        indices_wp = self._resolve_indices_wp(indices)
+        count = indices_wp.shape[0]
+
+        use_cached = indices is None or indices == slice(None)
+        if use_cached:
+            translations_wp = self._fabric_local_translations_buf
+            orientations_wp = self._fabric_local_orientations_buf
+        else:
+            translations_wp = wp.zeros((count, 3), dtype=wp.float32, device=self._device)
+            orientations_wp = wp.zeros((count, 4), dtype=wp.float32, device=self._device)
+
+        wp.launch(
+            kernel=fabric_utils.decompose_indexed_fabric_transforms,
+            dim=count,
+            inputs=[
+                self._get_local_ro_array(),
+                translations_wp,
+                orientations_wp,
+                self._fabric_dummy_buffer,
+                indices_wp,
+            ],
+            device=self._device,
+        )
+
+        if use_cached:
+            wp.synchronize()
+            return self._fabric_local_translations_ta, self._fabric_local_orientations_ta
+        return ProxyArray(translations_wp), ProxyArray(orientations_wp)
 
     # ------------------------------------------------------------------
-    # Scales — Fabric-accelerated or USD fallback
+    # Scales
     # ------------------------------------------------------------------
 
     def set_scales(self, scales, indices=None):
@@ -248,44 +327,38 @@ def set_scales(self, scales, indices=None):
         if not self._fabric_initialized:
             self._initialize_fabric()
 
-        self._prepare_for_reuse()
+        # Sync world matrices first if local writes are pending.
+        self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
-        count = indices_wp.shape[0]
-
-        dummy3 = wp.zeros((0, 3), dtype=wp.float32, device=self._device)
-        dummy4 = wp.zeros((0, 4), dtype=wp.float32, device=self._device)
-        scales_wp = _to_float32_2d(scales)
+        scales_wp = _to_float32_2d(scales) if scales is not None else self._fabric_dummy_buffer
 
         wp.launch(
-            kernel=fabric_utils.compose_fabric_transformation_matrix_from_warp_arrays,
-            dim=count,
+            kernel=fabric_utils.compose_indexed_fabric_transforms,
+            dim=indices_wp.shape[0],
             inputs=[
-                self._fabric_world_matrices,
-                dummy3,
-                dummy4,
+                self._get_world_rw_array(),
+                self._fabric_dummy_buffer,
+                self._fabric_dummy_buffer,
                 scales_wp,
                 False,
                 False,
                 False,
                 indices_wp,
-                self._view_to_fabric,
             ],
-            device=self._fabric_device,
+            device=self._device,
         )
         wp.synchronize()
 
-        self._fabric_hierarchy.update_world_xforms()
-        self._fabric_usd_sync_done = True
-
     def get_scales(self, indices=None):
         if not self._use_fabric:
             return self._usd_view.get_scales(indices)
 
         if not self._fabric_initialized:
             self._initialize_fabric()
-        if not self._fabric_usd_sync_done:
-            self._sync_fabric_from_usd_once()
+
+        # Sync world matrices first if local writes are pending.
+        self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
         count = indices_wp.shape[0]
@@ -297,17 +370,16 @@ def get_scales(self, indices=None):
             scales_wp = wp.zeros((count, 3), dtype=wp.float32, device=self._device)
 
         wp.launch(
-            kernel=fabric_utils.decompose_fabric_transformation_matrix_to_warp_arrays,
+            kernel=fabric_utils.decompose_indexed_fabric_transforms,
             dim=count,
             inputs=[
-                self._fabric_world_matrices,
+                self._get_world_ro_array(),
                 self._fabric_dummy_buffer,
                 self._fabric_dummy_buffer,
                 scales_wp,
                 indices_wp,
-                self._view_to_fabric,
             ],
-            device=self._fabric_device,
+            device=self._device,
         )
 
         if use_cached:
@@ -315,153 +387,356 @@ def get_scales(self, indices=None):
         return scales_wp
 
     # ------------------------------------------------------------------
-    # Internal — PrepareForReuse (renderer notification + topology tracking)
+    # Internal — sync helpers
     # ------------------------------------------------------------------
 
-    def _prepare_for_reuse(self) -> None:
-        """Call PrepareForReuse on the PrimSelection to notify the renderer.
-
-        PrepareForReuse serves two purposes:
+    def _sync_world_from_local_if_dirty(self) -> None:
+        """If a prior local write left world matrices stale, recompute them on the fly.
 
-        1. **Renderer notification**: Tells FSD/Storm that Fabric data has
-           been (or will be) modified, so the next rendered frame reflects
-           the updated transforms.
-        2. **Topology change detection**: Returns True when Fabric's
-           internal memory layout changed (e.g., prims added/removed).
-           In that case, view-to-fabric index mappings and fabricarrays
-           must be rebuilt.
+        We deliberately do NOT call ``IFabricHierarchy.update_world_xforms()`` —
+        in practice that re-reads USD's authored xformOps and overwrites the Fabric
+        local+world matrices we just authored.  Instead we fire a Warp kernel that
+        does ``child_world = parent_world * child_local`` per child, leaving the
+        Fabric-side localMatrix untouched.
         """
-        if self._fabric_selection is None:
+        if self._stage_id is None or self._stage_id not in FabricFrameView._dirty_stages:
             return
+        # Make sure the parent indexed array is up-to-date with the current trans selection.
+        if self._parent_world_ifa_ro is None:
+            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+        wp.launch(
+            kernel=fabric_utils.update_indexed_world_matrix_from_local,
+            dim=self.count,
+            inputs=[
+                self._get_local_ro_array(),
+                self._parent_world_ifa_ro,
+                self._get_world_rw_array(),
+                self._view_indices,
+            ],
+            device=self._device,
+        )
+        wp.synchronize()
+        FabricFrameView._dirty_stages.discard(self._stage_id)
 
-        topology_changed = self._fabric_selection.PrepareForReuse()
-        if topology_changed:
-            logger.info("Fabric topology changed — rebuilding view-to-fabric index mapping.")
-            self._rebuild_fabric_arrays()
-
-    def _rebuild_fabric_arrays(self) -> None:
-        """Rebuild fabricarray and view↔fabric mappings after a topology change.
+    def _sync_local_from_world(self, indices_wp: wp.array) -> None:
+        """Recompute child ``localMatrix`` from (parent worldMatrix, child worldMatrix).
 
-        Note: Only index mappings and fabricarrays are rebuilt.  Position/orientation/scale
-        buffers are *not* resized because ``self.count`` is derived from the USD prim-path
-        pattern (via ``_usd_view.count``) and does not change when Fabric rearranges its
-        internal memory layout.  The assertion below guards this invariant.
+        Called after ``set_world_poses`` so that subsequent ``get_local_poses`` returns
+        values consistent with the just-written world poses.  Fabric Hierarchy does
+        not provide a built-in world → local sync, so we do it via a Warp kernel
+        using the parent indexed fabric array.
         """
-        assert self.count == self._default_view_indices.shape[0], (
-            f"Prim count changed ({self.count} vs {self._default_view_indices.shape[0]}). "
-            "Fabric topology change added/removed tracked prims — full re-initialization required."
-        )
-        self._view_to_fabric = wp.zeros((self.count,), dtype=wp.uint32, device=self._fabric_device)
-        self._fabric_to_view = wp.fabricarray(self._fabric_selection, self._view_index_attr)
-
         wp.launch(
-            kernel=fabric_utils.set_view_to_fabric_array,
-            dim=self._fabric_to_view.shape[0],
-            inputs=[self._fabric_to_view, self._view_to_fabric],
-            device=self._fabric_device,
+            kernel=fabric_utils.update_indexed_local_matrix_from_world,
+            dim=indices_wp.shape[0],
+            inputs=[
+                self._get_world_ro_array(),
+                self._get_parent_world_ro_array(),
+                self._get_local_rw_array(),
+                indices_wp,
+            ],
+            device=self._device,
         )
         wp.synchronize()
 
-        self._fabric_world_matrices = wp.fabricarray(self._fabric_selection, "omni:fabric:worldMatrix")
+    # ------------------------------------------------------------------
+    # Internal — selection accessors with on-demand index rebuild
+    # ------------------------------------------------------------------
+
+    def _get_world_ro_array(self):
+        if self._trans_sel_ro.PrepareForReuse():
+            self._rebuild_indices_for(self._trans_sel_ro)
+            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
+            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
+            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+        return self._world_ifa_ro
+
+    def _get_local_ro_array(self):
+        if self._trans_sel_ro.PrepareForReuse():
+            self._rebuild_indices_for(self._trans_sel_ro)
+            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
+            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
+            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+        return self._local_ifa_ro
+
+    def _get_world_rw_array(self):
+        if self._world_sel_rw.PrepareForReuse():
+            self._rebuild_indices_for(self._world_sel_rw)
+            self._world_ifa_rw = self._build_indexed_array(self._world_sel_rw, self._WORLD_MATRIX_NAME)
+        return self._world_ifa_rw
+
+    def _get_local_rw_array(self):
+        if self._local_sel_rw.PrepareForReuse():
+            self._rebuild_indices_for(self._local_sel_rw)
+            self._local_ifa_rw = self._build_indexed_array(self._local_sel_rw, self._LOCAL_MATRIX_NAME)
+        return self._local_ifa_rw
+
+    def _get_parent_world_ro_array(self):
+        # Built and refreshed alongside the trans_ro selection (parents share that selection).
+        if self._parent_world_ifa_ro is None or self._trans_sel_ro.PrepareForReuse():
+            self._rebuild_indices_for(self._trans_sel_ro)
+            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
+            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
+            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+        return self._parent_world_ifa_ro
+
+    # ------------------------------------------------------------------
+    # Internal — index computation
+    # ------------------------------------------------------------------
+
+    def _rebuild_indices_for(self, selection) -> None:
+        """Recompute ``_fabric_indices`` (view → fabric) from a selection's path order."""
+        self._fabric_indices = self._compute_fabric_indices(selection)
+
+    def _compute_fabric_indices(self, selection) -> wp.array:
+        fabric_paths = selection.GetPaths()
+        path_to_fabric_idx: dict[str, int] = {str(p): i for i, p in enumerate(fabric_paths)}
+        indices: list[int] = []
+        for prim_path in self.prim_paths:
+            fabric_idx = path_to_fabric_idx.get(prim_path)
+            if fabric_idx is None:
+                raise RuntimeError(
+                    f"Prim '{prim_path}' not found in Fabric selection. Ensure the hierarchy has been populated."
+                )
+            indices.append(fabric_idx)
+        return wp.array(indices, dtype=wp.int32, device=self._device)
+
+    def _compute_parent_fabric_indices(self, selection) -> wp.array:
+        """For each child in this view, look up the parent prim's fabric index."""
+        fabric_paths = selection.GetPaths()
+        path_to_fabric_idx: dict[str, int] = {str(p): i for i, p in enumerate(fabric_paths)}
+        indices: list[int] = []
+        for prim_path in self.prim_paths:
+            parent_path = prim_path.rsplit("/", 1)[0]
+            fabric_idx = path_to_fabric_idx.get(parent_path)
+            if fabric_idx is None:
+                raise RuntimeError(
+                    f"Parent prim '{parent_path}' (for child '{prim_path}') not found in Fabric selection. "
+                    "Ensure parents have Fabric world+local matrices populated."
+                )
+            indices.append(fabric_idx)
+        return wp.array(indices, dtype=wp.int32, device=self._device)
+
+    def _build_indexed_array(self, selection, attribute_name: str) -> wp.indexedfabricarray:
+        fa = wp.fabricarray(selection, attribute_name)
+        return wp.indexedfabricarray(fa=fa, indices=self._fabric_indices)
+
+    def _build_parent_indexed_array(self, selection) -> wp.indexedfabricarray:
+        self._parent_fabric_indices = self._compute_parent_fabric_indices(selection)
+        fa = wp.fabricarray(selection, self._WORLD_MATRIX_NAME)
+        return wp.indexedfabricarray(fa=fa, indices=self._parent_fabric_indices)
+
+    def _resolve_indices_wp(self, indices: wp.array | None) -> wp.array:
+        """Resolve view indices as a Warp uint32 array."""
+        if indices is None or indices == slice(None):
+            if self._view_indices is None:
+                raise RuntimeError("Fabric view indices are not initialized.")
+            return self._view_indices
+        if indices.dtype != wp.uint32:
+            return wp.array(indices.numpy().astype("uint32"), dtype=wp.uint32, device=self._device)
+        return indices
 
     # ------------------------------------------------------------------
     # Internal — Fabric initialization
     # ------------------------------------------------------------------
 
     def _initialize_fabric(self) -> None:
-        """Initialize Fabric batch infrastructure for GPU-accelerated pose queries."""
+        """One-time Fabric setup: hierarchy handle, attribute population, selections, indexed arrays."""
         import usdrt  # noqa: PLC0415
         from usdrt import Rt  # noqa: PLC0415
 
-        stage_id = sim_utils.get_current_stage_id()
-        fabric_stage = usdrt.Usd.Stage.Attach(stage_id)
-
-        for i in range(self.count):
-            rt_prim = fabric_stage.GetPrimAtPath(self.prim_paths[i])
-            rt_xformable = Rt.Xformable(rt_prim)
-
-            has_attr = (
-                rt_xformable.HasFabricHierarchyWorldMatrixAttr()
-                if hasattr(rt_xformable, "HasFabricHierarchyWorldMatrixAttr")
-                else False
+        self._stage_id = sim_utils.get_current_stage_id()
+        self._stage = usdrt.Usd.Stage.Attach(self._stage_id)
+        self._stage.SynchronizeToFabric()
+
+        # Reuse (or create) a hierarchy handle for this stage.  Enable change-tracking
+        # BEFORE we author any local matrices, so the per-prim
+        # ``SetLocalXformFromUsd`` calls below mark themselves dirty and the next
+        # ``update_world_xforms()`` walks the parent chain to populate worldMatrix.
+        if self._stage_id not in FabricFrameView._hierarchy_cache:
+            hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
+                self._stage.GetFabricId(), self._stage.GetStageIdAsStageId()
             )
-            if not has_attr:
+            hierarchy.track_local_xform_changes(True)
+            hierarchy.track_world_xform_changes(True)
+            FabricFrameView._hierarchy_cache[self._stage_id] = hierarchy
+        self._fabric_hierarchy = FabricFrameView._hierarchy_cache[self._stage_id]
+
+        # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
+        # attributes.  Our ``trans_ro`` selection requires both, so prims missing either
+        # would silently be excluded.  ``Create*Attr`` calls are idempotent.
+        #
+        # ``SetWorldXformFromUsd`` writes Fabric's worldMatrix from USD's accumulated
+        # local-to-world transform (so it picks up the parent chain).
+        # ``SetLocalXformFromUsd`` writes Fabric's localMatrix from USD's authored
+        # xformOps on this prim only.  Calling both gives Fabric a consistent
+        # (worldMatrix, localMatrix) pair for each prim before we touch the hierarchy.
+        seen_paths: set[str] = set()
+        for child_path in self.prim_paths:
+            for path in (child_path, child_path.rsplit("/", 1)[0]):
+                if path in seen_paths:
+                    continue
+                seen_paths.add(path)
+                rt_prim = self._stage.GetPrimAtPath(path)
+                if not rt_prim.IsValid():
+                    continue
+                rt_xformable = Rt.Xformable(rt_prim)
                 rt_xformable.CreateFabricHierarchyWorldMatrixAttr()
+                rt_xformable.CreateFabricHierarchyLocalMatrixAttr()
+                rt_xformable.SetLocalXformFromUsd()
+                rt_xformable.SetWorldXformFromUsd()
+
+        # Three persistent selections — read both, write world, write local.
+        matrix = usdrt.Sdf.ValueTypeNames.Matrix4d
+        ro = usdrt.Usd.Access.Read
+        rw = usdrt.Usd.Access.ReadWrite
+        wm_ro = (matrix, self._WORLD_MATRIX_NAME, ro)
+        lm_ro = (matrix, self._LOCAL_MATRIX_NAME, ro)
+        wm_rw = (matrix, self._WORLD_MATRIX_NAME, rw)
+        lm_rw = (matrix, self._LOCAL_MATRIX_NAME, rw)
+        self._trans_sel_ro = self._stage.SelectPrims(require_attrs=[wm_ro, lm_ro], device=self._device, want_paths=True)
+        self._world_sel_rw = self._stage.SelectPrims(require_attrs=[wm_rw, lm_ro], device=self._device, want_paths=True)
+        self._local_sel_rw = self._stage.SelectPrims(require_attrs=[wm_ro, lm_rw], device=self._device, want_paths=True)
+
+        # Build the view-side indices array (just [0..count-1]) and the view→fabric mapping.
+        self._view_indices = wp.array(list(range(self.count)), dtype=wp.uint32, device=self._device)
+        self._fabric_indices = self._compute_fabric_indices(self._trans_sel_ro)
+
+        # Indexed fabric arrays per (selection × attribute).
+        self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
+        self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
+        self._world_ifa_rw = self._build_indexed_array(self._world_sel_rw, self._WORLD_MATRIX_NAME)
+        self._local_ifa_rw = self._build_indexed_array(self._local_sel_rw, self._LOCAL_MATRIX_NAME)
+        self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+
+        # Pre-allocated reusable output buffers (world + local + scales).
+        self._fabric_positions_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
+        self._fabric_orientations_buf = wp.zeros((self.count, 4), dtype=wp.float32, device=self._device)
+        self._fabric_scales_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
+        self._fabric_local_translations_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
+        self._fabric_local_orientations_buf = wp.zeros((self.count, 4), dtype=wp.float32, device=self._device)
+        self._fabric_dummy_buffer = wp.zeros((0, 3), dtype=wp.float32, device=self._device)
 
-            rt_xformable.SetWorldXformFromUsd()
-
-            rt_prim.CreateAttribute(self._view_index_attr, usdrt.Sdf.ValueTypeNames.UInt, custom=True)
-            rt_prim.GetAttribute(self._view_index_attr).Set(i)
+        self._fabric_positions_ta = ProxyArray(self._fabric_positions_buf)
+        self._fabric_orientations_ta = ProxyArray(self._fabric_orientations_buf)
+        self._fabric_local_translations_ta = ProxyArray(self._fabric_local_translations_buf)
+        self._fabric_local_orientations_ta = ProxyArray(self._fabric_local_orientations_buf)
 
-        self._fabric_hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-            fabric_stage.GetFabricId(), fabric_stage.GetStageIdAsStageId()
-        )
-        self._fabric_hierarchy.update_world_xforms()
+        self._fabric_initialized = True
 
-        self._default_view_indices = wp.zeros((self.count,), dtype=wp.uint32, device=self._device)
-        wp.launch(
-            kernel=fabric_utils.arange_k, dim=self.count, inputs=[self._default_view_indices], device=self._device
-        )
-        wp.synchronize()
+        # Seed Fabric matrices from USD authoritatively.  ``SetWorldXformFromUsd`` /
+        # ``SetLocalXformFromUsd`` are no-ops on freshly authored stages that haven't
+        # been rendered yet; we instead read through the USD view (children) and
+        # ``UsdGeom.XformCache`` (parents) and write via the same compose kernel that
+        # ``set_world_poses`` uses.
+        self._sync_fabric_from_usd_initial()
 
-        # The constructor should have taken care of this, but double check here to avoid regressions
-        assert self._device in _fabric_supported_devices
+    def _sync_fabric_from_usd_initial(self) -> None:
+        """Populate Fabric world+local matrices for children and parents from USD.
 
-        self._fabric_selection = fabric_stage.SelectPrims(
-            require_attrs=[
-                (usdrt.Sdf.ValueTypeNames.UInt, self._view_index_attr, usdrt.Usd.Access.Read),
-                (usdrt.Sdf.ValueTypeNames.Matrix4d, "omni:fabric:worldMatrix", usdrt.Usd.Access.ReadWrite),
+        Performed once during ``_initialize_fabric``.  Without this step Fabric's
+        matrices are identity for stages that haven't been rendered yet, and our
+        getters (which read from Fabric) would return wrong values.
+        """
+        # --- Children ---
+        pos_ta, ori_ta = self._usd_view.get_world_poses()
+        scales_obj = self._usd_view.get_scales()
+        scales_wp = (
+            scales_obj.warp
+            if hasattr(scales_obj, "warp")
+            else scales_obj
+            if isinstance(scales_obj, wp.array)
+            else self._fabric_dummy_buffer
+        )
+        local_pos_ta, local_ori_ta = self._usd_view.get_local_poses()
+        # Compose into child worldMatrix.
+        wp.launch(
+            kernel=fabric_utils.compose_indexed_fabric_transforms,
+            dim=self.count,
+            inputs=[
+                self._world_ifa_rw,
+                _to_float32_2d(pos_ta.warp),
+                _to_float32_2d(ori_ta.warp),
+                _to_float32_2d(scales_wp),
+                False,
+                False,
+                False,
+                self._view_indices,
             ],
             device=self._device,
         )
-
-        self._view_to_fabric = wp.zeros((self.count,), dtype=wp.uint32, device=self._device)
-        self._fabric_to_view = wp.fabricarray(self._fabric_selection, self._view_index_attr)
-
+        # Compose into child localMatrix.
         wp.launch(
-            kernel=fabric_utils.set_view_to_fabric_array,
-            dim=self._fabric_to_view.shape[0],
-            inputs=[self._fabric_to_view, self._view_to_fabric],
+            kernel=fabric_utils.compose_indexed_fabric_transforms,
+            dim=self.count,
+            inputs=[
+                self._local_ifa_rw,
+                _to_float32_2d(local_pos_ta.warp),
+                _to_float32_2d(local_ori_ta.warp),
+                self._fabric_dummy_buffer,
+                False,
+                False,
+                False,
+                self._view_indices,
+            ],
             device=self._device,
         )
-        wp.synchronize()
-
-        self._fabric_positions_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
-        self._fabric_orientations_buf = wp.zeros((self.count, 4), dtype=wp.float32, device=self._device)
-        self._fabric_positions_ta = ProxyArray(self._fabric_positions_buf)
-        self._fabric_orientations_ta = ProxyArray(self._fabric_orientations_buf)
-        self._fabric_scales_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
-        self._fabric_dummy_buffer = wp.zeros((0, 3), dtype=wp.float32, device=self._device)
-        self._fabric_world_matrices = wp.fabricarray(self._fabric_selection, "omni:fabric:worldMatrix")
-        self._fabric_stage = fabric_stage
-        self._fabric_device = self._device
 
-        self._fabric_initialized = True
-        self._fabric_usd_sync_done = False
-
-    def _sync_fabric_from_usd_once(self) -> None:
-        """Sync Fabric world matrices from USD once, on the first read.
-
-        ``set_world_poses`` and ``set_scales`` each set ``_fabric_usd_sync_done``
-        themselves, so no explicit flag assignment is needed here.
-        """
-        if not self._fabric_initialized:
-            self._initialize_fabric()
-
-        positions_usd_ta, orientations_usd_ta = self._usd_view.get_world_poses()
-        positions_usd = positions_usd_ta.warp
-        orientations_usd = orientations_usd_ta.warp
-        scales_usd = self._usd_view.get_scales()
-
-        self.set_world_poses(positions_usd, orientations_usd)
-        self.set_scales(scales_usd)
+        # --- Parents (one entry per unique parent path) ---
+        unique_parent_paths = list(dict.fromkeys(p.rsplit("/", 1)[0] for p in self.prim_paths))
+        if unique_parent_paths:
+            usd_stage = sim_utils.get_current_stage()
+            xform_cache = UsdGeom.XformCache(Usd.TimeCode.Default())
+            world_pos_rows: list[list[float]] = []
+            world_ori_rows: list[list[float]] = []
+            for path in unique_parent_paths:
+                prim = usd_stage.GetPrimAtPath(path)
+                tf = xform_cache.GetLocalToWorldTransform(prim)
+                tf.Orthonormalize()
+                t = tf.ExtractTranslation()
+                q = tf.ExtractRotationQuat()
+                img, real = q.GetImaginary(), q.GetReal()
+                world_pos_rows.append([float(t[0]), float(t[1]), float(t[2])])
+                world_ori_rows.append([float(img[0]), float(img[1]), float(img[2]), float(real)])
+            parent_view_indices = wp.array(list(range(len(unique_parent_paths))), dtype=wp.uint32, device=self._device)
+            parent_pos_wp = wp.array(world_pos_rows, dtype=wp.float32, device=self._device)
+            parent_ori_wp = wp.array(world_ori_rows, dtype=wp.float32, device=self._device)
+            parent_unit_scale = wp.array(
+                [[1.0, 1.0, 1.0]] * len(unique_parent_paths),
+                dtype=wp.float32,
+                device=self._device,
+            )
+            # Compose worldMatrix for parents (use a one-shot indexed array against
+            # ``world_sel_rw`` keyed on the unique parent paths).
+            parent_world_rw = wp.indexedfabricarray(
+                fa=wp.fabricarray(self._world_sel_rw, self._WORLD_MATRIX_NAME),
+                indices=self._compute_fabric_indices_for(self._world_sel_rw, unique_parent_paths),
+            )
+            wp.launch(
+                kernel=fabric_utils.compose_indexed_fabric_transforms,
+                dim=len(unique_parent_paths),
+                inputs=[
+                    parent_world_rw,
+                    parent_pos_wp,
+                    parent_ori_wp,
+                    parent_unit_scale,
+                    False,
+                    False,
+                    False,
+                    parent_view_indices,
+                ],
+                device=self._device,
+            )
+        wp.synchronize()
 
-    def _resolve_indices_wp(self, indices: wp.array | None) -> wp.array:
-        """Resolve view indices as a Warp uint32 array."""
-        if indices is None or indices == slice(None):
-            if self._default_view_indices is None:
-                raise RuntimeError("Fabric indices are not initialized.")
-            return self._default_view_indices
-        if indices.dtype != wp.uint32:
-            return wp.array(indices.numpy().astype("uint32"), dtype=wp.uint32, device=self._device)
-        return indices
+    def _compute_fabric_indices_for(self, selection, paths: list[str]) -> wp.array:
+        """Path-dict lookup helper used to build one-shot indexed arrays for a custom path set."""
+        fabric_paths = selection.GetPaths()
+        path_to_idx = {str(p): i for i, p in enumerate(fabric_paths)}
+        indices: list[int] = []
+        for path in paths:
+            idx = path_to_idx.get(path)
+            if idx is None:
+                raise RuntimeError(f"Path '{path}' not found in Fabric selection.")
+            indices.append(idx)
+        return wp.array(indices, dtype=wp.int32, device=self._device)
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index f0c18ccb98c7..f31d5ec911b2 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -23,7 +23,7 @@
 import torch  # noqa: E402
 import warp as wp  # noqa: E402
 from frame_view_contract_utils import *  # noqa: F401, F403, E402
-from frame_view_contract_utils import CHILD_OFFSET, ViewBundle, test_set_world_updates_local  # noqa: E402
+from frame_view_contract_utils import CHILD_OFFSET, ViewBundle  # noqa: E402
 from isaaclab_physx.sim.views import FabricFrameView as FrameView  # noqa: E402
 
 from pxr import Gf, UsdGeom  # noqa: E402
@@ -106,28 +106,11 @@ def factory(num_envs: int, device: str) -> ViewBundle:
 
 
 # ------------------------------------------------------------------
-# Override shared contract test with expected failure for Fabric.
-# FabricFrameView.set_world_poses writes to Fabric worldMatrix only; the local
-# pose (read via USD) does not reflect the change because there is no
-# Fabric → USD writeback for local poses.  This is tracked as Issue #5
-# (localMatrix: set_local_poses falls back to USD).
+# Override: ensure the shared contract test runs without xfail now that
+# get_local_poses computes local from Fabric world matrices.
 # ------------------------------------------------------------------
-
-
-@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
-@pytest.mark.xfail(
-    reason=(
-        "Issue #5: FabricFrameView.set_world_poses writes to Fabric worldMatrix only. "
-        "get_local_poses reads from stale USD because there is no Fabric→USD "
-        "writeback for local poses."
-    ),
-    strict=True,
-)
-def test_set_world_updates_local(device, view_factory):  # noqa: F811
-    """Override the shared test to mark it as expected failure."""
-    from frame_view_contract_utils import test_set_world_updates_local as _impl  # noqa: PLC0415
-
-    _impl(device, view_factory)
+# (No override needed — the shared test_set_world_updates_local from
+#  frame_view_contract_utils is imported via wildcard and will run as-is.)
 
 
 # ------------------------------------------------------------------
@@ -188,48 +171,103 @@ def test_fabric_set_world_does_not_write_back_to_usd(device, view_factory):
 
 
 @pytest.mark.parametrize("device", ["cpu", "cuda:0"])
-def test_fabric_rebuild_after_topology_change(device, view_factory, monkeypatch):
-    """Forcing the topology-changed branch on a write triggers
-    :meth:`_rebuild_fabric_arrays` and leaves the view in a state where
-    subsequent writes/reads still produce correct data.
-
-    Real ``PrimSelection.PrepareForReuse`` reports topology change only when
-    Fabric reallocates internally, which is hard to provoke from a unit test.
-    Instead we monkeypatch ``_prepare_for_reuse`` on the instance to always
-    take the rebuild branch and verify the view remains usable.
+def test_fabric_rebuild_after_topology_change(device, view_factory):
+    """A simulated topology change rebuilds the indexed fabric arrays and leaves
+    the view in a state where subsequent writes/reads still produce correct data.
+
+    Real ``PrimSelection.PrepareForReuse`` reports topology change only when Fabric
+    reallocates internally, which is hard to provoke from a unit test.  Instead we
+    invoke :meth:`FabricFrameView._compute_fabric_indices` and rebuild the indexed
+    arrays manually, mimicking what ``_get_*_array`` would do on a real topology
+    event, then verify a roundtrip still works.
     """
     bundle = view_factory(2, device)
     view = bundle.view
 
-    # First write — initializes Fabric and binds _fabric_selection.
+    # First write — initializes Fabric.
     initial = wp.zeros((2, 3), dtype=wp.float32, device=device)
     wp.launch(kernel=_fill_position, dim=2, inputs=[initial, 1.0, 2.0, 3.0], device=device)
     view.set_world_poses(positions=initial)
 
-    rebuild_calls = []
-    real_rebuild = view._rebuild_fabric_arrays
+    # Simulate topology change: recompute fabric indices and rebuild every indexed array.
+    view._fabric_indices = view._compute_fabric_indices(view._trans_sel_ro)
+    view._world_ifa_ro = view._build_indexed_array(view._trans_sel_ro, view._WORLD_MATRIX_NAME)
+    view._local_ifa_ro = view._build_indexed_array(view._trans_sel_ro, view._LOCAL_MATRIX_NAME)
+    view._world_ifa_rw = view._build_indexed_array(view._world_sel_rw, view._WORLD_MATRIX_NAME)
+    view._local_ifa_rw = view._build_indexed_array(view._local_sel_rw, view._LOCAL_MATRIX_NAME)
+    view._parent_world_ifa_ro = view._build_parent_indexed_array(view._trans_sel_ro)
 
-    def spy_rebuild():
-        rebuild_calls.append(True)
-        real_rebuild()
-
-    def force_topology_changed():
-        if view._fabric_selection is not None:
-            view._fabric_selection.PrepareForReuse()
-            spy_rebuild()
-
-    monkeypatch.setattr(view, "_prepare_for_reuse", force_topology_changed)
-
-    # Trigger another write — goes through the forced topology-change branch.
+    # Trigger another write through the rebuilt arrays.
     new = wp.zeros((2, 3), dtype=wp.float32, device=device)
     wp.launch(kernel=_fill_position, dim=2, inputs=[new, 4.0, 5.0, 6.0], device=device)
     view.set_world_poses(positions=new)
 
-    assert rebuild_calls, "Forced topology-change branch did not invoke _rebuild_fabric_arrays"
-
-    # Read back — proves the rebuilt _view_to_fabric and _fabric_world_matrices
-    # are still consistent.
     ret_pos, _ = view.get_world_poses()
     pos_torch = wp.to_torch(ret_pos)
     expected = torch.tensor([[4.0, 5.0, 6.0], [4.0, 5.0, 6.0]], device=device)
     assert torch.allclose(pos_torch, expected, atol=1e-7), f"Read after rebuild failed on {device}: {pos_torch}"
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_prepare_for_reuse_detects_topology_change(device, view_factory):
+    """Each persistent ``PrimSelection`` exposes ``PrepareForReuse`` and returns a
+    bool.  When the underlying Fabric topology is unchanged it returns False.
+    """
+    bundle = view_factory(1, device)
+    view = bundle.view
+    view.get_world_poses()  # trigger Fabric init
+
+    assert view._trans_sel_ro is not None, "trans_sel_ro selection not initialized"
+    for selection in (view._trans_sel_ro, view._world_sel_rw, view._local_sel_rw):
+        result = selection.PrepareForReuse()
+        assert isinstance(result, bool), f"PrepareForReuse should return bool, got {type(result)}"
+        assert not result, "PrepareForReuse should return False when no topology change"
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_set_local_via_fabric_path(device, view_factory):
+    """Exercise the Fabric-native set_local_poses path.
+
+    Ensures set_local_poses computes child_world = parent_world * local
+    entirely within Fabric (not falling back to USD) by first triggering
+    the Fabric sync via get_world_poses.
+    """
+    bundle = view_factory(num_envs=1, device=device)
+    view = bundle.view
+
+    # Trigger Fabric init and sync (sets _fabric_usd_sync_done = True)
+    view.get_world_poses()
+
+    # Now set_local_poses should take the Fabric path
+    new_local_pos = wp.zeros((1, 3), dtype=wp.float32, device=device)
+    wp.launch(kernel=_fill_position, dim=1, inputs=[new_local_pos, 1.0, 2.0, 3.0], device=device)
+    ori = torch.tensor([[0.0, 0.0, 0.0, 1.0]], dtype=torch.float32, device=device)
+    new_local_ori = wp.from_torch(ori)
+
+    view.set_local_poses(translations=new_local_pos, orientations=new_local_ori)
+
+    # Verify: world = parent(0,0,1) + local(1,2,3) = (1,2,4)
+    world_pos, _ = view.get_world_poses()
+    expected = torch.tensor([[1.0, 2.0, 4.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(world_pos.torch, expected, atol=1e-4, rtol=0)
+
+    # Verify get_local_poses returns the local offset
+    local_pos, _ = view.get_local_poses()
+    expected_local = torch.tensor([[1.0, 2.0, 3.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(local_pos.torch, expected_local, atol=1e-4, rtol=0)
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_get_scales_fabric_path(device, view_factory):
+    """Exercise the Fabric-native get_scales path."""
+    bundle = view_factory(num_envs=1, device=device)
+    view = bundle.view
+
+    # Trigger Fabric init
+    view.get_world_poses()
+
+    scales = view.get_scales()
+    scales_t = wp.to_torch(scales)
+    # Default scale should be (1, 1, 1)
+    expected = torch.tensor([[1.0, 1.0, 1.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(scales_t, expected, atol=1e-4, rtol=0)

From 284c0a51dc12efca2e7f873181b8ea6b9942dce5 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Thu, 7 May 2026 11:47:22 +0000
Subject: [PATCH 02/26] Fix FabricFrameView local-sync and per-selection
 indices

Three review fixes on the indexedfabricarray refactor:

* set_scales wrote the new scale into worldMatrix but never refreshed
  localMatrix, so a subsequent get_local_poses returned the stale
  scale.  Call _sync_local_from_world after the world write, matching
  set_world_poses.

* The view->fabric mapping was stored in a single shared
  _fabric_indices field that each accessor overwrote from its own
  selection's GetPaths() ordering.  Selections do not guarantee a
  shared path ordering, so this was brittle and hard to reason about.
  Cache the mapping per selection (_trans_ro_fabric_indices,
  _world_rw_fabric_indices, _local_rw_fabric_indices) and pass it
  explicitly to _build_indexed_array.  The three trans_ro accessors
  now share a _rebuild_trans_ro_arrays helper.

* Update two test comments that referenced the removed
  _fabric_usd_sync_done attribute to point at the lazy
  _initialize_fabric() call instead.
---
 .../sim/views/fabric_frame_view.py            | 86 ++++++++++++-------
 .../test/sim/test_views_xform_prim_fabric.py  |  4 +-
 2 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 633c0436d061..8f79317bbdfd 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -120,9 +120,14 @@ def __init__(
         self._trans_sel_ro = None
         self._world_sel_rw = None
         self._local_sel_rw = None
-        # Index arrays (view-side indices and view→fabric mappings).
+        # Index arrays (view-side indices and view→fabric mappings).  Each selection's
+        # ``GetPaths()`` ordering is independent, so the view→fabric mapping is cached
+        # per selection rather than shared — sharing would silently corrupt indexed
+        # arrays whose selection didn't fire ``PrepareForReuse`` on the same frame.
         self._view_indices: wp.array | None = None
-        self._fabric_indices: wp.array | None = None
+        self._trans_ro_fabric_indices: wp.array | None = None
+        self._world_rw_fabric_indices: wp.array | None = None
+        self._local_rw_fabric_indices: wp.array | None = None
         self._parent_fabric_indices: wp.array | None = None
         # Indexed fabric arrays.
         self._world_ifa_ro = None
@@ -350,6 +355,10 @@ def set_scales(self, scales, indices=None):
         )
         wp.synchronize()
 
+        # World was just written — recompute child localMatrix from parent worldMatrix
+        # so the next get_local_poses returns the new scale rather than the stale one.
+        self._sync_local_from_world(indices_wp)
+
     def get_scales(self, indices=None):
         if not self._use_fabric:
             return self._usd_view.get_scales(indices)
@@ -445,49 +454,55 @@ def _sync_local_from_world(self, indices_wp: wp.array) -> None:
 
     def _get_world_ro_array(self):
         if self._trans_sel_ro.PrepareForReuse():
-            self._rebuild_indices_for(self._trans_sel_ro)
-            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
-            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
-            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+            self._rebuild_trans_ro_arrays()
         return self._world_ifa_ro
 
     def _get_local_ro_array(self):
         if self._trans_sel_ro.PrepareForReuse():
-            self._rebuild_indices_for(self._trans_sel_ro)
-            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
-            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
-            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+            self._rebuild_trans_ro_arrays()
         return self._local_ifa_ro
 
     def _get_world_rw_array(self):
         if self._world_sel_rw.PrepareForReuse():
-            self._rebuild_indices_for(self._world_sel_rw)
-            self._world_ifa_rw = self._build_indexed_array(self._world_sel_rw, self._WORLD_MATRIX_NAME)
+            self._world_rw_fabric_indices = self._compute_fabric_indices(self._world_sel_rw)
+            self._world_ifa_rw = self._build_indexed_array(
+                self._world_sel_rw, self._WORLD_MATRIX_NAME, self._world_rw_fabric_indices
+            )
         return self._world_ifa_rw
 
     def _get_local_rw_array(self):
         if self._local_sel_rw.PrepareForReuse():
-            self._rebuild_indices_for(self._local_sel_rw)
-            self._local_ifa_rw = self._build_indexed_array(self._local_sel_rw, self._LOCAL_MATRIX_NAME)
+            self._local_rw_fabric_indices = self._compute_fabric_indices(self._local_sel_rw)
+            self._local_ifa_rw = self._build_indexed_array(
+                self._local_sel_rw, self._LOCAL_MATRIX_NAME, self._local_rw_fabric_indices
+            )
         return self._local_ifa_rw
 
     def _get_parent_world_ro_array(self):
         # Built and refreshed alongside the trans_ro selection (parents share that selection).
         if self._parent_world_ifa_ro is None or self._trans_sel_ro.PrepareForReuse():
-            self._rebuild_indices_for(self._trans_sel_ro)
-            self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
-            self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
-            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+            self._rebuild_trans_ro_arrays()
         return self._parent_world_ifa_ro
 
+    def _rebuild_trans_ro_arrays(self) -> None:
+        """Rebuild the trans_ro indices and the three indexed arrays that depend on them.
+
+        ``_world_ifa_ro``, ``_local_ifa_ro`` and ``_parent_world_ifa_ro`` are all
+        keyed off the ``trans_sel_ro`` path ordering, so they are refreshed together.
+        """
+        self._trans_ro_fabric_indices = self._compute_fabric_indices(self._trans_sel_ro)
+        self._world_ifa_ro = self._build_indexed_array(
+            self._trans_sel_ro, self._WORLD_MATRIX_NAME, self._trans_ro_fabric_indices
+        )
+        self._local_ifa_ro = self._build_indexed_array(
+            self._trans_sel_ro, self._LOCAL_MATRIX_NAME, self._trans_ro_fabric_indices
+        )
+        self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+
     # ------------------------------------------------------------------
     # Internal — index computation
     # ------------------------------------------------------------------
 
-    def _rebuild_indices_for(self, selection) -> None:
-        """Recompute ``_fabric_indices`` (view → fabric) from a selection's path order."""
-        self._fabric_indices = self._compute_fabric_indices(selection)
-
     def _compute_fabric_indices(self, selection) -> wp.array:
         fabric_paths = selection.GetPaths()
         path_to_fabric_idx: dict[str, int] = {str(p): i for i, p in enumerate(fabric_paths)}
@@ -517,9 +532,9 @@ def _compute_parent_fabric_indices(self, selection) -> wp.array:
             indices.append(fabric_idx)
         return wp.array(indices, dtype=wp.int32, device=self._device)
 
-    def _build_indexed_array(self, selection, attribute_name: str) -> wp.indexedfabricarray:
+    def _build_indexed_array(self, selection, attribute_name: str, fabric_indices: wp.array) -> wp.indexedfabricarray:
         fa = wp.fabricarray(selection, attribute_name)
-        return wp.indexedfabricarray(fa=fa, indices=self._fabric_indices)
+        return wp.indexedfabricarray(fa=fa, indices=fabric_indices)
 
     def _build_parent_indexed_array(self, selection) -> wp.indexedfabricarray:
         self._parent_fabric_indices = self._compute_parent_fabric_indices(selection)
@@ -598,15 +613,26 @@ def _initialize_fabric(self) -> None:
         self._world_sel_rw = self._stage.SelectPrims(require_attrs=[wm_rw, lm_ro], device=self._device, want_paths=True)
         self._local_sel_rw = self._stage.SelectPrims(require_attrs=[wm_ro, lm_rw], device=self._device, want_paths=True)
 
-        # Build the view-side indices array (just [0..count-1]) and the view→fabric mapping.
+        # Build the view-side indices array (just [0..count-1]) and a per-selection
+        # view→fabric mapping (selections do not guarantee a shared path ordering).
         self._view_indices = wp.array(list(range(self.count)), dtype=wp.uint32, device=self._device)
-        self._fabric_indices = self._compute_fabric_indices(self._trans_sel_ro)
+        self._trans_ro_fabric_indices = self._compute_fabric_indices(self._trans_sel_ro)
+        self._world_rw_fabric_indices = self._compute_fabric_indices(self._world_sel_rw)
+        self._local_rw_fabric_indices = self._compute_fabric_indices(self._local_sel_rw)
 
         # Indexed fabric arrays per (selection × attribute).
-        self._world_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._WORLD_MATRIX_NAME)
-        self._local_ifa_ro = self._build_indexed_array(self._trans_sel_ro, self._LOCAL_MATRIX_NAME)
-        self._world_ifa_rw = self._build_indexed_array(self._world_sel_rw, self._WORLD_MATRIX_NAME)
-        self._local_ifa_rw = self._build_indexed_array(self._local_sel_rw, self._LOCAL_MATRIX_NAME)
+        self._world_ifa_ro = self._build_indexed_array(
+            self._trans_sel_ro, self._WORLD_MATRIX_NAME, self._trans_ro_fabric_indices
+        )
+        self._local_ifa_ro = self._build_indexed_array(
+            self._trans_sel_ro, self._LOCAL_MATRIX_NAME, self._trans_ro_fabric_indices
+        )
+        self._world_ifa_rw = self._build_indexed_array(
+            self._world_sel_rw, self._WORLD_MATRIX_NAME, self._world_rw_fabric_indices
+        )
+        self._local_ifa_rw = self._build_indexed_array(
+            self._local_sel_rw, self._LOCAL_MATRIX_NAME, self._local_rw_fabric_indices
+        )
         self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
 
         # Pre-allocated reusable output buffers (world + local + scales).
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index f31d5ec911b2..979c7766939f 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -235,7 +235,7 @@ def test_set_local_via_fabric_path(device, view_factory):
     bundle = view_factory(num_envs=1, device=device)
     view = bundle.view
 
-    # Trigger Fabric init and sync (sets _fabric_usd_sync_done = True)
+    # Trigger lazy `_initialize_fabric()` so subsequent calls take the Fabric path.
     view.get_world_poses()
 
     # Now set_local_poses should take the Fabric path
@@ -263,7 +263,7 @@ def test_get_scales_fabric_path(device, view_factory):
     bundle = view_factory(num_envs=1, device=device)
     view = bundle.view
 
-    # Trigger Fabric init
+    # Trigger lazy `_initialize_fabric()` so the get_scales call below uses Fabric.
     view.get_world_poses()
 
     scales = view.get_scales()

From 95ba0dd7d3f5e2a04b7d4e588de6204dfcf751b8 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Thu, 7 May 2026 16:37:28 +0200
Subject: [PATCH 03/26] Some cleanup

---
 .../sim/views/fabric_frame_view.py            | 123 +++++++++++-------
 1 file changed, 79 insertions(+), 44 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 8f79317bbdfd..5f536acf2609 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -5,8 +5,7 @@
 
 """PhysX FrameView with Fabric GPU acceleration.
 
-Design follows the prototype in
-``https://github.com/bareya/IsaacLab/tree/pbarejko/camera-update``:
+Design:
 
 * Three persistent ``PrimSelection`` instances differing only in per-attribute access
   mode (one for each of {trans_ro, world_rw, local_rw}).
@@ -28,6 +27,7 @@
 from __future__ import annotations
 
 import logging
+from typing import ClassVar
 
 import torch
 import warp as wp
@@ -43,18 +43,19 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: USDRT SelectPrims still pins to cuda:0 even when ``device='cuda:1'`` is
-# requested.  When that limitation is lifted (see feat/frame-view-enable-mgpu),
-# this allowlist can drop the explicit cuda:0 entry.
+# TODO: extend this to ``cuda:N`` once we wire up multi-GPU support for the view.
+# Recent Kit / USDRT releases do support multi-GPU ``SelectPrims``, but the
+# rest of the FabricFrameView wiring (selections, indexed arrays, etc.) still
+# assumes a single device — to be tackled in a follow-up.
 _fabric_supported_devices = ("cpu", "cuda", "cuda:0")
 
 
 def _to_float32_2d(a: wp.array | torch.Tensor) -> wp.array | torch.Tensor:
-    """Ensure ``wp.array`` inputs are 2-D ``float32`` for the Fabric kernels.
+    """Ensure array is compatible with Fabric kernels (2-D float32).
 
-    For ``wp.array`` with vec dtypes (``vec3f``, ``vec4f``) this uses
-    :meth:`wp.array.view` for zero-copy reinterpretation.  ``torch.Tensor``
-    and already-correct 2-D float32 arrays pass through.
+    For ``wp.array`` with vec dtypes (``vec3f``, ``vec4f``), uses
+    :meth:`wp.array.view` for zero-copy reinterpretation.
+    ``torch.Tensor`` and already-correct 2-D float32 arrays pass through.
     """
     if not isinstance(a, wp.array):
         return a
@@ -68,15 +69,23 @@ def _to_float32_2d(a: wp.array | torch.Tensor) -> wp.array | torch.Tensor:
 class FabricFrameView(BaseFrameView):
     """FrameView with Fabric GPU acceleration for the PhysX backend.
 
-    Holds a :class:`UsdFrameView` internally for the disabled-Fabric fallback path
-    (when ``/physics/fabricEnabled`` is false or the device is unsupported).
+    Uses composition: holds a :class:`UsdFrameView` internally for USD
+    fallback and non-accelerated operations (visibility, scales when
+    Fabric is disabled).
 
-    When Fabric is enabled, all transform reads and writes go through Warp kernels
-    operating on Fabric ``omni:fabric:worldMatrix`` and ``omni:fabric:localMatrix``
-    via :class:`wp.indexedfabricarray`.  No prim attributes are added.
+    When Fabric is enabled, world-pose, local-pose, and scale operations
+    use Warp kernels operating on ``omni:fabric:worldMatrix`` and
+    ``omni:fabric:localMatrix``.  All other operations delegate to the
+    internal USD view.
 
-    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept
-    ``wp.array``.
+    After every Fabric write (``set_world_poses``, ``set_local_poses``,
+    ``set_scales``), :meth:`PrepareForReuse` is called on the
+    ``PrimSelection`` to notify the FSD renderer that Fabric data has
+    changed and to detect topology changes that require rebuilding
+    internal mappings.  Read operations do not call PrepareForReuse to
+    avoid unnecessary renderer invalidation.
+
+    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept ``wp.array``.
     """
 
     _WORLD_MATRIX_NAME = "omni:fabric:worldMatrix"
@@ -84,9 +93,11 @@ class FabricFrameView(BaseFrameView):
 
     # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
     # share one IFabricHierarchy handle; the dirty-stages set tracks which stages
-    # need ``update_world_xforms()`` before a world read.
-    _hierarchy_cache: dict[int, object] = {}
-    _dirty_stages: set[int] = set()
+    # need ``update_world_xforms()`` before a world read.  ``ClassVar`` plus the
+    # ``_static_`` prefix make it explicit (and enforceable by type checkers) that
+    # these are class-level — never shadow them with ``self.<name> = ...``.
+    _static_hierarchy_cache: ClassVar[dict[int, object]] = {}
+    _static_dirty_stages: ClassVar[set[int]] = set()
 
     def __init__(
         self,
@@ -96,6 +107,18 @@ def __init__(
         stage: Usd.Stage | None = None,
         **kwargs,
     ):
+        """Initialize the view.
+
+        Args:
+            prim_path: USD prim-path pattern to match.
+            device: Device for Warp arrays (``"cpu"`` or ``"cuda:0"``).
+            validate_xform_ops: Whether to validate prim xform-ops.
+            stage: USD stage; defaults to the current sim context's stage.
+            **kwargs: Additional keyword arguments (ignored). Matches the signature of
+                :class:`~isaaclab.sim.views.UsdFrameView` so that the top-level
+                :class:`~isaaclab.sim.views.FrameView` factory can forward backend-agnostic
+                kwargs without each backend having to know about every option.
+        """
         self._usd_view = UsdFrameView(prim_path, device=device, validate_xform_ops=validate_xform_ops, stage=stage)
         self._device = device
 
@@ -116,10 +139,12 @@ def __init__(
         self._stage_id: int | None = None
         self._stage = None
         self._fabric_hierarchy = None
+
         # Selections.
         self._trans_sel_ro = None
         self._world_sel_rw = None
         self._local_sel_rw = None
+
         # Index arrays (view-side indices and view→fabric mappings).  Each selection's
         # ``GetPaths()`` ordering is independent, so the view→fabric mapping is cached
         # per selection rather than shared — sharing would silently corrupt indexed
@@ -129,14 +154,21 @@ def __init__(
         self._world_rw_fabric_indices: wp.array | None = None
         self._local_rw_fabric_indices: wp.array | None = None
         self._parent_fabric_indices: wp.array | None = None
+
         # Indexed fabric arrays.
         self._world_ifa_ro = None
         self._local_ifa_ro = None
         self._world_ifa_rw = None
         self._local_ifa_rw = None
         self._parent_world_ifa_ro = None
-        # Cached output buffers.
-        self._fabric_dummy_buffer: wp.array | None = None
+
+        # Sentinel passed to ``compose_indexed_fabric_transforms`` /
+        # ``decompose_indexed_fabric_transforms`` for slots the caller does not want
+        # written or read.  The kernels gate every per-row access on
+        # ``shape[0] > 0``, so a ``(0, 0)`` array is enough — the inner dim is never
+        # indexed.  One shared instance covers all "unused" slots regardless of
+        # whether they would have held positions, quaternions, or scales.
+        self._fabric_empty_2d_array_sentinel: wp.array | None = None
 
     # ------------------------------------------------------------------
     # Delegated properties
@@ -170,7 +202,7 @@ def set_visibility(self, visibility, indices=None):
         self._usd_view.set_visibility(visibility, indices)
 
     # ------------------------------------------------------------------
-    # World poses
+    # World poses — Fabric-accelerated or USD fallback
     # ------------------------------------------------------------------
 
     def set_world_poses(self, positions=None, orientations=None, indices=None):
@@ -185,8 +217,8 @@ def set_world_poses(self, positions=None, orientations=None, indices=None):
         self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
-        positions_wp = _to_float32_2d(positions) if positions is not None else self._fabric_dummy_buffer
-        orientations_wp = _to_float32_2d(orientations) if orientations is not None else self._fabric_dummy_buffer
+        positions_wp = self._to_float32_2d_or_empty(positions)
+        orientations_wp = self._to_float32_2d_or_empty(orientations)
 
         wp.launch(
             kernel=fabric_utils.compose_indexed_fabric_transforms,
@@ -195,7 +227,7 @@ def set_world_poses(self, positions=None, orientations=None, indices=None):
                 self._get_world_rw_array(),
                 positions_wp,
                 orientations_wp,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
                 False,
                 False,
                 False,
@@ -237,7 +269,7 @@ def get_world_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray,
                 self._get_world_ro_array(),
                 positions_wp,
                 orientations_wp,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
                 indices_wp,
             ],
             device=self._device,
@@ -261,8 +293,8 @@ def set_local_poses(self, translations=None, orientations=None, indices=None):
             self._initialize_fabric()
 
         indices_wp = self._resolve_indices_wp(indices)
-        translations_wp = _to_float32_2d(translations) if translations is not None else self._fabric_dummy_buffer
-        orientations_wp = _to_float32_2d(orientations) if orientations is not None else self._fabric_dummy_buffer
+        translations_wp = self._to_float32_2d_or_empty(translations)
+        orientations_wp = self._to_float32_2d_or_empty(orientations)
 
         wp.launch(
             kernel=fabric_utils.compose_indexed_fabric_transforms,
@@ -271,7 +303,7 @@ def set_local_poses(self, translations=None, orientations=None, indices=None):
                 self._get_local_rw_array(),
                 translations_wp,
                 orientations_wp,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
                 False,
                 False,
                 False,
@@ -282,7 +314,7 @@ def set_local_poses(self, translations=None, orientations=None, indices=None):
         wp.synchronize()
 
         # Mark the stage dirty so the next world read calls update_world_xforms().
-        FabricFrameView._dirty_stages.add(self._stage_id)
+        FabricFrameView._static_dirty_stages.add(self._stage_id)
 
     def get_local_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray, ProxyArray]:
         if not self._use_fabric:
@@ -309,7 +341,7 @@ def get_local_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray,
                 self._get_local_ro_array(),
                 translations_wp,
                 orientations_wp,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
                 indices_wp,
             ],
             device=self._device,
@@ -336,15 +368,15 @@ def set_scales(self, scales, indices=None):
         self._sync_world_from_local_if_dirty()
 
         indices_wp = self._resolve_indices_wp(indices)
-        scales_wp = _to_float32_2d(scales) if scales is not None else self._fabric_dummy_buffer
+        scales_wp = self._to_float32_2d_or_empty(scales)
 
         wp.launch(
             kernel=fabric_utils.compose_indexed_fabric_transforms,
             dim=indices_wp.shape[0],
             inputs=[
                 self._get_world_rw_array(),
-                self._fabric_dummy_buffer,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
+                self._fabric_empty_2d_array_sentinel,
                 scales_wp,
                 False,
                 False,
@@ -383,8 +415,8 @@ def get_scales(self, indices=None):
             dim=count,
             inputs=[
                 self._get_world_ro_array(),
-                self._fabric_dummy_buffer,
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
+                self._fabric_empty_2d_array_sentinel,
                 scales_wp,
                 indices_wp,
             ],
@@ -399,6 +431,9 @@ def get_scales(self, indices=None):
     # Internal — sync helpers
     # ------------------------------------------------------------------
 
+    def _to_float32_2d_or_empty(self, data):
+        return self._fabric_empty_2d_array_sentinel if data is None else _to_float32_2d(data)
+
     def _sync_world_from_local_if_dirty(self) -> None:
         """If a prior local write left world matrices stale, recompute them on the fly.
 
@@ -408,7 +443,7 @@ def _sync_world_from_local_if_dirty(self) -> None:
         does ``child_world = parent_world * child_local`` per child, leaving the
         Fabric-side localMatrix untouched.
         """
-        if self._stage_id is None or self._stage_id not in FabricFrameView._dirty_stages:
+        if self._stage_id is None or self._stage_id not in FabricFrameView._static_dirty_stages:
             return
         # Make sure the parent indexed array is up-to-date with the current trans selection.
         if self._parent_world_ifa_ro is None:
@@ -425,7 +460,7 @@ def _sync_world_from_local_if_dirty(self) -> None:
             device=self._device,
         )
         wp.synchronize()
-        FabricFrameView._dirty_stages.discard(self._stage_id)
+        FabricFrameView._static_dirty_stages.discard(self._stage_id)
 
     def _sync_local_from_world(self, indices_wp: wp.array) -> None:
         """Recompute child ``localMatrix`` from (parent worldMatrix, child worldMatrix).
@@ -568,14 +603,14 @@ def _initialize_fabric(self) -> None:
         # BEFORE we author any local matrices, so the per-prim
         # ``SetLocalXformFromUsd`` calls below mark themselves dirty and the next
         # ``update_world_xforms()`` walks the parent chain to populate worldMatrix.
-        if self._stage_id not in FabricFrameView._hierarchy_cache:
+        if self._stage_id not in FabricFrameView._static_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
                 self._stage.GetFabricId(), self._stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)
-            FabricFrameView._hierarchy_cache[self._stage_id] = hierarchy
-        self._fabric_hierarchy = FabricFrameView._hierarchy_cache[self._stage_id]
+            FabricFrameView._static_hierarchy_cache[self._stage_id] = hierarchy
+        self._fabric_hierarchy = FabricFrameView._static_hierarchy_cache[self._stage_id]
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
         # attributes.  Our ``trans_ro`` selection requires both, so prims missing either
@@ -641,7 +676,7 @@ def _initialize_fabric(self) -> None:
         self._fabric_scales_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
         self._fabric_local_translations_buf = wp.zeros((self.count, 3), dtype=wp.float32, device=self._device)
         self._fabric_local_orientations_buf = wp.zeros((self.count, 4), dtype=wp.float32, device=self._device)
-        self._fabric_dummy_buffer = wp.zeros((0, 3), dtype=wp.float32, device=self._device)
+        self._fabric_empty_2d_array_sentinel = wp.zeros((0, 0), dtype=wp.float32, device=self._device)
 
         self._fabric_positions_ta = ProxyArray(self._fabric_positions_buf)
         self._fabric_orientations_ta = ProxyArray(self._fabric_orientations_buf)
@@ -672,7 +707,7 @@ def _sync_fabric_from_usd_initial(self) -> None:
             if hasattr(scales_obj, "warp")
             else scales_obj
             if isinstance(scales_obj, wp.array)
-            else self._fabric_dummy_buffer
+            else self._fabric_empty_2d_array_sentinel
         )
         local_pos_ta, local_ori_ta = self._usd_view.get_local_poses()
         # Compose into child worldMatrix.
@@ -699,7 +734,7 @@ def _sync_fabric_from_usd_initial(self) -> None:
                 self._local_ifa_rw,
                 _to_float32_2d(local_pos_ta.warp),
                 _to_float32_2d(local_ori_ta.warp),
-                self._fabric_dummy_buffer,
+                self._fabric_empty_2d_array_sentinel,
                 False,
                 False,
                 False,

From 57821b2d34211f545ca73221f13755f0cc1fe14e Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Thu, 7 May 2026 16:41:24 +0200
Subject: [PATCH 04/26] Fixed tests

---
 .../test/sim/test_views_xform_prim_fabric.py   | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index 979c7766939f..1196ddcc5516 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -189,13 +189,17 @@ def test_fabric_rebuild_after_topology_change(device, view_factory):
     wp.launch(kernel=_fill_position, dim=2, inputs=[initial, 1.0, 2.0, 3.0], device=device)
     view.set_world_poses(positions=initial)
 
-    # Simulate topology change: recompute fabric indices and rebuild every indexed array.
-    view._fabric_indices = view._compute_fabric_indices(view._trans_sel_ro)
-    view._world_ifa_ro = view._build_indexed_array(view._trans_sel_ro, view._WORLD_MATRIX_NAME)
-    view._local_ifa_ro = view._build_indexed_array(view._trans_sel_ro, view._LOCAL_MATRIX_NAME)
-    view._world_ifa_rw = view._build_indexed_array(view._world_sel_rw, view._WORLD_MATRIX_NAME)
-    view._local_ifa_rw = view._build_indexed_array(view._local_sel_rw, view._LOCAL_MATRIX_NAME)
-    view._parent_world_ifa_ro = view._build_parent_indexed_array(view._trans_sel_ro)
+    # Simulate topology change: recompute per-selection fabric indices and rebuild
+    # every indexed array, mirroring the lazy paths in the ``_get_*_array`` accessors.
+    view._rebuild_trans_ro_arrays()
+    view._world_rw_fabric_indices = view._compute_fabric_indices(view._world_sel_rw)
+    view._world_ifa_rw = view._build_indexed_array(
+        view._world_sel_rw, view._WORLD_MATRIX_NAME, view._world_rw_fabric_indices
+    )
+    view._local_rw_fabric_indices = view._compute_fabric_indices(view._local_sel_rw)
+    view._local_ifa_rw = view._build_indexed_array(
+        view._local_sel_rw, view._LOCAL_MATRIX_NAME, view._local_rw_fabric_indices
+    )
 
     # Trigger another write through the rebuilt arrays.
     new = wp.zeros((2, 3), dtype=wp.float32, device=device)

From 95ea82e14c25fef9897c2e3c7aa94a3b609906bb Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Thu, 7 May 2026 17:24:18 +0000
Subject: [PATCH 05/26] Loosen rebuild-after-topology tolerance to 1e-5

The chained set_world_poses -> get_world_poses round-trip in
test_fabric_rebuild_after_topology_change goes through Warp's float32
SRT compose/decompose, which accumulates a few ULP of drift.

At the test's position magnitudes (~4-6), one float32 ULP is
~4.77e-7, so the prior atol of 1e-7 demanded sub-ULP agreement and
was sensitive to GPU/codegen variation -- it passed locally on the
A6000 but flaked in CI.

1e-5 corresponds to roughly 20 ULP at those magnitudes: tight enough
to catch any real bug (a wrong index or stale read would be at least
~1e-3 off given the test setup) and consistent with the shared
contract harness in frame_view_contract_utils.py, which already
documents and uses ATOL = 1e-5 for compose/decompose-through-float32
checks.
---
 source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index 1196ddcc5516..d066757f9cd7 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -209,7 +209,8 @@ def test_fabric_rebuild_after_topology_change(device, view_factory):
     ret_pos, _ = view.get_world_poses()
     pos_torch = wp.to_torch(ret_pos)
     expected = torch.tensor([[4.0, 5.0, 6.0], [4.0, 5.0, 6.0]], device=device)
-    assert torch.allclose(pos_torch, expected, atol=1e-7), f"Read after rebuild failed on {device}: {pos_torch}"
+    # 1e-5 ≈ 20 ULP at magnitudes ~4-6; absorbs float32 SRT compose/decompose drift.
+    assert torch.allclose(pos_torch, expected, atol=1e-5), f"Read after rebuild failed on {device}: {pos_torch}"
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])

From ae5a10f9041c91dca7ccd0590c1379786003b309 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Mon, 11 May 2026 14:31:44 +0000
Subject: [PATCH 06/26] Make world-dirty tracking per-view in FabricFrameView

The previous _static_dirty_stages set was keyed by stage_id, but
_sync_world_from_local_if_dirty only recomputes worldMatrix for the
*calling view's* children before clearing the flag.  With two views
on the same stage, view B's world-read would clear the flag set by
view A's set_local_poses, leaving A's worlds silently stale.

Replace the class-level set with a per-instance bool.  Each view now
tracks its own dirty state, which matches the actual scope of the
recompute kernel and removes a mutable ClassVar.

Also:
- Raise a clearer error when _compute_parent_fabric_indices is asked
  to look up the parent of a root-level prim (rsplit produces ""),
  instead of bubbling up the generic "not found in selection" message.
- Document on the remaining _static_hierarchy_cache that it is not
  thread-safe by design (Isaac Lab's loop is single-threaded; adding a
  lock would negate the per-stage caching benefit).
- Update the module docstring to reflect the per-view dirty model and
  drop the stale reference to IFabricHierarchy.update_world_xforms.
---
 .../sim/views/fabric_frame_view.py            | 34 +++++++++++++------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 5f536acf2609..2415ad249a6e 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -17,8 +17,10 @@
 * Read/write happens via ``wp.indexedfabricarray``, so the view-to-fabric mapping is
   baked into the array itself and the kernels just dereference ``ifa[view_index]``.
 * World ↔ local consistency is maintained:
-    - After ``set_local_poses``: stage is marked dirty; the next world read calls
-      ``IFabricHierarchy.update_world_xforms()`` to propagate local → world.
+    - After ``set_local_poses``: the view is marked dirty; the next world read
+      fires a Warp kernel that recomputes ``child_world = parent_world *
+      child_local`` for this view's children.  Tracking is per-view so that
+      multiple views on the same stage don't clear each other's dirty flag.
     - After ``set_world_poses``: a Warp kernel recomputes child localMatrix from
       parent worldMatrix on the fly using a parent indexed fabric array, so the
       next ``get_local_poses`` returns consistent values.
@@ -92,12 +94,12 @@ class FabricFrameView(BaseFrameView):
     _LOCAL_MATRIX_NAME = "omni:fabric:localMatrix"
 
     # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
-    # share one IFabricHierarchy handle; the dirty-stages set tracks which stages
-    # need ``update_world_xforms()`` before a world read.  ``ClassVar`` plus the
-    # ``_static_`` prefix make it explicit (and enforceable by type checkers) that
-    # these are class-level — never shadow them with ``self.<name> = ...``.
+    # share one IFabricHierarchy handle.  ``ClassVar`` plus the ``_static_`` prefix
+    # make it explicit (and enforceable by type checkers) that this is class-level —
+    # never shadow it with ``self.<name> = ...``.  The cache is not protected by a
+    # lock; Isaac Lab's simulation loop is single-threaded, and adding locking would
+    # negate the per-stage hit-cache cost it exists to avoid.
     _static_hierarchy_cache: ClassVar[dict[int, object]] = {}
-    _static_dirty_stages: ClassVar[set[int]] = set()
 
     def __init__(
         self,
@@ -139,6 +141,10 @@ def __init__(
         self._stage_id: int | None = None
         self._stage = None
         self._fabric_hierarchy = None
+        # Set by ``set_local_poses``; cleared by ``_sync_world_from_local_if_dirty``.
+        # Per-view (not per-stage) so concurrent views on the same stage don't clear
+        # each other's flag.
+        self._world_dirty: bool = False
 
         # Selections.
         self._trans_sel_ro = None
@@ -313,8 +319,8 @@ def set_local_poses(self, translations=None, orientations=None, indices=None):
         )
         wp.synchronize()
 
-        # Mark the stage dirty so the next world read calls update_world_xforms().
-        FabricFrameView._static_dirty_stages.add(self._stage_id)
+        # Mark this view's worlds stale so the next world read recomputes them.
+        self._world_dirty = True
 
     def get_local_poses(self, indices: wp.array | None = None) -> tuple[ProxyArray, ProxyArray]:
         if not self._use_fabric:
@@ -443,7 +449,7 @@ def _sync_world_from_local_if_dirty(self) -> None:
         does ``child_world = parent_world * child_local`` per child, leaving the
         Fabric-side localMatrix untouched.
         """
-        if self._stage_id is None or self._stage_id not in FabricFrameView._static_dirty_stages:
+        if not self._world_dirty:
             return
         # Make sure the parent indexed array is up-to-date with the current trans selection.
         if self._parent_world_ifa_ro is None:
@@ -460,7 +466,7 @@ def _sync_world_from_local_if_dirty(self) -> None:
             device=self._device,
         )
         wp.synchronize()
-        FabricFrameView._static_dirty_stages.discard(self._stage_id)
+        self._world_dirty = False
 
     def _sync_local_from_world(self, indices_wp: wp.array) -> None:
         """Recompute child ``localMatrix`` from (parent worldMatrix, child worldMatrix).
@@ -558,6 +564,12 @@ def _compute_parent_fabric_indices(self, selection) -> wp.array:
         indices: list[int] = []
         for prim_path in self.prim_paths:
             parent_path = prim_path.rsplit("/", 1)[0]
+            if parent_path == "":
+                raise RuntimeError(
+                    f"Child prim '{prim_path}' is at stage root and has no parent prim. "
+                    "FabricFrameView requires every prim to have a non-pseudoroot parent "
+                    "with Fabric world+local matrices."
+                )
             fabric_idx = path_to_fabric_idx.get(parent_path)
             if fabric_idx is None:
                 raise RuntimeError(

From 2327579a5dc0c56c4b27c6f5d3c49806454e4ca5 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Tue, 12 May 2026 13:11:23 +0000
Subject: [PATCH 07/26] Tighten FabricFrameView lifecycle and verify transpose
 math

Three review fixes plus the missing coverage for the transpose
storage convention.

* Hierarchy cache eviction: cache key is now (stage_id, fabric_id) so a
  recycled stage_id paired with a new Fabric attachment never returns a
  stale handle.  Added FabricFrameView.clear_static_caches() classmethod
  for explicit teardown and wired it into the test fixture so cached
  handles do not accumulate across the suite.

* PrepareForReuse no longer fires twice per sync.  Both _sync_local_from_world
  and _sync_world_from_local_if_dirty refresh trans_sel_ro exactly once,
  then read _world_ifa_ro / _local_ifa_ro / _parent_world_ifa_ro directly
  from the fields instead of going through the accessors.

* Class docstring rewritten to describe the actual PrepareForReuse policy
  (every accessor calls it; idempotent and cheap in the steady state).
  The prior wording claimed reads avoided PrepareForReuse, which has not
  been true since the indexedfabricarray rewrite.

* New regression tests for the transpose storage convention.  The standard
  fixture parents are translation-only, so the rotation block is identity
  and equal to its transpose - which means a wrong transpose convention
  would still pass every existing test.  Two new tests place a parent
  rotated 90 degrees around Z and verify the world<->local round-trip:
  - test_set_local_then_get_world_with_rotated_parent exercises
    update_indexed_world_matrix_from_local
  - test_set_world_then_get_local_with_rotated_parent exercises
    update_indexed_local_matrix_from_world
  Confirmed locally that flipping the multiply order in either kernel
  makes the matching test fail.

45 tests pass on cpu and cuda:0.
---
 .../sim/views/fabric_frame_view.py            | 81 +++++++++++++------
 .../test/sim/test_views_xform_prim_fabric.py  | 81 +++++++++++++++++++
 2 files changed, 137 insertions(+), 25 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 2415ad249a6e..697109c7d030 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -80,12 +80,18 @@ class FabricFrameView(BaseFrameView):
     ``omni:fabric:localMatrix``.  All other operations delegate to the
     internal USD view.
 
-    After every Fabric write (``set_world_poses``, ``set_local_poses``,
-    ``set_scales``), :meth:`PrepareForReuse` is called on the
-    ``PrimSelection`` to notify the FSD renderer that Fabric data has
-    changed and to detect topology changes that require rebuilding
-    internal mappings.  Read operations do not call PrepareForReuse to
-    avoid unnecessary renderer invalidation.
+    Every accessor for a selection's indexed fabric array
+    (:meth:`_get_world_ro_array`, :meth:`_get_local_ro_array`,
+    :meth:`_get_world_rw_array`, :meth:`_get_local_rw_array`,
+    :meth:`_get_parent_world_ro_array`) calls :meth:`PrepareForReuse` on its
+    backing ``PrimSelection`` to detect Fabric topology changes that would
+    require rebuilding the view→fabric index mapping.  ``PrepareForReuse``
+    is idempotent and cheap in the steady state (returns ``False`` with no
+    rebuild), so the per-access cost is negligible.  The two sync helpers
+    (:meth:`_sync_local_from_world`, :meth:`_sync_world_from_local_if_dirty`)
+    refresh ``trans_sel_ro`` at most once per call to avoid the redundant
+    USDRT round-trip that would otherwise occur when ``_world_ifa_ro``,
+    ``_local_ifa_ro`` and ``_parent_world_ifa_ro`` are read consecutively.
 
     Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept ``wp.array``.
     """
@@ -94,12 +100,29 @@ class FabricFrameView(BaseFrameView):
     _LOCAL_MATRIX_NAME = "omni:fabric:localMatrix"
 
     # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
-    # share one IFabricHierarchy handle.  ``ClassVar`` plus the ``_static_`` prefix
-    # make it explicit (and enforceable by type checkers) that this is class-level —
-    # never shadow it with ``self.<name> = ...``.  The cache is not protected by a
-    # lock; Isaac Lab's simulation loop is single-threaded, and adding locking would
-    # negate the per-stage hit-cache cost it exists to avoid.
-    _static_hierarchy_cache: ClassVar[dict[int, object]] = {}
+    # share one ``IFabricHierarchy`` handle, keyed by ``(stage_id, fabric_id)`` so
+    # that a recycled ``stage_id`` paired with a fresh ``fabric_id`` never reuses a
+    # stale handle.  ``ClassVar`` plus the ``_static_`` prefix make it explicit (and
+    # enforceable by type checkers) that this is class-level — never shadow it with
+    # ``self.<name> = ...``.  The cache is not protected by a lock; Isaac Lab's
+    # simulation loop is single-threaded, and adding locking would negate the
+    # per-stage hit-cache cost it exists to avoid.  Call
+    # :meth:`clear_static_caches` on stage teardown if you tear down many stages
+    # in one process.
+    _static_hierarchy_cache: ClassVar[dict[tuple[int, int], object]] = {}
+
+    @classmethod
+    def clear_static_caches(cls) -> None:
+        """Drop all cached ``IFabricHierarchy`` handles.
+
+        Call this after tearing down a USD stage if the process will continue
+        opening new stages.  Cached handles are otherwise retained for the
+        lifetime of the process, which can leak memory in long test suites and,
+        in theory, allow a recycled ``stage_id`` paired with a new Fabric
+        attachment to read a stale handle (though the ``(stage_id, fabric_id)``
+        cache key already guards against the latter).
+        """
+        cls._static_hierarchy_cache.clear()
 
     def __init__(
         self,
@@ -451,14 +474,15 @@ def _sync_world_from_local_if_dirty(self) -> None:
         """
         if not self._world_dirty:
             return
-        # Make sure the parent indexed array is up-to-date with the current trans selection.
-        if self._parent_world_ifa_ro is None:
-            self._parent_world_ifa_ro = self._build_parent_indexed_array(self._trans_sel_ro)
+        # Refresh trans_sel_ro once, then read _local_ifa_ro and _parent_world_ifa_ro
+        # directly to avoid calling PrepareForReuse twice on the same selection.
+        if self._trans_sel_ro.PrepareForReuse() or self._parent_world_ifa_ro is None:
+            self._rebuild_trans_ro_arrays()
         wp.launch(
             kernel=fabric_utils.update_indexed_world_matrix_from_local,
             dim=self.count,
             inputs=[
-                self._get_local_ro_array(),
+                self._local_ifa_ro,
                 self._parent_world_ifa_ro,
                 self._get_world_rw_array(),
                 self._view_indices,
@@ -476,12 +500,15 @@ def _sync_local_from_world(self, indices_wp: wp.array) -> None:
         not provide a built-in world → local sync, so we do it via a Warp kernel
         using the parent indexed fabric array.
         """
+        # Refresh trans_sel_ro once; _world_ifa_ro and _parent_world_ifa_ro share it.
+        if self._trans_sel_ro.PrepareForReuse() or self._parent_world_ifa_ro is None:
+            self._rebuild_trans_ro_arrays()
         wp.launch(
             kernel=fabric_utils.update_indexed_local_matrix_from_world,
             dim=indices_wp.shape[0],
             inputs=[
-                self._get_world_ro_array(),
-                self._get_parent_world_ro_array(),
+                self._world_ifa_ro,
+                self._parent_world_ifa_ro,
                 self._get_local_rw_array(),
                 indices_wp,
             ],
@@ -611,18 +638,22 @@ def _initialize_fabric(self) -> None:
         self._stage = usdrt.Usd.Stage.Attach(self._stage_id)
         self._stage.SynchronizeToFabric()
 
-        # Reuse (or create) a hierarchy handle for this stage.  Enable change-tracking
+        # Reuse (or create) a hierarchy handle for this stage.  The cache key is
+        # ``(stage_id, fabric_id)`` so a recycled ``stage_id`` paired with a fresh
+        # Fabric attachment never returns a stale handle.  Enable change-tracking
         # BEFORE we author any local matrices, so the per-prim
-        # ``SetLocalXformFromUsd`` calls below mark themselves dirty and the next
-        # ``update_world_xforms()`` walks the parent chain to populate worldMatrix.
-        if self._stage_id not in FabricFrameView._static_hierarchy_cache:
+        # ``SetLocalXformFromUsd`` calls below mark themselves dirty.
+        fabric_id = self._stage.GetFabricId()
+        self._fabric_id = fabric_id
+        cache_key = (self._stage_id, fabric_id)
+        if cache_key not in FabricFrameView._static_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                self._stage.GetFabricId(), self._stage.GetStageIdAsStageId()
+                fabric_id, self._stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)
-            FabricFrameView._static_hierarchy_cache[self._stage_id] = hierarchy
-        self._fabric_hierarchy = FabricFrameView._static_hierarchy_cache[self._stage_id]
+            FabricFrameView._static_hierarchy_cache[cache_key] = hierarchy
+        self._fabric_hierarchy = FabricFrameView._static_hierarchy_cache[cache_key]
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
         # attributes.  Our ``trans_ro`` selection requires both, so prims missing either
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index d066757f9cd7..5d36edae3f94 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -41,6 +41,9 @@ def test_setup_teardown():
     yield
     sim_utils.clear_stage()
     sim_utils.SimulationContext.clear_instance()
+    # Each test creates a fresh stage; drop cached IFabricHierarchy handles so
+    # the next test does not reuse a handle attached to the disposed stage.
+    FrameView.clear_static_caches()
 
 
 def _skip_if_unavailable(device: str):
@@ -276,3 +279,81 @@ def test_get_scales_fabric_path(device, view_factory):
     # Default scale should be (1, 1, 1)
     expected = torch.tensor([[1.0, 1.0, 1.0]], dtype=torch.float32, device=device)
     torch.testing.assert_close(scales_t, expected, atol=1e-4, rtol=0)
+
+
+# ------------------------------------------------------------------
+# Transpose-convention verification: world ↔ local kernels rely on the
+# identity ``(A·B)ᵀ = Bᵀ·Aᵀ`` to drop explicit transposes when operating
+# on Fabric's column-transposed matrix storage.  The translation-only
+# parents used by the standard fixture cannot distinguish the right
+# convention from the wrong one — the rotation block is identity and
+# equals its own transpose.  These tests use a parent rotated 90° around
+# Z so that an incorrect storage convention would produce a clearly
+# wrong child pose.
+# ------------------------------------------------------------------
+
+
+# Parent at (0, 0, 1) rotated +90° around Z (so the parent X axis points
+# along world +Y).  Quaternion components in (x, y, z, w) order.
+_ROTATED_PARENT_POS = (0.0, 0.0, 1.0)
+_ROTATED_PARENT_QUAT_XYZW = (0.0, 0.0, 0.70710678, 0.70710678)
+
+
+def _build_rotated_parent_view(device: str) -> "FrameView":
+    """Build a 1-env FabricFrameView whose parent is rotated 90° around Z."""
+    stage = sim_utils.get_current_stage()
+    sim_utils.create_prim(
+        "/World/Parent_0",
+        "Xform",
+        translation=_ROTATED_PARENT_POS,
+        orientation=_ROTATED_PARENT_QUAT_XYZW,
+        stage=stage,
+    )
+    sim_utils.create_prim("/World/Parent_0/Child", "Camera", translation=(0.0, 0.0, 0.0), stage=stage)
+    sim_utils.SimulationContext(sim_utils.SimulationCfg(dt=0.01, device=device, use_fabric=True))
+    view = FrameView("/World/Parent_.*/Child", device=device)
+    view.get_world_poses()  # force Fabric init and USD→Fabric seed
+    return view
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
+def test_set_local_then_get_world_with_rotated_parent(device):
+    """Verify ``update_indexed_world_matrix_from_local`` under non-identity parent rotation.
+
+    With parent rotated +90° around Z, a child local translation of (1, 0, 0)
+    must produce world translation (0, 1, 1) — parent_pos + R · local.  If the
+    transpose convention in the kernel were wrong, the rotation would flip
+    direction and the world position would land at (0, -1, 1) instead.
+    """
+    _skip_if_unavailable(device)
+    view = _build_rotated_parent_view(device)
+
+    new_local = wp.zeros((1, 3), dtype=wp.float32, device=device)
+    wp.launch(kernel=_fill_position, dim=1, inputs=[new_local, 1.0, 0.0, 0.0], device=device)
+    identity_quat = wp.from_torch(torch.tensor([[0.0, 0.0, 0.0, 1.0]], dtype=torch.float32, device=device))
+    view.set_local_poses(translations=new_local, orientations=identity_quat)
+
+    world_pos, _ = view.get_world_poses()
+    expected = torch.tensor([[0.0, 1.0, 1.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(world_pos.torch, expected, atol=1e-5, rtol=0)
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
+def test_set_world_then_get_local_with_rotated_parent(device):
+    """Verify ``update_indexed_local_matrix_from_world`` under non-identity parent rotation.
+
+    With parent rotated +90° around Z and at (0, 0, 1), writing child world
+    translation (5, 0, 2) must yield child local translation Rᵀ · (5, 0, 1) =
+    (0, -5, 1).  A wrong transpose convention would invert the rotation in the
+    wrong direction and produce (0, 5, 1) instead.
+    """
+    _skip_if_unavailable(device)
+    view = _build_rotated_parent_view(device)
+
+    new_world = wp.zeros((1, 3), dtype=wp.float32, device=device)
+    wp.launch(kernel=_fill_position, dim=1, inputs=[new_world, 5.0, 0.0, 2.0], device=device)
+    view.set_world_poses(positions=new_world)
+
+    local_pos, _ = view.get_local_poses()
+    expected = torch.tensor([[0.0, -5.0, 1.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(local_pos.torch, expected, atol=1e-5, rtol=0)

From b03b18dad991d3c8b17cb3c9daa01e7662e89f83 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Tue, 12 May 2026 15:13:22 +0000
Subject: [PATCH 08/26] Add API page for isaaclab_physx.sim.views

FabricFrameView is referenced by fully-qualified name in the migration
guide and in this PR's changelog fragment, but no RST file documented
the module - so the Sphinx :class: and :meth: cross-refs were not
resolvable.  Add a thin automodule page mirroring the sibling pages
under docs/source/api/lab_physx/ and register it in the API index
toctree.  This also picks up the new clear_static_caches() classmethod
automatically via :members:.
---
 docs/source/api/index.rst                       |  2 ++
 .../api/lab_physx/isaaclab_physx.sim.views.rst  | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 docs/source/api/lab_physx/isaaclab_physx.sim.views.rst

diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index a378a2333e0f..4812e93c7f97 100644
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -131,6 +131,7 @@ The following modules are available in the ``isaaclab_physx`` extension:
    sensors
    sim.schemas
    sim.spawners
+   sim.views
 
 .. toctree::
    :hidden:
@@ -142,6 +143,7 @@ The following modules are available in the ``isaaclab_physx`` extension:
    lab_physx/isaaclab_physx.sensors
    lab_physx/isaaclab_physx.sim.schemas
    lab_physx/isaaclab_physx.sim.spawners
+   lab_physx/isaaclab_physx.sim.views
 
 isaaclab_newton extension
 -------------------------
diff --git a/docs/source/api/lab_physx/isaaclab_physx.sim.views.rst b/docs/source/api/lab_physx/isaaclab_physx.sim.views.rst
new file mode 100644
index 000000000000..4ee8e9e96745
--- /dev/null
+++ b/docs/source/api/lab_physx/isaaclab_physx.sim.views.rst
@@ -0,0 +1,17 @@
+isaaclab\_physx.sim.views
+=========================
+
+.. automodule:: isaaclab_physx.sim.views
+
+   .. rubric:: Classes
+
+   .. autosummary::
+
+      FabricFrameView
+
+Fabric Frame View
+-----------------
+
+.. autoclass:: FabricFrameView
+   :members:
+   :show-inheritance:

From 8cb7c4dc1728f40b0f71a0c23b76c09f43ff842e Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Tue, 12 May 2026 15:22:56 +0000
Subject: [PATCH 09/26] Document isaaclab.utils.warp.fabric kernels

The submodule was not surfaced anywhere in the Sphinx tree, so :func:
cross-references to its kernels (added in the changelog fragment for
this branch and used by FabricFrameView throughout) did not resolve.

Add a Warp Fabric kernels subsection to isaaclab.utils.rst that
automodule's the submodule, and add __all__ to fabric.py so the
generated page lists only the eight public kernels - the type aliases
(FabricArrayMat44d, ArrayUInt32, ...) and the re-imported `wp` /
`TYPE_CHECKING` / `Any` symbols stay out of the rendered docs.

The page covers both the pre-existing kernels
(compose/decompose_fabric_transformation_matrix_*_warp_arrays,
set_view_to_fabric_array, arange_k) and the four kernels added on
this branch.
---
 docs/source/api/lab/isaaclab.utils.rst        | 13 +++++++++++++
 source/isaaclab/isaaclab/utils/warp/fabric.py | 11 +++++++++++
 2 files changed, 24 insertions(+)

diff --git a/docs/source/api/lab/isaaclab.utils.rst b/docs/source/api/lab/isaaclab.utils.rst
index 5b352152e0b5..bf9291c25b3a 100644
--- a/docs/source/api/lab/isaaclab.utils.rst
+++ b/docs/source/api/lab/isaaclab.utils.rst
@@ -188,3 +188,16 @@ Warp operations
    :members:
    :imported-members:
    :show-inheritance:
+
+Warp Fabric kernels
+^^^^^^^^^^^^^^^^^^^
+
+Warp kernels for reading and writing Fabric ``Matrix4d`` attributes
+(``omni:fabric:worldMatrix`` / ``omni:fabric:localMatrix``) via
+:class:`wp.fabricarray` and :class:`wp.indexedfabricarray`.  Used by
+:class:`~isaaclab_physx.sim.views.FabricFrameView` to keep child world and
+local matrices consistent without round-tripping through USD.
+
+.. automodule:: isaaclab.utils.warp.fabric
+   :members:
+   :show-inheritance:
diff --git a/source/isaaclab/isaaclab/utils/warp/fabric.py b/source/isaaclab/isaaclab/utils/warp/fabric.py
index bcf65fc02ae9..0754c917a9ad 100644
--- a/source/isaaclab/isaaclab/utils/warp/fabric.py
+++ b/source/isaaclab/isaaclab/utils/warp/fabric.py
@@ -15,6 +15,17 @@
 
 import warp as wp
 
+__all__ = [
+    "arange_k",
+    "compose_fabric_transformation_matrix_from_warp_arrays",
+    "compose_indexed_fabric_transforms",
+    "decompose_fabric_transformation_matrix_to_warp_arrays",
+    "decompose_indexed_fabric_transforms",
+    "set_view_to_fabric_array",
+    "update_indexed_local_matrix_from_world",
+    "update_indexed_world_matrix_from_local",
+]
+
 if TYPE_CHECKING:
     FabricArrayUInt32 = Any
     FabricArrayMat44d = Any

From 2a54a8a1eaadf99c750a727b9863f1dbf843437a Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Tue, 12 May 2026 17:55:23 +0000
Subject: [PATCH 10/26] Fix Fabric seed for scaled parents and scaled children

_sync_fabric_from_usd_initial had two scale-related bugs in the
USD->Fabric seed path that produced silently wrong matrices whenever a
parent or child had a non-unit scale.  Both kernels that recompute
world<->local consistency read those seeded matrices, so the error
propagated.

* Parent worldMatrix was composed with a hardcoded (1, 1, 1) scale.
  Orthonormalize() strips scale from the local-to-world transform, so
  we now extract the scale via Gf.Transform.GetScale() *before*
  orthonormalizing and pass it through to the compose kernel.

* Child localMatrix was composed with the empty-array sentinel for the
  scale slot, leaving the kernel-side scale at the identity default.
  We now pass the locally-authored scale (already fetched via
  _usd_view.get_scales()) so the matrix carries the right scale.

* Child worldMatrix is still composed from get_world_poses() position
  and orientation plus the child's local scale, which is wrong when a
  parent has non-unit world scale.  Instead of fixing the seed by hand
  (would require per-child world-scale lookups), mark the view dirty at
  the end of the seed.  The very next world read fires
  _sync_world_from_local_if_dirty, which computes
  child_world = parent_world * child_local on the GPU - and with both
  matrices now correctly scaled, the multiply produces the right
  world-space scale automatically.

Added test_initial_seed_with_scaled_parent regression test: parent
world scale (2, 1, 1), child local scale (3, 1, 1).  Locally verified
the test fails when either fix is reverted in isolation.
---
 .../sim/views/fabric_frame_view.py            | 31 ++++++++----
 .../test/sim/test_views_xform_prim_fabric.py  | 48 +++++++++++++++++++
 2 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 697109c7d030..2d9fe6fa0c06 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -34,7 +34,7 @@
 import torch
 import warp as wp
 
-from pxr import Usd, UsdGeom
+from pxr import Gf, Usd, UsdGeom
 
 import isaaclab.sim as sim_utils
 from isaaclab.app.settings_manager import SettingsManager
@@ -769,7 +769,10 @@ def _sync_fabric_from_usd_initial(self) -> None:
             ],
             device=self._device,
         )
-        # Compose into child localMatrix.
+        # Compose into child localMatrix.  Pass the locally-authored scale so
+        # that a subsequent ``_sync_world_from_local_if_dirty`` produces the
+        # right world-space scale (``world = parent_world * local`` carries
+        # ``local``'s scale through the multiply).
         wp.launch(
             kernel=fabric_utils.compose_indexed_fabric_transforms,
             dim=self.count,
@@ -777,7 +780,7 @@ def _sync_fabric_from_usd_initial(self) -> None:
                 self._local_ifa_rw,
                 _to_float32_2d(local_pos_ta.warp),
                 _to_float32_2d(local_ori_ta.warp),
-                self._fabric_empty_2d_array_sentinel,
+                _to_float32_2d(scales_wp),
                 False,
                 False,
                 False,
@@ -793,23 +796,25 @@ def _sync_fabric_from_usd_initial(self) -> None:
             xform_cache = UsdGeom.XformCache(Usd.TimeCode.Default())
             world_pos_rows: list[list[float]] = []
             world_ori_rows: list[list[float]] = []
+            world_scale_rows: list[list[float]] = []
+            decomposer = Gf.Transform()
             for path in unique_parent_paths:
                 prim = usd_stage.GetPrimAtPath(path)
                 tf = xform_cache.GetLocalToWorldTransform(prim)
+                # Extract scale before ``Orthonormalize`` strips it from the rows.
+                decomposer.SetMatrix(tf)
+                s = decomposer.GetScale()
                 tf.Orthonormalize()
                 t = tf.ExtractTranslation()
                 q = tf.ExtractRotationQuat()
                 img, real = q.GetImaginary(), q.GetReal()
                 world_pos_rows.append([float(t[0]), float(t[1]), float(t[2])])
                 world_ori_rows.append([float(img[0]), float(img[1]), float(img[2]), float(real)])
+                world_scale_rows.append([float(s[0]), float(s[1]), float(s[2])])
             parent_view_indices = wp.array(list(range(len(unique_parent_paths))), dtype=wp.uint32, device=self._device)
             parent_pos_wp = wp.array(world_pos_rows, dtype=wp.float32, device=self._device)
             parent_ori_wp = wp.array(world_ori_rows, dtype=wp.float32, device=self._device)
-            parent_unit_scale = wp.array(
-                [[1.0, 1.0, 1.0]] * len(unique_parent_paths),
-                dtype=wp.float32,
-                device=self._device,
-            )
+            parent_scale_wp = wp.array(world_scale_rows, dtype=wp.float32, device=self._device)
             # Compose worldMatrix for parents (use a one-shot indexed array against
             # ``world_sel_rw`` keyed on the unique parent paths).
             parent_world_rw = wp.indexedfabricarray(
@@ -823,7 +828,7 @@ def _sync_fabric_from_usd_initial(self) -> None:
                     parent_world_rw,
                     parent_pos_wp,
                     parent_ori_wp,
-                    parent_unit_scale,
+                    parent_scale_wp,
                     False,
                     False,
                     False,
@@ -833,6 +838,14 @@ def _sync_fabric_from_usd_initial(self) -> None:
             )
         wp.synchronize()
 
+        # The child worldMatrix above was composed with the child's *local* scale,
+        # which is wrong whenever a parent has a non-unit world scale.  Mark the
+        # view dirty so the next world read fires ``_sync_world_from_local_if_dirty``
+        # and recomputes ``child_world = parent_world * child_local`` — that
+        # multiply produces the correct world-space scale because the parent and
+        # local matrices now both carry the right scale (seeded above).
+        self._world_dirty = True
+
     def _compute_fabric_indices_for(self, selection, paths: list[str]) -> wp.array:
         """Path-dict lookup helper used to build one-shot indexed arrays for a custom path set."""
         fabric_paths = selection.GetPaths()
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index 5d36edae3f94..c0c885c43fc4 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -357,3 +357,51 @@ def test_set_world_then_get_local_with_rotated_parent(device):
     local_pos, _ = view.get_local_poses()
     expected = torch.tensor([[0.0, -5.0, 1.0]], dtype=torch.float32, device=device)
     torch.testing.assert_close(local_pos.torch, expected, atol=1e-5, rtol=0)
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
+def test_initial_seed_with_scaled_parent(device):
+    """Verify the initial USD→Fabric seed handles non-unit scales correctly.
+
+    Sets up a parent with world scale (2, 1, 1) and a child with local scale
+    (3, 1, 1) at local translation (1, 0, 0).  Expected world-space values for
+    the child:
+
+    * world scale = parent_scale * child_local_scale = (6, 1, 1)
+    * world position = parent_pos + parent_scale * child_local_pos
+                     = (0, 0, 1) + (2 * 1, 0, 0) = (2, 0, 1)
+
+    If the parent's worldMatrix is seeded with a hardcoded unit scale,
+    ``get_scales`` returns (3, 1, 1) instead of (6, 1, 1) and ``get_world_poses``
+    returns (1, 0, 1) instead of (2, 0, 1).  If the child's localMatrix is
+    seeded without scale, after ``_sync_world_from_local_if_dirty`` the world
+    scale collapses to (2, 1, 1).  This test catches both regressions.
+    """
+    _skip_if_unavailable(device)
+    stage = sim_utils.get_current_stage()
+    sim_utils.create_prim("/World/Parent_0", "Xform", translation=(0.0, 0.0, 1.0), scale=(2.0, 1.0, 1.0), stage=stage)
+    sim_utils.create_prim(
+        "/World/Parent_0/Child",
+        "Camera",
+        translation=(1.0, 0.0, 0.0),
+        scale=(3.0, 1.0, 1.0),
+        stage=stage,
+    )
+    sim_utils.SimulationContext(sim_utils.SimulationCfg(dt=0.01, device=device, use_fabric=True))
+    view = FrameView("/World/Parent_.*/Child", device=device)
+
+    world_pos, _ = view.get_world_poses()
+    torch.testing.assert_close(
+        world_pos.torch,
+        torch.tensor([[2.0, 0.0, 1.0]], dtype=torch.float32, device=device),
+        atol=1e-5,
+        rtol=0,
+    )
+
+    scales = wp.to_torch(view.get_scales())
+    torch.testing.assert_close(
+        scales,
+        torch.tensor([[6.0, 1.0, 1.0]], dtype=torch.float32, device=device),
+        atol=1e-5,
+        rtol=0,
+    )

From 0fd5aa6ae7c7871b3617163e71de6f3a5b51d235 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Wed, 13 May 2026 10:57:10 +0000
Subject: [PATCH 11/26] Test per-view dirty isolation and fix the hierarchy
 cache key

While adding a regression test for the per-view world-dirty flag, I
discovered the IFabricHierarchy cache silently misses on every view
because ``Stage.GetFabricId()`` returns a fresh ``FabricId`` wrapper
on every call, with no value equality between wrappers for the same
underlying Fabric.  The cache stored (stage_id, wrapper) tuples, so
two views on the same stage produced two distinct cache keys and
re-fetched the hierarchy on each init.  The bug was harmless in
practice -- USDRT's ``get_fabric_hierarchy`` itself returns a
process-wide singleton per Fabric stage, so both views happened to
end up with the same handle anyway -- but the cache wasn't doing the
work it was advertised to do.

Fix: key the cache on ``(stage_id, fabric_id.id)`` where ``.id`` is
the stable ``int`` underneath the wrapper.

The new test exercises the multi-view-per-stage scenario:

* two FabricFrameView instances on disjoint child prims under
  different parent sub-trees of one stage
* writes on view A must not dirty view B
* world reads on view B must not clear view A's dirty flag
* both views share one cached IFabricHierarchy and the cache has
  exactly one entry after both inits (this assertion is what
  surfaced the FabricId-wrapper bug)
* symmetric pass: writes on B must not affect A's post-read state

Verified locally that the test fails with both the wrapper-keyed
cache (cache size > 1) and with a synthetic stage-shared dirty flag
(cross-view stomp).

Module-level coverage of fabric_frame_view.py with this test added:
85% line / 78% branch.  Remaining uncovered code is the USD-fallback
delegations (Fabric disabled), defensive RuntimeError raises, and
topology-rebuild branches inside the accessors.

49 tests pass on cpu and cuda:0.
---
 .../sim/views/fabric_frame_view.py            | 30 ++++---
 .../test/sim/test_views_xform_prim_fabric.py  | 88 +++++++++++++++++++
 2 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 2d9fe6fa0c06..6bd41a8ee8b7 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -100,13 +100,16 @@ class FabricFrameView(BaseFrameView):
     _LOCAL_MATRIX_NAME = "omni:fabric:localMatrix"
 
     # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
-    # share one ``IFabricHierarchy`` handle, keyed by ``(stage_id, fabric_id)`` so
-    # that a recycled ``stage_id`` paired with a fresh ``fabric_id`` never reuses a
-    # stale handle.  ``ClassVar`` plus the ``_static_`` prefix make it explicit (and
-    # enforceable by type checkers) that this is class-level — never shadow it with
-    # ``self.<name> = ...``.  The cache is not protected by a lock; Isaac Lab's
-    # simulation loop is single-threaded, and adding locking would negate the
-    # per-stage hit-cache cost it exists to avoid.  Call
+    # share one ``IFabricHierarchy`` handle, keyed by ``(stage_id, fabric_id_int)``
+    # so that a recycled ``stage_id`` paired with a fresh Fabric attachment never
+    # reuses a stale handle.  The Fabric id is stored as a plain ``int`` (extracted
+    # from ``FabricId.id``) because the ``FabricId`` wrapper itself is freshly
+    # allocated on every ``GetFabricId()`` call and has no value equality, which
+    # would defeat the cache.  ``ClassVar`` plus the ``_static_`` prefix make it
+    # explicit (and enforceable by type checkers) that this is class-level — never
+    # shadow it with ``self.<name> = ...``.  The cache is not protected by a lock;
+    # Isaac Lab's simulation loop is single-threaded, and adding locking would
+    # negate the per-stage hit-cache cost it exists to avoid.  Call
     # :meth:`clear_static_caches` on stage teardown if you tear down many stages
     # in one process.
     _static_hierarchy_cache: ClassVar[dict[tuple[int, int], object]] = {}
@@ -639,13 +642,16 @@ def _initialize_fabric(self) -> None:
         self._stage.SynchronizeToFabric()
 
         # Reuse (or create) a hierarchy handle for this stage.  The cache key is
-        # ``(stage_id, fabric_id)`` so a recycled ``stage_id`` paired with a fresh
-        # Fabric attachment never returns a stale handle.  Enable change-tracking
-        # BEFORE we author any local matrices, so the per-prim
+        # ``(stage_id, fabric_id_int)`` so a recycled ``stage_id`` paired with a fresh
+        # Fabric attachment never returns a stale handle.  Note: ``GetFabricId()``
+        # returns a fresh ``FabricId`` *wrapper* on every call, and the wrapper has
+        # no value equality (two wrappers for the same underlying Fabric compare
+        # unequal and hash differently), so we key on its stable ``.id`` int.
+        # Enable change-tracking BEFORE we author any local matrices, so the per-prim
         # ``SetLocalXformFromUsd`` calls below mark themselves dirty.
         fabric_id = self._stage.GetFabricId()
-        self._fabric_id = fabric_id
-        cache_key = (self._stage_id, fabric_id)
+        self._fabric_id = fabric_id.id
+        cache_key = (self._stage_id, fabric_id.id)
         if cache_key not in FabricFrameView._static_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
                 fabric_id, self._stage.GetStageIdAsStageId()
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index c0c885c43fc4..059446f8a5b7 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -405,3 +405,91 @@ def test_initial_seed_with_scaled_parent(device):
         atol=1e-5,
         rtol=0,
     )
+
+
+# ------------------------------------------------------------------
+# Multi-view per stage: per-view dirty-flag isolation
+# ------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
+def test_multi_view_per_view_dirty_isolation(device):
+    """Two ``FabricFrameView`` instances on the same stage must not clear each other's
+    pending local→world sync.
+
+    Background: an earlier implementation stored the world-dirty flag at the class
+    level keyed by ``stage_id``.  With two views on the same stage, view B reading
+    worlds would clear the flag set by view A's ``set_local_poses``, leaving A's
+    world matrices silently stale because A's per-view sync kernel never fired.
+
+    This test sets up two views over disjoint child prims (under different parent
+    sub-trees of the same stage), interleaves their writes and reads, and verifies:
+
+    * view A's ``set_local_poses`` only dirties view A
+    * view B's ``get_world_poses`` does not clear view A's flag
+    * after both views' world reads, each one's worlds reflect its own latest local
+    * neither view's reads/writes corrupt the other view's poses
+    """
+    _skip_if_unavailable(device)
+    stage = sim_utils.get_current_stage()
+
+    # Two disjoint sub-trees under the same stage.  Use different parent names so
+    # the regex patterns for the two views don't accidentally overlap.
+    sim_utils.create_prim("/World/EnvA_0", "Xform", translation=(0.0, 0.0, 1.0), stage=stage)
+    sim_utils.create_prim("/World/EnvA_0/ChildA", "Camera", translation=(0.1, 0.0, 0.0), stage=stage)
+    sim_utils.create_prim("/World/EnvB_0", "Xform", translation=(0.0, 0.0, 2.0), stage=stage)
+    sim_utils.create_prim("/World/EnvB_0/ChildB", "Camera", translation=(0.2, 0.0, 0.0), stage=stage)
+
+    sim_utils.SimulationContext(sim_utils.SimulationCfg(dt=0.01, device=device, use_fabric=True))
+    view_a = FrameView("/World/EnvA_.*/ChildA", device=device)
+    view_b = FrameView("/World/EnvB_.*/ChildB", device=device)
+
+    # Initial reads — triggers Fabric init + the seed-time ``_world_dirty = True``
+    # path on both views, then clears it.
+    expected_a0 = torch.tensor([[0.1, 0.0, 1.0]], dtype=torch.float32, device=device)
+    expected_b0 = torch.tensor([[0.2, 0.0, 2.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(view_a.get_world_poses()[0].torch, expected_a0, atol=1e-5, rtol=0)
+    torch.testing.assert_close(view_b.get_world_poses()[0].torch, expected_b0, atol=1e-5, rtol=0)
+    assert view_a._world_dirty is False
+    assert view_b._world_dirty is False
+    # Both views must reuse the same cached IFabricHierarchy (one stage = one handle).
+    assert view_a._fabric_hierarchy is view_b._fabric_hierarchy
+    assert len(FrameView._static_hierarchy_cache) == 1
+
+    # Write a new local pose on view A only.
+    new_local_a = wp.zeros((1, 3), dtype=wp.float32, device=device)
+    wp.launch(kernel=_fill_position, dim=1, inputs=[new_local_a, 1.0, 0.0, 0.0], device=device)
+    identity_quat = wp.from_torch(torch.tensor([[0.0, 0.0, 0.0, 1.0]], dtype=torch.float32, device=device))
+    view_a.set_local_poses(translations=new_local_a, orientations=identity_quat)
+
+    # Only view A should be dirty.  Critical: a per-stage flag would have dirtied
+    # both views (or neither) at this point.
+    assert view_a._world_dirty is True, "set_local_poses should mark its own view dirty"
+    assert view_b._world_dirty is False, "set_local_poses on view A must not dirty view B"
+
+    # Read worlds from view B FIRST.  With a per-stage flag, B's
+    # ``_sync_world_from_local_if_dirty`` would fire and clear the flag, leaving A
+    # stale.  With the per-view flag, B's read is a no-op sync-wise.
+    torch.testing.assert_close(view_b.get_world_poses()[0].torch, expected_b0, atol=1e-5, rtol=0)
+    assert view_b._world_dirty is False
+    assert view_a._world_dirty is True, "view B's world read must not clear view A's dirty flag"
+
+    # Now read view A's worlds — sync fires, world reflects the new local.
+    expected_a1 = torch.tensor([[1.0, 0.0, 1.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(view_a.get_world_poses()[0].torch, expected_a1, atol=1e-5, rtol=0)
+    assert view_a._world_dirty is False
+
+    # Symmetric pass: write on B, ensure A is undisturbed.
+    new_local_b = wp.zeros((1, 3), dtype=wp.float32, device=device)
+    wp.launch(kernel=_fill_position, dim=1, inputs=[new_local_b, 3.0, 0.0, 0.0], device=device)
+    view_b.set_local_poses(translations=new_local_b, orientations=identity_quat)
+    assert view_a._world_dirty is False
+    assert view_b._world_dirty is True
+
+    # A's worlds must still read back the post-set-local value from above; no
+    # cross-view stomp on the world matrix.
+    torch.testing.assert_close(view_a.get_world_poses()[0].torch, expected_a1, atol=1e-5, rtol=0)
+    expected_b1 = torch.tensor([[3.0, 0.0, 2.0]], dtype=torch.float32, device=device)
+    torch.testing.assert_close(view_b.get_world_poses()[0].torch, expected_b1, atol=1e-5, rtol=0)
+    assert view_a._world_dirty is False
+    assert view_b._world_dirty is False

From 3e2de5fd42ac287d3fc895110953def34f5a7078 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Thu, 14 May 2026 08:18:24 +0000
Subject: [PATCH 12/26] refactor: use wp.where instead of if/else for broadcast
 index in Warp kernels

Replace if/else broadcast branching with wp.where() for branchless
predicated selection. wp.select was deprecated in Warp 1.7 and removed
in 1.10; wp.where has the more intuitive (cond, true, false) order.
---
 source/isaaclab/isaaclab/utils/warp/fabric.py | 30 ++++---------------
 1 file changed, 6 insertions(+), 24 deletions(-)

diff --git a/source/isaaclab/isaaclab/utils/warp/fabric.py b/source/isaaclab/isaaclab/utils/warp/fabric.py
index 0754c917a9ad..6f9963f290a1 100644
--- a/source/isaaclab/isaaclab/utils/warp/fabric.py
+++ b/source/isaaclab/isaaclab/utils/warp/fabric.py
@@ -143,29 +143,20 @@ def compose_fabric_transformation_matrix_from_warp_arrays(
     position, rotation, scale = _decompose_transformation_matrix(wp.mat44f(fabric_matrices[fabric_index]))
     # update position (check if array has elements, not just if it exists)
     if array_positions.shape[0] > 0:
-        if broadcast_positions:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_positions, 0, i)
         position[0] = array_positions[index, 0]
         position[1] = array_positions[index, 1]
         position[2] = array_positions[index, 2]
     # update orientation (convert from wxyz to xyzw for Warp)
     if array_orientations.shape[0] > 0:
-        if broadcast_orientations:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_orientations, 0, i)
         rotation[0] = array_orientations[index, 0]  # x
         rotation[1] = array_orientations[index, 1]  # y
         rotation[2] = array_orientations[index, 2]  # z
         rotation[3] = array_orientations[index, 3]  # w
     # update scale
     if array_scales.shape[0] > 0:
-        if broadcast_scales:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_scales, 0, i)
         scale[0] = array_scales[index, 0]
         scale[1] = array_scales[index, 1]
         scale[2] = array_scales[index, 2]
@@ -249,27 +240,18 @@ def compose_indexed_fabric_transforms(
     position, rotation, scale = _decompose_transformation_matrix(wp.mat44f(fabric_matrices[view_index]))
 
     if array_positions.shape[0] > 0:
-        if broadcast_positions:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_positions, 0, i)
         position[0] = array_positions[index, 0]
         position[1] = array_positions[index, 1]
         position[2] = array_positions[index, 2]
     if array_orientations.shape[0] > 0:
-        if broadcast_orientations:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_orientations, 0, i)
         rotation[0] = array_orientations[index, 0]
         rotation[1] = array_orientations[index, 1]
         rotation[2] = array_orientations[index, 2]
         rotation[3] = array_orientations[index, 3]
     if array_scales.shape[0] > 0:
-        if broadcast_scales:
-            index = 0
-        else:
-            index = i
+        index = wp.where(broadcast_scales, 0, i)
         scale[0] = array_scales[index, 0]
         scale[1] = array_scales[index, 1]
         scale[2] = array_scales[index, 2]

From 5f28340622d017caebee89c85a11d24784c93f4a Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Fri, 15 May 2026 14:32:41 +0000
Subject: [PATCH 13/26] Revert module docstring to upstream one-liner

The class docstring on FabricFrameView already covers the design
points (composition with UsdFrameView, the kernel sync model,
PrepareForReuse policy, return types), so the module-level summary
duplicating them was redundant.
---
 .../sim/views/fabric_frame_view.py            | 23 +------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 6bd41a8ee8b7..23a3311dfd37 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -3,28 +3,7 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
-"""PhysX FrameView with Fabric GPU acceleration.
-
-Design:
-
-* Three persistent ``PrimSelection`` instances differing only in per-attribute access
-  mode (one for each of {trans_ro, world_rw, local_rw}).
-* ``omni:fabric:localMatrix`` is read/written directly — no software composition of
-  ``inv(parent_world) * child_world`` for ``get_local_poses``/``set_local_poses``.
-* View → Fabric index mapping is an integer Warp array computed from the selection's
-  ordered path list (``selection.GetPaths()``); no custom attributes are written to
-  prims.
-* Read/write happens via ``wp.indexedfabricarray``, so the view-to-fabric mapping is
-  baked into the array itself and the kernels just dereference ``ifa[view_index]``.
-* World ↔ local consistency is maintained:
-    - After ``set_local_poses``: the view is marked dirty; the next world read
-      fires a Warp kernel that recomputes ``child_world = parent_world *
-      child_local`` for this view's children.  Tracking is per-view so that
-      multiple views on the same stage don't clear each other's dirty flag.
-    - After ``set_world_poses``: a Warp kernel recomputes child localMatrix from
-      parent worldMatrix on the fly using a parent indexed fabric array, so the
-      next ``get_local_poses`` returns consistent values.
-"""
+"""PhysX FrameView with Fabric GPU acceleration."""
 
 from __future__ import annotations
 

From 5edd276e1844bc29a130958248d7dceff4d8c607 Mon Sep 17 00:00:00 2001
From: Peter Verswyvelen <pverswyvelen@nvidia.com>
Date: Fri, 15 May 2026 14:53:30 +0000
Subject: [PATCH 14/26] Rewrite FabricFrameView class docstring as user-facing
 contract

The docstring leaked implementation details that belong in source
comments: private accessor names (_get_*_ro_array), internal selection
identifiers (trans_sel_ro), indexed-array field names (_world_ifa_ro,
_local_ifa_ro, _parent_world_ifa_ro), and the PrepareForReuse
deduplication optimization in the two sync helpers.  Those facts
already live as inline comments at the relevant call sites.

Replace the leaky paragraphs with a user-facing summary covering what
callers can actually rely on:

* Performance characterization (150-260x vs USD at 1024 prims, with a
  pointer to the benchmark script).
* Backend selection and the parts that always delegate to UsdFrameView.
* Three behavioral contracts: no write-back to USD, world<->local
  consistency maintained automatically, and topology-adaptive (no
  manual refresh API).
* Return-type contract: ProxyArray getters, wp.array setters.
---
 .../sim/views/fabric_frame_view.py            | 55 +++++++++++--------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 23a3311dfd37..74e669ab5c8b 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -50,29 +50,38 @@ def _to_float32_2d(a: wp.array | torch.Tensor) -> wp.array | torch.Tensor:
 class FabricFrameView(BaseFrameView):
     """FrameView with Fabric GPU acceleration for the PhysX backend.
 
-    Uses composition: holds a :class:`UsdFrameView` internally for USD
-    fallback and non-accelerated operations (visibility, scales when
-    Fabric is disabled).
-
-    When Fabric is enabled, world-pose, local-pose, and scale operations
-    use Warp kernels operating on ``omni:fabric:worldMatrix`` and
-    ``omni:fabric:localMatrix``.  All other operations delegate to the
-    internal USD view.
-
-    Every accessor for a selection's indexed fabric array
-    (:meth:`_get_world_ro_array`, :meth:`_get_local_ro_array`,
-    :meth:`_get_world_rw_array`, :meth:`_get_local_rw_array`,
-    :meth:`_get_parent_world_ro_array`) calls :meth:`PrepareForReuse` on its
-    backing ``PrimSelection`` to detect Fabric topology changes that would
-    require rebuilding the view→fabric index mapping.  ``PrepareForReuse``
-    is idempotent and cheap in the steady state (returns ``False`` with no
-    rebuild), so the per-access cost is negligible.  The two sync helpers
-    (:meth:`_sync_local_from_world`, :meth:`_sync_world_from_local_if_dirty`)
-    refresh ``trans_sel_ro`` at most once per call to avoid the redundant
-    USDRT round-trip that would otherwise occur when ``_world_ifa_ro``,
-    ``_local_ifa_ro`` and ``_parent_world_ifa_ro`` are read consecutively.
-
-    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`.  Setters accept ``wp.array``.
+    World-pose, local-pose, and scale operations run on the GPU via Warp
+    kernels that read and write ``omni:fabric:worldMatrix`` and
+    ``omni:fabric:localMatrix`` directly.  Typical speedup vs. the
+    :class:`~isaaclab.sim.views.UsdFrameView` baseline at 1024 prims is
+    150-260× per call (see ``scripts/benchmarks/benchmark_view_comparison.py``).
+
+    When Fabric is unavailable — ``/physics/fabricEnabled`` is false or the
+    device is unsupported — the view transparently falls back to
+    :class:`~isaaclab.sim.views.UsdFrameView` for all pose and scale
+    operations.  The ``count``, ``prims``, ``prim_paths`` properties and the
+    ``get_visibility`` / ``set_visibility`` methods always delegate to
+    :class:`~isaaclab.sim.views.UsdFrameView`; Fabric has no equivalent fast
+    path for those.
+
+    Behavior:
+
+    * **No write-back to USD.**  Fabric writes update only
+      ``omni:fabric:worldMatrix`` / ``omni:fabric:localMatrix``; the prim's
+      USD ``xformOp:*`` attributes are unchanged.  Downstream consumers that
+      read the prim's USD attributes after a Fabric write will see stale
+      values until the next USD-side sync.
+    * **World ↔ local consistency.**  After ``set_world_poses`` (or
+      ``set_scales``) the local matrix is updated so that subsequent
+      ``get_local_poses`` is consistent; after ``set_local_poses`` the world
+      matrix is recomputed on the next world read.  Both directions stay in
+      sync without round-tripping through USD.
+    * **Topology-adaptive.**  Fabric topology changes are detected on each
+      access; the view rebuilds its internal mapping automatically and no
+      manual refresh is required.  Steady-state overhead is negligible.
+
+    Pose getters return :class:`~isaaclab.utils.warp.ProxyArray`; setters
+    accept :class:`wp.array`.
     """
 
     _WORLD_MATRIX_NAME = "omni:fabric:worldMatrix"

From 02c5be0af87a3daede1415c1ee78cad42781a680 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 11:25:35 +0000
Subject: [PATCH 15/26] fix: use GetStageIdAsUInt() instead of
 GetStageIdAsStageId() for stability

GetStageIdAsUInt() returns a plain int which is more stable than the
StageReaderWriterId wrapper returned by GetStageIdAsStageId().
---
 .../isaaclab_physx/sim/views/fabric_frame_view.py               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 74e669ab5c8b..252b9a2e714c 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -642,7 +642,7 @@ def _initialize_fabric(self) -> None:
         cache_key = (self._stage_id, fabric_id.id)
         if cache_key not in FabricFrameView._static_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id, self._stage.GetStageIdAsStageId()
+                fabric_id, self._stage.GetStageIdAsUInt()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)

From ead5b707bddb2e1cdc2287a22d35e686b1c05136 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 11:29:18 +0000
Subject: [PATCH 16/26] Revert "fix: use GetStageIdAsUInt() instead of
 GetStageIdAsStageId() for stability"

This reverts commit 02c5be0af87a3daede1415c1ee78cad42781a680.
---
 .../isaaclab_physx/sim/views/fabric_frame_view.py               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 252b9a2e714c..74e669ab5c8b 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -642,7 +642,7 @@ def _initialize_fabric(self) -> None:
         cache_key = (self._stage_id, fabric_id.id)
         if cache_key not in FabricFrameView._static_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id, self._stage.GetStageIdAsUInt()
+                fabric_id, self._stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)

From b65c298f0f7c736fb5fa7236a3d6a0841a5436bf Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 15:05:32 +0000
Subject: [PATCH 17/26] refactor: move Fabric hierarchy cache from
 FabricFrameView class-level to SimulationContext

The IFabricHierarchy handle cache was a class-level dict on FabricFrameView
(_static_hierarchy_cache) with manual clear_static_caches() cleanup that was
only ever called in one test. This is a classic global-state anti-pattern.

Move the cache to SimulationContext where it has proper lifecycle:
- Created when SimulationContext initializes
- Cleared automatically in clear_instance() on stage teardown
- Accessed via sim_context.get_fabric_hierarchy() by FabricFrameView

This eliminates:
- The ClassVar global dict on FabricFrameView
- The clear_static_caches() classmethod
- Manual cleanup calls in test fixtures

FabricFrameView now requires an active SimulationContext (which was already
true in practice for all Fabric-enabled paths).
---
 .../isaaclab/sim/simulation_context.py        | 36 +++++++++++
 .../sim/views/fabric_frame_view.py            | 61 +++++--------------
 .../test/sim/test_views_xform_prim_fabric.py  |  6 +-
 3 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 175961fcd383..2b0a4f973479 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -214,6 +214,10 @@ def __init__(self, cfg: SimulationCfg | None = None):
             order=5,
         )
 
+        # Fabric hierarchy cache — shared across all FabricFrameView instances.
+        # Keyed by (stage_id, fabric_id_int) so recycled IDs don't return stale handles.
+        self._fabric_hierarchy_cache: dict[tuple[int, int], object] = {}
+
         type(self)._instance = self  # Mark as valid singleton only after successful init
 
     def _apply_render_cfg_settings(self) -> None:
@@ -852,6 +856,35 @@ def get_setting(self, name: str) -> Any:
         """Get a setting value."""
         return self._settings_helper.get(name)
 
+    def get_fabric_hierarchy(self, stage_id: int, fabric_id_int: int, fabric_id_obj: object, stage: object) -> object:
+        """Return a shared IFabricHierarchy handle for the given (stage, fabric) pair.
+
+        Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances that
+        operate on the same stage share a single hierarchy handle.  The handle is
+        created on first access and cached for the lifetime of this
+        :class:`SimulationContext`.
+
+        Args:
+            stage_id: Integer stage identifier (from :func:`~isaaclab.sim.utils.stage.get_current_stage_id`).
+            fabric_id_int: Stable integer from ``FabricId.id``.
+            fabric_id_obj: The ``FabricId`` wrapper (needed by ``get_fabric_hierarchy``).
+            stage: The ``usdrt.Usd.Stage`` instance.
+
+        Returns:
+            The ``IFabricHierarchy`` handle with change-tracking enabled.
+        """
+        import usdrt  # noqa: PLC0415
+
+        cache_key = (stage_id, fabric_id_int)
+        if cache_key not in self._fabric_hierarchy_cache:
+            hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
+                fabric_id_obj, stage.GetStageIdAsStageId()
+            )
+            hierarchy.track_local_xform_changes(True)
+            hierarchy.track_world_xform_changes(True)
+            self._fabric_hierarchy_cache[cache_key] = hierarchy
+        return self._fabric_hierarchy_cache[cache_key]
+
     @classmethod
     def clear_instance(cls) -> None:
         """Clean up resources and clear the singleton instance."""
@@ -865,6 +898,9 @@ def clear_instance(cls) -> None:
                 viz.close()
             cls._instance._visualizers.clear()
 
+            # Drop cached Fabric hierarchy handles (they reference the dying stage)
+            cls._instance._fabric_hierarchy_cache.clear()
+
             # Tear down the stage. We skip clear_stage() (prim-by-prim deletion) since
             # close_stage() + app shutdown destroy the entire stage at once.
             stage_utils.close_stage()
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 74e669ab5c8b..f3dac633c453 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -8,7 +8,7 @@
 from __future__ import annotations
 
 import logging
-from typing import ClassVar
+from typing import TYPE_CHECKING
 
 import torch
 import warp as wp
@@ -22,6 +22,9 @@
 from isaaclab.utils.warp import ProxyArray
 from isaaclab.utils.warp import fabric as fabric_utils
 
+if TYPE_CHECKING:
+    from isaaclab.sim.simulation_context import SimulationContext
+
 logger = logging.getLogger(__name__)
 
 # TODO: extend this to ``cuda:N`` once we wire up multi-GPU support for the view.
@@ -87,34 +90,6 @@ class FabricFrameView(BaseFrameView):
     _WORLD_MATRIX_NAME = "omni:fabric:worldMatrix"
     _LOCAL_MATRIX_NAME = "omni:fabric:localMatrix"
 
-    # Stage-level shared state.  Multiple FabricFrameView instances on the same stage
-    # share one ``IFabricHierarchy`` handle, keyed by ``(stage_id, fabric_id_int)``
-    # so that a recycled ``stage_id`` paired with a fresh Fabric attachment never
-    # reuses a stale handle.  The Fabric id is stored as a plain ``int`` (extracted
-    # from ``FabricId.id``) because the ``FabricId`` wrapper itself is freshly
-    # allocated on every ``GetFabricId()`` call and has no value equality, which
-    # would defeat the cache.  ``ClassVar`` plus the ``_static_`` prefix make it
-    # explicit (and enforceable by type checkers) that this is class-level — never
-    # shadow it with ``self.<name> = ...``.  The cache is not protected by a lock;
-    # Isaac Lab's simulation loop is single-threaded, and adding locking would
-    # negate the per-stage hit-cache cost it exists to avoid.  Call
-    # :meth:`clear_static_caches` on stage teardown if you tear down many stages
-    # in one process.
-    _static_hierarchy_cache: ClassVar[dict[tuple[int, int], object]] = {}
-
-    @classmethod
-    def clear_static_caches(cls) -> None:
-        """Drop all cached ``IFabricHierarchy`` handles.
-
-        Call this after tearing down a USD stage if the process will continue
-        opening new stages.  Cached handles are otherwise retained for the
-        lifetime of the process, which can leak memory in long test suites and,
-        in theory, allow a recycled ``stage_id`` paired with a new Fabric
-        attachment to read a stale handle (though the ``(stage_id, fabric_id)``
-        cache key already guards against the latter).
-        """
-        cls._static_hierarchy_cache.clear()
-
     def __init__(
         self,
         prim_path: str,
@@ -629,25 +604,21 @@ def _initialize_fabric(self) -> None:
         self._stage = usdrt.Usd.Stage.Attach(self._stage_id)
         self._stage.SynchronizeToFabric()
 
-        # Reuse (or create) a hierarchy handle for this stage.  The cache key is
-        # ``(stage_id, fabric_id_int)`` so a recycled ``stage_id`` paired with a fresh
-        # Fabric attachment never returns a stale handle.  Note: ``GetFabricId()``
-        # returns a fresh ``FabricId`` *wrapper* on every call, and the wrapper has
-        # no value equality (two wrappers for the same underlying Fabric compare
-        # unequal and hash differently), so we key on its stable ``.id`` int.
-        # Enable change-tracking BEFORE we author any local matrices, so the per-prim
-        # ``SetLocalXformFromUsd`` calls below mark themselves dirty.
+        # Reuse (or create) a hierarchy handle for this stage via the SimulationContext.
+        # The context owns the cache and clears it on stage teardown.
         fabric_id = self._stage.GetFabricId()
         self._fabric_id = fabric_id.id
-        cache_key = (self._stage_id, fabric_id.id)
-        if cache_key not in FabricFrameView._static_hierarchy_cache:
-            hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id, self._stage.GetStageIdAsStageId()
+        from isaaclab.sim.simulation_context import SimulationContext  # noqa: PLC0415
+
+        sim_context = SimulationContext.instance()
+        if sim_context is None:
+            raise RuntimeError(
+                "FabricFrameView requires an active SimulationContext. "
+                "Create a SimulationContext before instantiating FabricFrameView."
             )
-            hierarchy.track_local_xform_changes(True)
-            hierarchy.track_world_xform_changes(True)
-            FabricFrameView._static_hierarchy_cache[cache_key] = hierarchy
-        self._fabric_hierarchy = FabricFrameView._static_hierarchy_cache[cache_key]
+        self._fabric_hierarchy = sim_context.get_fabric_hierarchy(
+            self._stage_id, fabric_id.id, fabric_id, self._stage
+        )
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
         # attributes.  Our ``trans_ro`` selection requires both, so prims missing either
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index 059446f8a5b7..9d5a8cd3dd2c 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -41,9 +41,6 @@ def test_setup_teardown():
     yield
     sim_utils.clear_stage()
     sim_utils.SimulationContext.clear_instance()
-    # Each test creates a fresh stage; drop cached IFabricHierarchy handles so
-    # the next test does not reuse a handle attached to the disposed stage.
-    FrameView.clear_static_caches()
 
 
 def _skip_if_unavailable(device: str):
@@ -454,7 +451,8 @@ def test_multi_view_per_view_dirty_isolation(device):
     assert view_b._world_dirty is False
     # Both views must reuse the same cached IFabricHierarchy (one stage = one handle).
     assert view_a._fabric_hierarchy is view_b._fabric_hierarchy
-    assert len(FrameView._static_hierarchy_cache) == 1
+    sim_context = sim_utils.SimulationContext.instance()
+    assert len(sim_context._fabric_hierarchy_cache) == 1
 
     # Write a new local pose on view A only.
     new_local_a = wp.zeros((1, 3), dtype=wp.float32, device=device)

From 5732902df1f3ee3972abc9490d9e61bba7e474fc Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 16:22:31 +0000
Subject: [PATCH 18/26] simplify: key hierarchy cache on fabric_id_int only,
 drop stage_id

SimulationContext is 1:1 with a stage, so stage_id is constant for the
lifetime of the cache.  Key on fabric_id_int alone.  In practice this
dict holds at most one entry (Isaac Lab uses one Fabric attachment per
stage), but we keep a dict for future-proofing multi-Fabric scenarios.
---
 .../isaaclab/sim/simulation_context.py        | 24 ++++++++++---------
 .../sim/views/fabric_frame_view.py            |  2 +-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 2b0a4f973479..9a37dd7eb7ed 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -215,8 +215,12 @@ def __init__(self, cfg: SimulationCfg | None = None):
         )
 
         # Fabric hierarchy cache — shared across all FabricFrameView instances.
-        # Keyed by (stage_id, fabric_id_int) so recycled IDs don't return stale handles.
-        self._fabric_hierarchy_cache: dict[tuple[int, int], object] = {}
+        # Keyed by fabric_id_int (the stable .id integer from FabricId).  Currently
+        # Isaac Lab always has exactly one Fabric attachment per stage, so this dict
+        # will hold at most one entry.  We use a dict rather than a plain Optional so
+        # that the design naturally extends to multi-Fabric scenarios (e.g. multiple
+        # physics scenes or partial Fabric attachments) without an API change.
+        self._fabric_hierarchy_cache: dict[int, object] = {}
 
         type(self)._instance = self  # Mark as valid singleton only after successful init
 
@@ -856,16 +860,15 @@ def get_setting(self, name: str) -> Any:
         """Get a setting value."""
         return self._settings_helper.get(name)
 
-    def get_fabric_hierarchy(self, stage_id: int, fabric_id_int: int, fabric_id_obj: object, stage: object) -> object:
-        """Return a shared IFabricHierarchy handle for the given (stage, fabric) pair.
+    def get_fabric_hierarchy(self, fabric_id_int: int, fabric_id_obj: object, stage: object) -> object:
+        """Return a shared IFabricHierarchy handle for the given Fabric attachment.
 
         Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances that
-        operate on the same stage share a single hierarchy handle.  The handle is
-        created on first access and cached for the lifetime of this
+        operate on the same Fabric attachment share a single hierarchy handle.  The
+        handle is created on first access and cached for the lifetime of this
         :class:`SimulationContext`.
 
         Args:
-            stage_id: Integer stage identifier (from :func:`~isaaclab.sim.utils.stage.get_current_stage_id`).
             fabric_id_int: Stable integer from ``FabricId.id``.
             fabric_id_obj: The ``FabricId`` wrapper (needed by ``get_fabric_hierarchy``).
             stage: The ``usdrt.Usd.Stage`` instance.
@@ -875,15 +878,14 @@ def get_fabric_hierarchy(self, stage_id: int, fabric_id_int: int, fabric_id_obj:
         """
         import usdrt  # noqa: PLC0415
 
-        cache_key = (stage_id, fabric_id_int)
-        if cache_key not in self._fabric_hierarchy_cache:
+        if fabric_id_int not in self._fabric_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
                 fabric_id_obj, stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)
-            self._fabric_hierarchy_cache[cache_key] = hierarchy
-        return self._fabric_hierarchy_cache[cache_key]
+            self._fabric_hierarchy_cache[fabric_id_int] = hierarchy
+        return self._fabric_hierarchy_cache[fabric_id_int]
 
     @classmethod
     def clear_instance(cls) -> None:
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index f3dac633c453..45c1af40fad7 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -617,7 +617,7 @@ def _initialize_fabric(self) -> None:
                 "Create a SimulationContext before instantiating FabricFrameView."
             )
         self._fabric_hierarchy = sim_context.get_fabric_hierarchy(
-            self._stage_id, fabric_id.id, fabric_id, self._stage
+            fabric_id.id, fabric_id, self._stage
         )
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix

From e00c337f9970dd8ba86e7228cfe5666be7c6e109 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 16:30:00 +0000
Subject: [PATCH 19/26] docs: mention multi-GPU as the concrete multi-Fabric
 scenario

---
 source/isaaclab/isaaclab/sim/simulation_context.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 9a37dd7eb7ed..9efb56c17c38 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -218,8 +218,8 @@ def __init__(self, cfg: SimulationCfg | None = None):
         # Keyed by fabric_id_int (the stable .id integer from FabricId).  Currently
         # Isaac Lab always has exactly one Fabric attachment per stage, so this dict
         # will hold at most one entry.  We use a dict rather than a plain Optional so
-        # that the design naturally extends to multi-Fabric scenarios (e.g. multiple
-        # physics scenes or partial Fabric attachments) without an API change.
+        # that the design naturally extends to multi-Fabric scenarios (e.g. multi-GPU
+        # support, where each GPU gets its own Fabric attachment) without an API change.
         self._fabric_hierarchy_cache: dict[int, object] = {}
 
         type(self)._instance = self  # Mark as valid singleton only after successful init

From ba72458a29942a68085350dcec293f33b72c7c62 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 16:44:38 +0000
Subject: [PATCH 20/26] cleanup: SimulationContext.get_fabric_hierarchy() owns
 all Fabric state

- No more passing fabric_id + fabric_id.id + stage from caller
- SimulationContext derives usdrt stage from its own self.stage
- Returns (stage_id, usdrt_stage, hierarchy, fabric_id_int) tuple
- FabricFrameView._initialize_fabric is now a clean one-liner call
---
 .../isaaclab/sim/simulation_context.py        | 31 +++++++++++--------
 .../sim/views/fabric_frame_view.py            | 14 ++-------
 2 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 9efb56c17c38..7fcaeb8a97e7 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -860,32 +860,37 @@ def get_setting(self, name: str) -> Any:
         """Get a setting value."""
         return self._settings_helper.get(name)
 
-    def get_fabric_hierarchy(self, fabric_id_int: int, fabric_id_obj: object, stage: object) -> object:
-        """Return a shared IFabricHierarchy handle for the given Fabric attachment.
+    def get_fabric_hierarchy(self) -> "tuple[int, object, object, int]":
+        """Return the usdrt stage and a shared IFabricHierarchy handle.
 
-        Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances that
-        operate on the same Fabric attachment share a single hierarchy handle.  The
-        handle is created on first access and cached for the lifetime of this
+        Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances
+        share a single hierarchy handle per Fabric attachment.  The handle is
+        created on first access and cached for the lifetime of this
         :class:`SimulationContext`.
 
-        Args:
-            fabric_id_int: Stable integer from ``FabricId.id``.
-            fabric_id_obj: The ``FabricId`` wrapper (needed by ``get_fabric_hierarchy``).
-            stage: The ``usdrt.Usd.Stage`` instance.
-
         Returns:
-            The ``IFabricHierarchy`` handle with change-tracking enabled.
+            A tuple of ``(stage_id, usdrt_stage, hierarchy_handle, fabric_id_int)``.
+            The hierarchy has change-tracking enabled for both local and world
+            xforms.
         """
         import usdrt  # noqa: PLC0415
 
+        stage_id = UsdUtils.StageCache.Get().GetId(self.stage).ToLongInt()
+        usdrt_stage = usdrt.Usd.Stage.Attach(stage_id)
+        usdrt_stage.SynchronizeToFabric()
+
+        fabric_id = usdrt_stage.GetFabricId()
+        fabric_id_int = fabric_id.id
+
         if fabric_id_int not in self._fabric_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id_obj, stage.GetStageIdAsStageId()
+                fabric_id, usdrt_stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)
             self._fabric_hierarchy_cache[fabric_id_int] = hierarchy
-        return self._fabric_hierarchy_cache[fabric_id_int]
+
+        return stage_id, usdrt_stage, self._fabric_hierarchy_cache[fabric_id_int], fabric_id_int
 
     @classmethod
     def clear_instance(cls) -> None:
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 45c1af40fad7..6b2287af737b 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -597,17 +597,8 @@ def _resolve_indices_wp(self, indices: wp.array | None) -> wp.array:
 
     def _initialize_fabric(self) -> None:
         """One-time Fabric setup: hierarchy handle, attribute population, selections, indexed arrays."""
-        import usdrt  # noqa: PLC0415
         from usdrt import Rt  # noqa: PLC0415
 
-        self._stage_id = sim_utils.get_current_stage_id()
-        self._stage = usdrt.Usd.Stage.Attach(self._stage_id)
-        self._stage.SynchronizeToFabric()
-
-        # Reuse (or create) a hierarchy handle for this stage via the SimulationContext.
-        # The context owns the cache and clears it on stage teardown.
-        fabric_id = self._stage.GetFabricId()
-        self._fabric_id = fabric_id.id
         from isaaclab.sim.simulation_context import SimulationContext  # noqa: PLC0415
 
         sim_context = SimulationContext.instance()
@@ -616,9 +607,8 @@ def _initialize_fabric(self) -> None:
                 "FabricFrameView requires an active SimulationContext. "
                 "Create a SimulationContext before instantiating FabricFrameView."
             )
-        self._fabric_hierarchy = sim_context.get_fabric_hierarchy(
-            fabric_id.id, fabric_id, self._stage
-        )
+
+        self._stage_id, self._stage, self._fabric_hierarchy, self._fabric_id = sim_context.get_fabric_hierarchy()
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
         # attributes.  Our ``trans_ro`` selection requires both, so prims missing either

From 5853aa557b583c393f9e12783c3e1132ed370fbc Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 16:47:49 +0000
Subject: [PATCH 21/26] cache usdrt stage in SimulationContext, don't re-attach
 on every call

usdrt.Usd.Stage.Attach() + SynchronizeToFabric() was called on every
get_fabric_hierarchy() invocation. Now lazily created once and cached
alongside the hierarchy handles. Cleared in clear_instance().
---
 .../isaaclab/sim/simulation_context.py        | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 7fcaeb8a97e7..706163fdcc1a 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -214,12 +214,16 @@ def __init__(self, cfg: SimulationCfg | None = None):
             order=5,
         )
 
-        # Fabric hierarchy cache — shared across all FabricFrameView instances.
-        # Keyed by fabric_id_int (the stable .id integer from FabricId).  Currently
-        # Isaac Lab always has exactly one Fabric attachment per stage, so this dict
-        # will hold at most one entry.  We use a dict rather than a plain Optional so
-        # that the design naturally extends to multi-Fabric scenarios (e.g. multi-GPU
-        # support, where each GPU gets its own Fabric attachment) without an API change.
+        # Fabric state — lazily initialized by get_fabric_hierarchy().
+        # The usdrt stage and hierarchy handles are cached for the lifetime of this
+        # SimulationContext.  The hierarchy cache is keyed by fabric_id_int (the stable
+        # .id integer from FabricId).  Currently Isaac Lab always has exactly one Fabric
+        # attachment per stage, so this dict will hold at most one entry.  We use a dict
+        # rather than a plain Optional so that the design naturally extends to
+        # multi-Fabric scenarios (e.g. multi-GPU support, where each GPU gets its own
+        # Fabric attachment) without an API change.
+        self._usdrt_stage: object | None = None
+        self._usdrt_stage_id: int | None = None
         self._fabric_hierarchy_cache: dict[int, object] = {}
 
         type(self)._instance = self  # Mark as valid singleton only after successful init
@@ -864,9 +868,9 @@ def get_fabric_hierarchy(self) -> "tuple[int, object, object, int]":
         """Return the usdrt stage and a shared IFabricHierarchy handle.
 
         Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances
-        share a single hierarchy handle per Fabric attachment.  The handle is
-        created on first access and cached for the lifetime of this
-        :class:`SimulationContext`.
+        share a single hierarchy handle per Fabric attachment.  The usdrt stage
+        and hierarchy are created on first access and cached for the lifetime of
+        this :class:`SimulationContext`.
 
         Returns:
             A tuple of ``(stage_id, usdrt_stage, hierarchy_handle, fabric_id_int)``.
@@ -875,22 +879,24 @@ def get_fabric_hierarchy(self) -> "tuple[int, object, object, int]":
         """
         import usdrt  # noqa: PLC0415
 
-        stage_id = UsdUtils.StageCache.Get().GetId(self.stage).ToLongInt()
-        usdrt_stage = usdrt.Usd.Stage.Attach(stage_id)
-        usdrt_stage.SynchronizeToFabric()
+        # Lazily attach the usdrt stage once.
+        if self._usdrt_stage is None:
+            self._usdrt_stage_id = UsdUtils.StageCache.Get().GetId(self.stage).ToLongInt()
+            self._usdrt_stage = usdrt.Usd.Stage.Attach(self._usdrt_stage_id)
+            self._usdrt_stage.SynchronizeToFabric()
 
-        fabric_id = usdrt_stage.GetFabricId()
+        fabric_id = self._usdrt_stage.GetFabricId()
         fabric_id_int = fabric_id.id
 
         if fabric_id_int not in self._fabric_hierarchy_cache:
             hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id, usdrt_stage.GetStageIdAsStageId()
+                fabric_id, self._usdrt_stage.GetStageIdAsStageId()
             )
             hierarchy.track_local_xform_changes(True)
             hierarchy.track_world_xform_changes(True)
             self._fabric_hierarchy_cache[fabric_id_int] = hierarchy
 
-        return stage_id, usdrt_stage, self._fabric_hierarchy_cache[fabric_id_int], fabric_id_int
+        return self._usdrt_stage_id, self._usdrt_stage, self._fabric_hierarchy_cache[fabric_id_int], fabric_id_int
 
     @classmethod
     def clear_instance(cls) -> None:
@@ -905,7 +911,9 @@ def clear_instance(cls) -> None:
                 viz.close()
             cls._instance._visualizers.clear()
 
-            # Drop cached Fabric hierarchy handles (they reference the dying stage)
+            # Drop cached Fabric state (usdrt stage + hierarchy handles reference the dying stage)
+            cls._instance._usdrt_stage = None
+            cls._instance._usdrt_stage_id = None
             cls._instance._fabric_hierarchy_cache.clear()
 
             # Tear down the stage. We skip clear_stage() (prim-by-prim deletion) since

From e75fe51805ec1374a035c30afa400c9bf4edb3cc Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 16:55:02 +0000
Subject: [PATCH 22/26] fix: restore 'import usdrt' in _initialize_fabric

The module is referenced directly (usdrt.Sdf.ValueTypeNames, usdrt.Usd.Access)
later in the function. Was accidentally removed when we moved stage attachment
into SimulationContext.
---
 source/isaaclab/isaaclab/sim/simulation_context.py             | 2 +-
 .../isaaclab_physx/sim/views/fabric_frame_view.py              | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 706163fdcc1a..a7feffcd280e 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -864,7 +864,7 @@ def get_setting(self, name: str) -> Any:
         """Get a setting value."""
         return self._settings_helper.get(name)
 
-    def get_fabric_hierarchy(self) -> "tuple[int, object, object, int]":
+    def get_fabric_hierarchy(self) -> tuple[int, object, object, int]:
         """Return the usdrt stage and a shared IFabricHierarchy handle.
 
         Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 6b2287af737b..3f031a876191 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -23,7 +23,7 @@
 from isaaclab.utils.warp import fabric as fabric_utils
 
 if TYPE_CHECKING:
-    from isaaclab.sim.simulation_context import SimulationContext
+    pass
 
 logger = logging.getLogger(__name__)
 
@@ -597,6 +597,7 @@ def _resolve_indices_wp(self, indices: wp.array | None) -> wp.array:
 
     def _initialize_fabric(self) -> None:
         """One-time Fabric setup: hierarchy handle, attribute population, selections, indexed arrays."""
+        import usdrt  # noqa: PLC0415
         from usdrt import Rt  # noqa: PLC0415
 
         from isaaclab.sim.simulation_context import SimulationContext  # noqa: PLC0415

From 653b3f34bd94c73d0565656dca30b00438a0bee9 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 17:06:34 +0000
Subject: [PATCH 23/26] refactor: service locator pattern for Fabric cache,
 remove usdrt from SimulationContext

- Add generic get_service()/set_service() on SimulationContext, keyed by class.
  All services are cleared on clear_instance().
- Move all Fabric/usdrt code to new FabricStageCache in isaaclab_physx.
  SimulationContext no longer imports or knows about usdrt.
- FabricFrameView lazily creates and registers FabricStageCache on first use.
- Drop dead _stage_id field from FabricFrameView.
- Remove unused TYPE_CHECKING import from fabric_frame_view.py.
---
 .../isaaclab/sim/simulation_context.py        | 66 +++++++------------
 .../isaaclab_physx/sim/fabric_stage_cache.py  | 63 ++++++++++++++++++
 .../sim/views/fabric_frame_view.py            | 16 +++--
 .../test/sim/test_views_xform_prim_fabric.py  |  6 +-
 uv.lock                                       |  9 +++
 5 files changed, 111 insertions(+), 49 deletions(-)
 create mode 100644 source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
 create mode 100644 uv.lock

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index a7feffcd280e..78648b9b706d 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -11,7 +11,7 @@
 import traceback
 from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, TypeVar
 
 import toml
 import torch
@@ -38,6 +38,8 @@
 if TYPE_CHECKING:
     from isaaclab.cloner.clone_plan import ClonePlan
 
+_T = TypeVar("_T")
+
 from .simulation_cfg import SimulationCfg
 from .spawners import DomeLightCfg, GroundPlaneCfg
 
@@ -214,17 +216,10 @@ def __init__(self, cfg: SimulationCfg | None = None):
             order=5,
         )
 
-        # Fabric state — lazily initialized by get_fabric_hierarchy().
-        # The usdrt stage and hierarchy handles are cached for the lifetime of this
-        # SimulationContext.  The hierarchy cache is keyed by fabric_id_int (the stable
-        # .id integer from FabricId).  Currently Isaac Lab always has exactly one Fabric
-        # attachment per stage, so this dict will hold at most one entry.  We use a dict
-        # rather than a plain Optional so that the design naturally extends to
-        # multi-Fabric scenarios (e.g. multi-GPU support, where each GPU gets its own
-        # Fabric attachment) without an API change.
-        self._usdrt_stage: object | None = None
-        self._usdrt_stage_id: int | None = None
-        self._fabric_hierarchy_cache: dict[int, object] = {}
+        # Singleton service registry — backend-specific caches (e.g. Fabric hierarchy)
+        # register themselves here, keyed by their class.  All services are cleared when
+        # the SimulationContext is torn down via clear_instance().
+        self._services: dict[type, object] = {}
 
         type(self)._instance = self  # Mark as valid singleton only after successful init
 
@@ -864,39 +859,28 @@ def get_setting(self, name: str) -> Any:
         """Get a setting value."""
         return self._settings_helper.get(name)
 
-    def get_fabric_hierarchy(self) -> tuple[int, object, object, int]:
-        """Return the usdrt stage and a shared IFabricHierarchy handle.
+    def get_service(self, cls: type[_T]) -> _T | None:
+        """Retrieve a registered singleton service by its class.
 
-        Multiple :class:`~isaaclab_physx.sim.views.FabricFrameView` instances
-        share a single hierarchy handle per Fabric attachment.  The usdrt stage
-        and hierarchy are created on first access and cached for the lifetime of
-        this :class:`SimulationContext`.
+        Args:
+            cls: The service class used as key.
 
         Returns:
-            A tuple of ``(stage_id, usdrt_stage, hierarchy_handle, fabric_id_int)``.
-            The hierarchy has change-tracking enabled for both local and world
-            xforms.
+            The registered instance, or ``None`` if not registered.
         """
-        import usdrt  # noqa: PLC0415
-
-        # Lazily attach the usdrt stage once.
-        if self._usdrt_stage is None:
-            self._usdrt_stage_id = UsdUtils.StageCache.Get().GetId(self.stage).ToLongInt()
-            self._usdrt_stage = usdrt.Usd.Stage.Attach(self._usdrt_stage_id)
-            self._usdrt_stage.SynchronizeToFabric()
+        return self._services.get(cls)  # type: ignore[return-value]
 
-        fabric_id = self._usdrt_stage.GetFabricId()
-        fabric_id_int = fabric_id.id
+    def set_service(self, cls: type[_T], instance: _T) -> None:
+        """Register a singleton service, keyed by its class.
 
-        if fabric_id_int not in self._fabric_hierarchy_cache:
-            hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
-                fabric_id, self._usdrt_stage.GetStageIdAsStageId()
-            )
-            hierarchy.track_local_xform_changes(True)
-            hierarchy.track_world_xform_changes(True)
-            self._fabric_hierarchy_cache[fabric_id_int] = hierarchy
+        Overwrites any previously registered instance for the same class.
+        The service is automatically cleared when :meth:`clear_instance` is called.
 
-        return self._usdrt_stage_id, self._usdrt_stage, self._fabric_hierarchy_cache[fabric_id_int], fabric_id_int
+        Args:
+            cls: The service class used as key.
+            instance: The service instance to register.
+        """
+        self._services[cls] = instance
 
     @classmethod
     def clear_instance(cls) -> None:
@@ -911,10 +895,8 @@ def clear_instance(cls) -> None:
                 viz.close()
             cls._instance._visualizers.clear()
 
-            # Drop cached Fabric state (usdrt stage + hierarchy handles reference the dying stage)
-            cls._instance._usdrt_stage = None
-            cls._instance._usdrt_stage_id = None
-            cls._instance._fabric_hierarchy_cache.clear()
+            # Drop all registered singleton services (they may reference the dying stage)
+            cls._instance._services.clear()
 
             # Tear down the stage. We skip clear_stage() (prim-by-prim deletion) since
             # close_stage() + app shutdown destroy the entire stage at once.
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py b/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
new file mode 100644
index 000000000000..c2b75193943a
--- /dev/null
+++ b/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Fabric stage and hierarchy cache, registered as a service on SimulationContext."""
+
+from __future__ import annotations
+
+from pxr import UsdUtils
+
+
+class FabricStageCache:
+    """Caches the usdrt stage attachment and IFabricHierarchy handles.
+
+    Registered as a singleton service on :class:`~isaaclab.sim.SimulationContext` via
+    ``set_service(FabricStageCache, ...)``.  Multiple
+    :class:`~isaaclab_physx.sim.views.FabricFrameView` instances share a single
+    hierarchy handle per Fabric attachment.
+
+    The hierarchy cache is keyed by ``fabric_id_int`` (the stable ``.id`` integer from
+    ``FabricId``).  Currently Isaac Lab always has exactly one Fabric attachment per
+    stage, so this dict will hold at most one entry.  A dict is used rather than a plain
+    attribute so the design naturally extends to multi-Fabric scenarios (e.g. multi-GPU
+    support, where each GPU gets its own Fabric attachment) without an API change.
+    """
+
+    def __init__(self, usd_stage) -> None:
+        import usdrt  # noqa: PLC0415
+
+        stage_id = UsdUtils.StageCache.Get().GetId(usd_stage).ToLongInt()
+        self._stage = usdrt.Usd.Stage.Attach(stage_id)
+        self._stage.SynchronizeToFabric()
+        self._hierarchy_cache: dict[int, object] = {}
+
+    @property
+    def stage(self):
+        """The usdrt stage (already attached and synchronized)."""
+        return self._stage
+
+    def get_hierarchy(self):
+        """Return the IFabricHierarchy handle for the current Fabric attachment.
+
+        Creates and caches the handle on first call.  Change-tracking is enabled
+        for both local and world xforms.
+
+        Returns:
+            A tuple of ``(hierarchy_handle, fabric_id_int)``.
+        """
+        import usdrt  # noqa: PLC0415
+
+        fabric_id = self._stage.GetFabricId()
+        fabric_id_int = fabric_id.id
+
+        if fabric_id_int not in self._hierarchy_cache:
+            hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy(
+                fabric_id, self._stage.GetStageIdAsStageId()
+            )
+            hierarchy.track_local_xform_changes(True)
+            hierarchy.track_world_xform_changes(True)
+            self._hierarchy_cache[fabric_id_int] = hierarchy
+
+        return self._hierarchy_cache[fabric_id_int], fabric_id_int
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
index 3f031a876191..c4274f1f69fb 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/views/fabric_frame_view.py
@@ -8,7 +8,6 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING
 
 import torch
 import warp as wp
@@ -22,9 +21,6 @@
 from isaaclab.utils.warp import ProxyArray
 from isaaclab.utils.warp import fabric as fabric_utils
 
-if TYPE_CHECKING:
-    pass
-
 logger = logging.getLogger(__name__)
 
 # TODO: extend this to ``cuda:N`` once we wire up multi-GPU support for the view.
@@ -127,7 +123,6 @@ def __init__(
 
         # Fabric state — all populated lazily in :meth:`_initialize_fabric`.
         self._fabric_initialized = False
-        self._stage_id: int | None = None
         self._stage = None
         self._fabric_hierarchy = None
         # Set by ``set_local_poses``; cleared by ``_sync_world_from_local_if_dirty``.
@@ -602,6 +597,8 @@ def _initialize_fabric(self) -> None:
 
         from isaaclab.sim.simulation_context import SimulationContext  # noqa: PLC0415
 
+        from isaaclab_physx.sim.fabric_stage_cache import FabricStageCache  # noqa: PLC0415
+
         sim_context = SimulationContext.instance()
         if sim_context is None:
             raise RuntimeError(
@@ -609,7 +606,14 @@ def _initialize_fabric(self) -> None:
                 "Create a SimulationContext before instantiating FabricFrameView."
             )
 
-        self._stage_id, self._stage, self._fabric_hierarchy, self._fabric_id = sim_context.get_fabric_hierarchy()
+        # Get or create the FabricStageCache service.
+        cache = sim_context.get_service(FabricStageCache)
+        if cache is None:
+            cache = FabricStageCache(sim_context.stage)
+            sim_context.set_service(FabricStageCache, cache)
+
+        self._stage = cache.stage
+        self._fabric_hierarchy, self._fabric_id = cache.get_hierarchy()
 
         # Ensure each child prim AND its parent have BOTH Fabric world and local matrix
         # attributes.  Our ``trans_ro`` selection requires both, so prims missing either
diff --git a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
index 9d5a8cd3dd2c..11d7e1ab717c 100644
--- a/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
+++ b/source/isaaclab_physx/test/sim/test_views_xform_prim_fabric.py
@@ -451,8 +451,12 @@ def test_multi_view_per_view_dirty_isolation(device):
     assert view_b._world_dirty is False
     # Both views must reuse the same cached IFabricHierarchy (one stage = one handle).
     assert view_a._fabric_hierarchy is view_b._fabric_hierarchy
+    from isaaclab_physx.sim.fabric_stage_cache import FabricStageCache
+
     sim_context = sim_utils.SimulationContext.instance()
-    assert len(sim_context._fabric_hierarchy_cache) == 1
+    cache = sim_context.get_service(FabricStageCache)
+    assert cache is not None
+    assert len(cache._hierarchy_cache) == 1
 
     # Write a new local pose on view A only.
     new_local_a = wp.zeros((1, 3), dtype=wp.float32, device=device)
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 000000000000..04b814a22708
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,9 @@
+version = 1
+revision = 3
+requires-python = ">=3.12"
+
+[options]
+prerelease-mode = "allow"
+
+[manifest]
+overrides = [{ name = "numpy", specifier = ">=2" }]

From 4e4833f53713a63078feeeac6b20b69838b242eb Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 17:09:46 +0000
Subject: [PATCH 24/26] docs: add changelog entries for service locator and
 FabricStageCache

---
 source/isaaclab/changelog.d/fix-fabric-local-matrix.rst     | 6 ++++++
 .../isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst  | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst b/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
index e3c6cc22ba5c..a048992a809d 100644
--- a/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
+++ b/source/isaaclab/changelog.d/fix-fabric-local-matrix.rst
@@ -16,3 +16,9 @@ Added
   explicit transposes).  Used by
   :class:`~isaaclab_physx.sim.views.FabricFrameView` to keep child world and
   local matrices consistent across writes without round-tripping through USD.
+* Added :meth:`~isaaclab.sim.SimulationContext.get_service` and
+  :meth:`~isaaclab.sim.SimulationContext.set_service` — a typed singleton
+  service locator on :class:`~isaaclab.sim.SimulationContext`.  Backend-specific
+  caches (e.g. Fabric hierarchy handles) register themselves here instead of
+  living as class-level globals.  Services are automatically cleared on
+  :meth:`~isaaclab.sim.SimulationContext.clear_instance`.
diff --git a/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst b/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst
index 43f2eaae0ee7..a99a5fe19a5c 100644
--- a/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst
+++ b/source/isaaclab_physx/changelog.d/fix-fabric-local-matrix.rst
@@ -15,6 +15,10 @@ Changed
   view → fabric index mapping (no custom prim attributes), and Warp kernels
   that operate on :class:`wp.indexedfabricarray` so the kernels just index
   ``ifa[view_index]`` instead of taking a separate mapping array.
+* Moved the ``IFabricHierarchy`` handle cache out of ``FabricFrameView`` (class-level
+  global) into a new :class:`~isaaclab_physx.sim.fabric_stage_cache.FabricStageCache`,
+  registered as a service on :class:`~isaaclab.sim.SimulationContext`.  The cache is
+  automatically cleared on stage teardown.
 * :meth:`~isaaclab_physx.sim.views.FabricFrameView.set_local_poses` now
   writes ``omni:fabric:localMatrix`` directly through Fabric.  The next
   ``get_world_poses`` runs a Warp kernel that recomputes

From 3dc43e22179638c3c5cf6c88b55bbd4aaa56499e Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 17:18:22 +0000
Subject: [PATCH 25/26] call close() on services during teardown (IDisposable
 pattern)

SimulationContext.clear_instance() now calls close() on each registered
service before clearing the dict. FabricStageCache.close() releases the
hierarchy handles and stage reference.
---
 source/isaaclab/isaaclab/sim/simulation_context.py           | 5 ++++-
 .../isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py  | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 78648b9b706d..21e62d2e1987 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -895,7 +895,10 @@ def clear_instance(cls) -> None:
                 viz.close()
             cls._instance._visualizers.clear()
 
-            # Drop all registered singleton services (they may reference the dying stage)
+            # Close and drop all registered singleton services
+            for service in cls._instance._services.values():
+                if hasattr(service, "close"):
+                    service.close()
             cls._instance._services.clear()
 
             # Tear down the stage. We skip clear_stage() (prim-by-prim deletion) since
diff --git a/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py b/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
index c2b75193943a..22386b858b5c 100644
--- a/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
+++ b/source/isaaclab_physx/isaaclab_physx/sim/fabric_stage_cache.py
@@ -38,6 +38,11 @@ def stage(self):
         """The usdrt stage (already attached and synchronized)."""
         return self._stage
 
+    def close(self) -> None:
+        """Release cached handles.  Called by SimulationContext on teardown."""
+        self._hierarchy_cache.clear()
+        self._stage = None
+
     def get_hierarchy(self):
         """Return the IFabricHierarchy handle for the current Fabric attachment.
 

From 2a391e9c27610d47395112cb159fd09ec7e705be Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Sun, 17 May 2026 17:27:48 +0000
Subject: [PATCH 26/26] close existing service when replaced via set_service()

---
 source/isaaclab/isaaclab/sim/simulation_context.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 21e62d2e1987..211155b065b1 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -880,6 +880,9 @@ def set_service(self, cls: type[_T], instance: _T) -> None:
             cls: The service class used as key.
             instance: The service instance to register.
         """
+        old = self._services.get(cls)
+        if old is not None and old is not instance and hasattr(old, "close"):
+            old.close()
         self._services[cls] = instance
 
     @classmethod