From 1e7994be9073ee3aebc0809718042920ff832df7 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 11:16:01 +0000 Subject: [PATCH 1/8] add references Change-Id: Iec531798d210fbadb3d322885741911c0e2f4d55 --- site/content/docs/concepts/references.md | 15 +++++++++++++-- site/layouts/shortcodes/embed-pdf.html | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 site/layouts/shortcodes/embed-pdf.html diff --git a/site/content/docs/concepts/references.md b/site/content/docs/concepts/references.md index 0e0f29ac..a725680f 100644 --- a/site/content/docs/concepts/references.md +++ b/site/content/docs/concepts/references.md @@ -4,10 +4,21 @@ date: 2024-12-19T11:20:46Z --- - [The Kubernetes Network Driver Model: A Composable Architecture for High-Performance Networking](/docs/kubernetes_network_driver_model_dranet_paper.pdf) - This paper introduces the Kubernetes Network Driver model and provides a detailed performance evaluation of DraNet, demonstrating significant bandwidth improvements for AI/ML workloads. 
+ +{{< embed-pdf "/docs/kubernetes_network_driver_model_dranet_paper.pdf" >}} + +- [The Challenges of AI/ML Multi-Node Workloads in Kubernetes - Antonio Ojea, Google - Regular SIG Network Meeting for 2025-07-17](https://www.youtube.com/playlist?list=PL69nYSiGNLP2E8vmnqo5MwPOY25sDWIxb) + + + +- [Kubernetes Network Drivers, Antonio Ojea, Presentation](https://docs.google.com/presentation/d/1Vdr7BhbYXeWjwmLjGmqnUkvJr_eOUdU0x-JxfXWxUT8/edit?usp=sharing) + + - [KEP 3063 - Dynamic Resource Allocation #306](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/3063-dynamic-resource-allocation/README.md) - [KEP 3695 - DRA: structured parameters #438](https://github.com/kubernetes/enhancements/issues/4381) - [Extend PodResources to include resources from Dynamic Resource Allocation (DRA)](https://github.com/kubernetes/enhancements/issues/3695) - [Working Group Device Management](https://github.com/kubernetes-sigs/wg-device-management) -- [Kubernetes Network Drivers, Antonio Ojea, Presentation](https://docs.google.com/presentation/d/1Vdr7BhbYXeWjwmLjGmqnUkvJr_eOUdU0x-JxfXWxUT8/edit?usp=sharing) + + - [The Future of Kubernetes Networking - Antonio Ojea, Googe & Dan Winship, Red Hat - Kubernetes Contributor Summit EU 2024](https://sched.co/1aOqO) -- [Better Together! GPU, TPU and NIC Topological Alignment with DRA - John Belamaric, Google & Patrick Ohly, Intel - Kubecon US 2024](https://sched.co/1i7pv) \ No newline at end of file +- [Better Together! 
GPU, TPU and NIC Topological Alignment with DRA - John Belamaric, Google & Patrick Ohly, Intel - Kubecon US 2024](https://sched.co/1i7pv) diff --git a/site/layouts/shortcodes/embed-pdf.html b/site/layouts/shortcodes/embed-pdf.html new file mode 100644 index 00000000..b86f4e40 --- /dev/null +++ b/site/layouts/shortcodes/embed-pdf.html @@ -0,0 +1 @@ + From 386b862838da2df92d16912fd8376930f5ce5faf Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 11:40:22 +0000 Subject: [PATCH 2/8] update nvida dra gpu driver with match attributes Change-Id: I283931165056aa4de511c58957d127fc4ad597f0 --- examples/demo_nvidia_dranet/deviceclass.yaml | 2 +- .../demo_nvidia_dranet/resourceclaims.yaml | 25 +-- examples/demo_nvidia_dranet/statefulset.yaml | 35 ++-- site/content/docs/user/nvidia-dranet.md | 185 +++++++++++++----- 4 files changed, 157 insertions(+), 90 deletions(-) diff --git a/examples/demo_nvidia_dranet/deviceclass.yaml b/examples/demo_nvidia_dranet/deviceclass.yaml index 1c196dc5..6d04d1e0 100644 --- a/examples/demo_nvidia_dranet/deviceclass.yaml +++ b/examples/demo_nvidia_dranet/deviceclass.yaml @@ -19,4 +19,4 @@ metadata: spec: selectors: - cel: - expression: device.driver == "dra.net" \ No newline at end of file + expression: device.driver == "dra.net" diff --git a/examples/demo_nvidia_dranet/resourceclaims.yaml b/examples/demo_nvidia_dranet/resourceclaims.yaml index e834b12f..b16932ad 100644 --- a/examples/demo_nvidia_dranet/resourceclaims.yaml +++ b/examples/demo_nvidia_dranet/resourceclaims.yaml @@ -11,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- apiVersion: resource.k8s.io/v1beta1 kind: ResourceClaimTemplate metadata: - name: 2-gpu + name: 2-gpu-nic-aligned spec: spec: devices: @@ -25,26 +24,12 @@ spec: count: 2 selectors: - cel: - expression: | - device.attributes["gpu.nvidia.com"].index < 2 - ---- -apiVersion: resource.k8s.io/v1beta1 -kind: ResourceClaimTemplate -metadata: - name: 2-nic -spec: - spec: - devices: - requests: + expression: device.attributes["gpu.nvidia.com"].index < 2 - name: nic deviceClassName: dranet count: 2 selectors: - cel: - expression: device.attributes["dra.net"].rdma == true && - ( - (device.attributes["dra.net"].ifName.startsWith("gpu") && - device.attributes["dra.net"].ifName.endsWith("rdma0") && - int(device.attributes["dra.net"].ifName.substring(3, 4)) < 2) - ) \ No newline at end of file + expression: device.attributes["dra.net"].rdma == true + constraints: + - matchAttribute: "resource.kubernetes.io/pcieRoot" diff --git a/examples/demo_nvidia_dranet/statefulset.yaml b/examples/demo_nvidia_dranet/statefulset.yaml index 95556c4f..1b52049f 100644 --- a/examples/demo_nvidia_dranet/statefulset.yaml +++ b/examples/demo_nvidia_dranet/statefulset.yaml @@ -1,17 +1,12 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- +apiVersion: v1 +kind: Service +metadata: + name: nccl-gib-test +spec: + selector: + name: nccl-gib-test + clusterIP: None +--- apiVersion: apps/v1 kind: StatefulSet metadata: @@ -36,8 +31,8 @@ spec: capabilities: add: ["IPC_LOCK"] volumeMounts: - # - name: library-dir-host - # mountPath: /usr/local/nvidia + - name: library-dir-host + mountPath: /usr/local/nvidia - name: gib mountPath: /usr/local/gib - name: shared-memory @@ -57,7 +52,7 @@ spec: sleep infinity resources: claims: - - name: gpu + - name: gpu volumes: - name: library-dir-host hostPath: @@ -71,11 +66,9 @@ spec: sizeLimit: 250Gi resourceClaims: - name: gpu - resourceClaimTemplateName: 2-gpu - - name: nic - resourceClaimTemplateName: 2-nic + resourceClaimTemplateName: 2-gpu-nic-aligned tolerations: - key: "nvidia.com/gpu" operator: "Equal" value: "present" - effect: "NoSchedule" \ No newline at end of file + effect: "NoSchedule" diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index 56c60489..df8f3e9a 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -4,7 +4,10 @@ date: 2025-06-20T10:10:40Z --- -To get started, create a [GKE cluster with DRA support](https://cloud.google.com/kubernetes-engine/docs/how-to/set-up-dra) and the corresponding [VPC and subnets](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute-custom#create-vpcs-and-subnets) +To get started, create a [GKE cluster with DRA +support](https://cloud.google.com/kubernetes-engine/docs/how-to/set-up-dra) and +the corresponding [VPC and +subnets](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute-custom#create-vpcs-and-subnets) It should look like @@ -82,7 +85,10 @@ gcloud container node-pools create dranet-a4 \ --additional-node-network network=${RDMA_NETWORK_PREFIX}-net,subnetwork=${RDMA_NETWORK_PREFIX}-sub-7 ``` -Apply the following DaemonSet to install the RDMA binaries and the NCCL library on the node. 
The RDMA binaries are stored in `/home/kubernetes/bin/gib` directory and the NCCL library is stored in `/home/kubernetes/bin/nvidia/lib64` directory on the VM: +Apply the following DaemonSet to install the RDMA binaries and the NCCL library +on the node. The RDMA binaries are stored in `/home/kubernetes/bin/gib` +directory and the NCCL library is stored in `/home/kubernetes/bin/nvidia/lib64` +directory on the VM: ```sh kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/refs/heads/master/gpudirect-rdma/nccl-rdma-installer.yaml @@ -95,13 +101,25 @@ kubectl apply -f https://raw.githubusercontent.com/google/dranet/refs/heads/main #### Installing Nvidia DRA Drivers -In order to install the NVIDIA DRA Drivers you will need to clone the [NVIDIA DRA](https://github.com/NVIDIA/k8s-dra-driver-gpu) repo. Ensure you have [helm](https://helm.sh/docs/intro/install/) installed. +In order to install the NVIDIA DRA Drivers you will need to clone the [NVIDIA +DRA](https://github.com/NVIDIA/k8s-dra-driver-gpu) repo. Ensure you have +[helm](https://helm.sh/docs/intro/install/) installed. + +[KEP #4381](https://github.com/kubernetes/enhancements/pull/5316) proposes the +standard PCI Root attribute. This is an important field to have for devices +since the alignment of multiple devices on the PCI bus can have major +implications of how fast the devices can communicate with each other. + +Please ensure the GPU Driver image [includes the standard attribute +`resource.kubernetes.io/pcieRoot`](https://github.com/NVIDIA/k8s-dra-driver-gpu/pull/429) +so both GPU DRA driver and DraNet can use it for NIC alignment.
``` helm upgrade -i --create-namespace --namespace nvidia-dra-driver-gpu nvidia-dra-driver-gpu ./k8s-dra-driver-gpu/deployments/helm/nvidia-dra-driver-gpu --set gpuResourcesEnabledOverride=true --values https://raw.githubusercontent.com/google/dranet/refs/heads/main/examples/demo_nvidia_dranet/values.yaml --wait ``` -The values.yaml adds some additional tolerations and removes some priorities that need to be done in order to work nicely with GKE. +The values.yaml adds some additional tolerations and removes some priorities +that need to be done in order to work nicely with GKE. Once this is done, you can run @@ -110,7 +128,8 @@ kubectl get pods -n nvidia-dra-driver-gpu NAME READY STATUS RESTARTS AGE nvidia-dra-driver-gpu-controller-66696889cd-86m8f 1/1 Running 0 13m ``` -If you only see the controller like above, you will need to label the nodes with GPUs on them in order to get the kubelet plugin running. +If you only see the controller like above, you will need to label the nodes with +GPUs on them in order to get the kubelet plugin running. ```sh kubectl label node -l cloud.google.com/gke-gpu=true --overwrite nvidia.com/gpu.present=true @@ -124,15 +143,12 @@ nvidia-dra-driver-gpu-kubelet-plugin-qsp2d 2/2 Running 0 Once you see all these pods, the NVIDIA DRA plugin is working as expected -#### PCI Attributes and GPU Indices - -[KEP #4381](https://github.com/kubernetes/enhancements/pull/5316) proposes the standard PCI Root attribute. This is an important field to have for devices since the alignment of multiple devices on the PCI bus can have major implications of how fast the devices can communicate with each other. - -At the moment since the KEP just got merged, many drivers do not implement it. In the meantime, the GPU Index can be used for NVIDIA and GKE also provides a numerical index on the NIC itself to show whether it is aligned with the GPU or not. - #### Creating a GPU workload -We can create a `ResourceClaimTemplate` to specify what GPUs we want. 
We currently don't have PCI attributes yet in the NVIDIA driver library so we will want to specify the index for the time being. This isn't too important for this section but will come into relevance once we start pairing NICs to the nodes. +We can create a `ResourceClaimTemplate` to specify what GPUs we want. We +currently don't have PCI attributes yet in the NVIDIA driver library so we will +want to specify the index for the time being. This isn't too important for this +section but will come into relevance once we start pairing NICs to the nodes. ```yaml apiVersion: resource.k8s.io/v1beta1 @@ -179,6 +195,8 @@ spec: capabilities: add: ["IPC_LOCK"] volumeMounts: + - name: library-dir-host + mountPath: /usr/local/nvidia - name: gib mountPath: /usr/local/gib - name: shared-memory @@ -200,6 +218,9 @@ spec: claims: - name: gpu volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia - name: gib hostPath: path: /home/kubernetes/bin/gib @@ -217,7 +238,10 @@ spec: effect: "NoSchedule" ``` -Note how unlike the other examples, we don't use the resources field in the spec to allocate GPUs, nor do we manually mount the Nvidia libraries. This is all handled by the DRA driver that Nvidia provides. Execing into one of these nodes and listing the gpus shows that two B200 GPUs were allocated. +Note how unlike the other examples, we don't use the resources field in the spec +to allocate GPUs, nor do we manually mount the Nvidia libraries. This is all +handled by the DRA driver that Nvidia provides. Execing into one of these nodes +and listing the gpus shows that two B200 GPUs were allocated. ```sh root@nccl-gib-test-0:/usr/bin# nvidia-smi -L @@ -280,7 +304,8 @@ NCCL version 2.26.6+cuda12.8 # (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) ``` -Uh oh! We can gather the info between the pods but we can't run data? Running `ip a` shows us the issue. +Uh oh! We can gather the info between the pods but we can't run data? 
Running +`ip a` shows us the issue. ```sh root@nccl-gib-test-0:/diagnostic# ip a @@ -298,7 +323,8 @@ There are no NICs to transmit the data. This is where DraNet can help! #### Nvidia DRA + DraNet -We create one more `ResourceClaimTemplate`, for the RDMA devices on the node, along with a `DeviceClass` for the RDMA device. +We create one more `ResourceClaimTemplate`, for the RDMA devices on the node, +along with a `DeviceClass` for the RDMA device. ```yaml apiVersion: resource.k8s.io/v1beta1 @@ -311,30 +337,54 @@ spec: expression: device.driver == "dra.net" ``` +The `ResourceClaimTemplate` allows to specify multiple devices, in this case 2 +GPUs and 2 NICs and also apply a constraint so the NICs and the GPUs share the +same pcie root, avoiding the penaly of suboptimal topologies. + +It is important to indicate that each Pod will obtain a `ResourceClaim` from the +`ResourceClaimTemplate`, and since your serves may be connected in a [rail +optimized +architecture](https://docs.nvidia.com/networking/display/ibclusterbringupprocedure/setting+the+infiniband+cluster+topology), +the GPUs requested need to be also aligned across the different servers. In this +example, will request GPU0 and GPU1 of each node. 
+ ```yaml apiVersion: resource.k8s.io/v1beta1 kind: ResourceClaimTemplate metadata: - name: 2-nic + name: 2-gpu-nic-aligned spec: spec: devices: requests: + - name: gpu + deviceClassName: gpu.nvidia.com + count: 2 + selectors: + - cel: + expression: device.attributes["gpu.nvidia.com"].index < 2 - name: nic deviceClassName: dranet count: 2 selectors: - cel: - expression: device.attributes["dra.net"].rdma == true && - ( - (device.attributes["dra.net"].ifName.startsWith("gpu") && - device.attributes["dra.net"].ifName.endsWith("rdma0") && - int(device.attributes["dra.net"].ifName.substring(3, 4)) < 2) + expression: device.attributes["dra.net"].rdma == true + constraints: + - matchAttribute: "resource.kubernetes.io/pcieRoot" ``` Add this resourceclaim to the statefulset ```yaml +apiVersion: v1 +kind: Service +metadata: + name: nccl-gib-test +spec: + selector: + name: nccl-gib-test + clusterIP: None +--- apiVersion: apps/v1 kind: StatefulSet metadata: @@ -359,8 +409,8 @@ spec: capabilities: add: ["IPC_LOCK"] volumeMounts: - # - name: library-dir-host - # mountPath: /usr/local/nvidia + - name: library-dir-host + mountPath: /usr/local/nvidia - name: gib mountPath: /usr/local/gib - name: shared-memory @@ -380,7 +430,7 @@ spec: sleep infinity resources: claims: - - name: gpu + - name: gpu volumes: - name: library-dir-host hostPath: @@ -394,9 +444,7 @@ spec: sizeLimit: 250Gi resourceClaims: - name: gpu - resourceClaimTemplateName: 2-gpu - - name: nic - resourceClaimTemplateName: 2-nic + resourceClaimTemplateName: 2-gpu-nic-aligned tolerations: - key: "nvidia.com/gpu" operator: "Equal" @@ -429,40 +477,81 @@ root@nccl-gib-test-0:/usr/bin# ip a And now we run NCCL again.
```sh -root@nccl-gib-test-0:/diagnostic# /usr/local/gib/scripts/run_nccl_tests.sh -t all_gather -b 1K -e 8G nccl-gib-test-0 10.68.5.40 -g 2 +$ kubectl apply -f statefulset.yaml && kubectl rollout status --watch --timeout=600s statefulset/nccl-gib-test + +statefulset.apps/nccl-gib-test created +Waiting for 2 pods to be ready... +Waiting for 2 pods to be ready... +Waiting for 1 pods to be ready... +Waiting for 1 pods to be ready... +partitioned roll out complete: 2 new pods have been updated... +``` + +```sh +$ kubectl exec nccl-gib-test-0 -it -- /usr/local/gib/scripts/run_nccl_tests.sh -t all_gather -b 8 -e 1G -f 2 -g 1 -n 100 -w 50 nccl-gib-test-0.nccl-gib-test nccl-gib-test-1.nccl-gib-test + Initializing SSH... -Hello from nccl-gib-test-0 -Warning: Permanently added '[10.68.5.40]:222' (ECDSA) to the list of known hosts. -Hello from 10.68.5.40 +Warning: Permanently added '[nccl-gib-test-0.nccl-gib-test]:222,[10.44.3.37]:222' (ECDSA) to the list of known hosts. +Hello from nccl-gib-test-0.nccl-gib-test +Warning: Permanently added '[nccl-gib-test-1.nccl-gib-test]:222,[10.44.4.37]:222' (ECDSA) to the list of known hosts. 
+Hello from nccl-gib-test-1.nccl-gib-test Generating hostfiles for 2 hosts: -nccl-gib-test-0 -10.68.5.40 -# nThread 1 nGpus 1 minBytes 1024 maxBytes 8589934592 step: 2(factor) warmup iters: 50 iters: 100 agg iters: 1 validation: 1 graph: 0 +nccl-gib-test-0.nccl-gib-test +nccl-gib-test-1.nccl-gib-test +# nThread 1 nGpus 1 minBytes 8 maxBytes 1073741824 step: 2(factor) warmup iters: 50 iters: 100 agg iters: 1 validation: 1 graph: 0 # # Using devices -# Rank 0 Group 0 Pid 3521 on nccl-gib-test-0 device 0 [0000:8f:00] NVIDIA B200 -# Rank 1 Group 0 Pid 3514 on nccl-gib-test-0 device 1 [0000:90:00] NVIDIA B200 -# Rank 2 Group 0 Pid 2071 on nccl-gib-test-1 device 0 [0000:8f:00] NVIDIA B200 -# Rank 3 Group 0 Pid 2076 on nccl-gib-test-1 device 1 [0000:90:00] NVIDIA B200 +# Rank 0 Group 0 Pid 1444 on nccl-gib-test-0 device 0 [0000:8f:00] NVIDIA B200 +# Rank 1 Group 0 Pid 1415 on nccl-gib-test-1 device 0 [0000:8f:00] NVIDIA B200 NCCL version 2.26.6+cuda12.8 # # out-of-place in-place # size count type redop root time algbw busbw #wrong time algbw busbw #wrong # (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -... 
- 268435456 16777216 float none -1 3059.9 87.73 65.80 0 3134.1 85.65 64.24 0 - 536870912 33554432 float none -1 5886.9 91.20 68.40 0 6082.2 88.27 66.20 0 - 1073741824 67108864 float none -1 12452 86.23 64.67 0 11550 92.97 69.73 0 - 2147483648 134217728 float none -1 24426 87.92 65.94 0 23694 90.63 67.97 0 - 4294967296 268435456 float none -1 47420 90.57 67.93 0 47000 91.38 68.54 0 - - 8589934592 536870912 float none -1 96100 89.39 67.04 0 94047 91.34 68.50 0 + 0 0 float none -1 0.06 0.00 0.00 0 0.06 0.00 0.00 0 + 0 0 float none -1 0.06 0.00 0.00 0 0.06 0.00 0.00 0 + 32 4 float none -1 14.18 0.00 0.00 0 14.12 0.00 0.00 0 + 64 8 float none -1 14.30 0.00 0.00 0 14.12 0.00 0.00 0 + 128 16 float none -1 14.16 0.01 0.00 0 14.14 0.01 0.00 0 + 256 32 float none -1 14.32 0.02 0.01 0 14.37 0.02 0.01 0 + 512 64 float none -1 14.46 0.04 0.02 0 14.25 0.04 0.02 0 + 1024 128 float none -1 14.44 0.07 0.04 0 14.49 0.07 0.04 0 + 2048 256 float none -1 14.89 0.14 0.07 0 14.53 0.14 0.07 0 + 4096 512 float none -1 15.35 0.27 0.13 0 15.15 0.27 0.14 0 + 8192 1024 float none -1 17.06 0.48 0.24 0 16.80 0.49 0.24 0 + 16384 2048 float none -1 18.65 0.88 0.44 0 18.15 0.90 0.45 0 + 32768 4096 float none -1 19.29 1.70 0.85 0 19.22 1.70 0.85 0 + 65536 8192 float none -1 22.30 2.94 1.47 0 22.05 2.97 1.49 0 + 131072 16384 float none -1 28.69 4.57 2.28 0 28.35 4.62 2.31 0 + 262144 32768 float none -1 30.96 8.47 4.23 0 30.25 8.67 4.33 0 + 524288 65536 float none -1 37.04 14.16 7.08 0 34.90 15.02 7.51 0 + 1048576 131072 float none -1 46.45 22.58 11.29 0 43.78 23.95 11.98 0 + 2097152 262144 float none -1 63.16 33.21 16.60 0 59.59 35.19 17.60 0 + 4194304 524288 float none -1 101.5 41.31 20.66 0 93.90 44.67 22.33 0 + 8388608 1048576 float none -1 150.1 55.87 27.93 0 142.9 58.68 29.34 0 + 16777216 2097152 float none -1 268.2 62.56 31.28 0 252.5 66.43 33.22 0 + 33554432 4194304 float none -1 519.5 64.59 32.29 0 484.5 69.26 34.63 0 + 67108864 8388608 float none -1 1019.6 65.82 32.91 0 931.9 72.02 36.01 0 + 
134217728 16777216 float none -1 1989.8 67.45 33.73 0 1746.0 76.87 38.44 0 + 268435456 33554432 float none -1 3842.6 69.86 34.93 0 3208.5 83.66 41.83 0 + 536870912 67108864 float none -1 7502.0 71.56 35.78 0 6146.5 87.35 43.67 0 + 1073741824 134217728 float none -1 14640 73.35 36.67 0 11892 90.29 45.14 0 +# Out of bounds values : 0 OK +# Avg bus bandwidth : 12.5463 +# ``` They now connect! #### Conclusion -Using both DraNet and the Nvidia DRA libraries in combination is a way to quickly allocate both GPUs and RDMA devices in order to create interconnected workloads that can span multiple nodes. This can be used the create workloads that span multiple nodes and take advantage of spare resources on nodes. +Using both DraNet and the Nvidia DRA libraries in combination is a way to +quickly allocate both GPUs and RDMA devices in order to create interconnected +workloads that can span multiple nodes. This can be used the create workloads +that span multiple nodes and take advantage of spare resources on nodes. -For instance, consider that you have 2 nodes with 8 GPUs apiece. If you ran 2 training jobs that took 6 GPUs each then you would have 4 GPUs idle. By enabling DraNet you could take advantage of those remaining 4 for another training job. Without providing the RDMA devics, these GPUs would only be able to communicate within the same node. +For instance, consider that you have 2 nodes with 8 GPUs apiece. If you ran 2 +training jobs that took 6 GPUs each then you would have 4 GPUs idle. By enabling +DraNet you could take advantage of those remaining 4 for another training job. +Without providing the RDMA devics, these GPUs would only be able to communicate +within the same node. 
From bfd444d03f3246f926ac3b8fec36001cd692176d Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:21 +0200 Subject: [PATCH 3/8] Update site/layouts/shortcodes/embed-pdf.html Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/layouts/shortcodes/embed-pdf.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/layouts/shortcodes/embed-pdf.html b/site/layouts/shortcodes/embed-pdf.html index b86f4e40..4dc71b67 100644 --- a/site/layouts/shortcodes/embed-pdf.html +++ b/site/layouts/shortcodes/embed-pdf.html @@ -1 +1 @@ - + From cb582ce5531170cc22fc424c9ad6b7da07d52e41 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:28 +0200 Subject: [PATCH 4/8] Update site/content/docs/user/nvidia-dranet.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/content/docs/user/nvidia-dranet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index df8f3e9a..c8a6d348 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -553,5 +553,5 @@ that span multiple nodes and take advantage of spare resources on nodes. For instance, consider that you have 2 nodes with 8 GPUs apiece. If you ran 2 training jobs that took 6 GPUs each then you would have 4 GPUs idle. By enabling DraNet you could take advantage of those remaining 4 for another training job. -Without providing the RDMA devics, these GPUs would only be able to communicate +Without providing the RDMA devices, these GPUs would only be able to communicate within the same node. 
From 474c556b3f573b4e6f0e094d514eaca998e1e3dc Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:33 +0200 Subject: [PATCH 5/8] Update site/content/docs/user/nvidia-dranet.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/content/docs/user/nvidia-dranet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index c8a6d348..d5f2c45f 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -342,7 +342,7 @@ GPUs and 2 NICs and also apply a constraint so the NICs and the GPUs share the same pcie root, avoiding the penaly of suboptimal topologies. It is important to indicate that each Pod will obtain a `ResourceClaim` from the -`ResourceClaimTemplate`, and since your serves may be connected in a [rail +`ResourceClaimTemplate`, and since your servers may be connected in a [rail optimized architecture](https://docs.nvidia.com/networking/display/ibclusterbringupprocedure/setting+the+infiniband+cluster+topology), the GPUs requested need to be also aligned across the different servers. 
In this From 400298f15456c0aa9579dda47045d361cd6438fa Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:39 +0200 Subject: [PATCH 6/8] Update site/content/docs/user/nvidia-dranet.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/content/docs/user/nvidia-dranet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index d5f2c45f..285a4217 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -346,7 +346,7 @@ It is important to indicate that each Pod will obtain a `ResourceClaim` from the optimized architecture](https://docs.nvidia.com/networking/display/ibclusterbringupprocedure/setting+the+infiniband+cluster+topology), the GPUs requested need to be also aligned across the different servers. In this -example, will request GPU0 and GPU1 of each node. +example, we will request GPU0 and GPU1 of each node. ```yaml apiVersion: resource.k8s.io/v1beta1 From 890d130b250269aa9410e5c4808fa2b783c1dfaf Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:46 +0200 Subject: [PATCH 7/8] Update site/content/docs/user/nvidia-dranet.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/content/docs/user/nvidia-dranet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index 285a4217..45292e65 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -339,7 +339,7 @@ spec: The `ResourceClaimTemplate` allows to specify multiple devices, in this case 2 GPUs and 2 NICs and also apply a constraint so the NICs and the GPUs share the -same pcie root, avoiding the penaly of suboptimal topologies. +same pcie root, avoiding the penalty of suboptimal topologies. 
It is important to indicate that each Pod will obtain a `ResourceClaim` from the `ResourceClaimTemplate`, and since your servers may be connected in a [rail From 596634d4ebaf4255ee4849f8847c1b29135e3af6 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 18 Jul 2025 14:11:52 +0200 Subject: [PATCH 8/8] Update site/content/docs/user/nvidia-dranet.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/content/docs/user/nvidia-dranet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/docs/user/nvidia-dranet.md b/site/content/docs/user/nvidia-dranet.md index 45292e65..103c7623 100644 --- a/site/content/docs/user/nvidia-dranet.md +++ b/site/content/docs/user/nvidia-dranet.md @@ -547,7 +547,7 @@ They now connect! Using both DraNet and the Nvidia DRA libraries in combination is a way to quickly allocate both GPUs and RDMA devices in order to create interconnected -workloads that can span multiple nodes. This can be used the create workloads +workloads that can span multiple nodes. This can be used to create workloads that span multiple nodes and take advantage of spare resources on nodes. For instance, consider that you have 2 nodes with 8 GPUs apiece. If you ran 2