idefix-code · glesur · Mar 11, 2025 · Jan 18, 2025 · Jan 17, 2025 · Jan 24, 2025
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@ doc/source/_static/*
 doc/source/_public/*
 doc/source/api/*
 doc/source/xml/*
+doc/env/*
 
 # compiled files
 **/__pycache__

diff --git a/doc/python_requirements.txt b/doc/python_requirements.txt
@@ -14,3 +14,4 @@ exhale==0.3.7
 m2r2==0.3.2
 sphinx-copybutton==0.5.2
 #sphinxcontrib-applehelp==1.0.7
+matplotlib==3.10.0
diff --git a/doc/source/bench.json b/doc/source/bench.json
@@ -0,0 +1,162 @@
+[
+  {
+    "date": "2025-03-04_12:57:12",
+    "gpumodel": "v100",
+    "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
+    "bench_commit": "37161676db15115c38fed3f35c94fa447cbac7bd",
+    "results": [
+      {
+        "nbgpu": 1,
+        "cell_updates": 1.193720E+8
+      },
+      {
+        "nbgpu": 2,
+        "cell_updates": 1.178864E+8
+      },
+      {
+        "nbgpu": 4,
+        "cell_updates": 1.155336E+8
+      },
+      {
+        "nbgpu": 8,
+        "cell_updates": 1.014338E+8
+      },
+      {
+        "nbgpu": 16,
+        "cell_updates": 9.855007E+7
+      },
+      {
+        "nbgpu": 32,
+        "cell_updates": 9.012061E+7
+      },
+      {
+        "nbgpu": 64,
+        "cell_updates": 8.538461E+7
+      },
+      {
+        "nbgpu": 128,
+        "cell_updates": 8.531021E+7
+      }
+    ]
+  },
+  {
+    "date": "2025-03-04_13:07:10",
+    "gpumodel": "a100",
+    "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
+    "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5",
+    "results": [
+      {
+        "nbgpu": 1,
+        "cell_updates": 2.044728E+8
+      },
+      {
+        "nbgpu": 2,
+        "cell_updates": 2.003563E+8
+      },
+      {
+        "nbgpu": 4,
+        "cell_updates": 1.963512E+8
+      },
+      {
+        "nbgpu": 8,
+        "cell_updates": 1.933039E+8
+      },
+      {
+        "nbgpu": 16,
+        "cell_updates": 9.759154E+7
+      },
+      {
+        "nbgpu": 32,
+        "cell_updates": 6.369645E+7
+      },
+      {
+        "nbgpu": 64,
+        "cell_updates": 4.629474E+7
+      },
+      {
+        "nbgpu": 128,
+        "cell_updates": 4.580281E+7
+      }
+    ]
+  },
+  {
+    "date": "2025-03-04_13:16:01",
+    "gpumodel": "h100",
+    "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
+    "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5",
+    "results": [
+      {
+        "nbgpu": 1,
+        "cell_updates": 3.079643E+8
+      },
+      {
+        "nbgpu": 2,
+        "cell_updates": 3.012300E+8
+      },
+      {
+        "nbgpu": 4,
+        "cell_updates": 2.944091E+8
+      },
+      {
+        "nbgpu": 8,
+        "cell_updates": 2.837224E+8
+      },
+      {
+        "nbgpu": 16,
+        "cell_updates": 2.827778E+8
+      },
+      {
+        "nbgpu": 32,
+        "cell_updates": 2.822657E+8
+      },
+      {
+        "nbgpu": 64,
+        "cell_updates": 2.767820E+8
+      },
+      {
+        "nbgpu": 128,
+        "cell_updates": 2.767322E+8
+      }
+    ]
+  },
+  {
+    "date": "2025-03-06_11:21:56",
+    "gpumodel": "mi250x",
+    "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
+    "bench_commit": "868be0a87c6fcda665cbb62db7020aeff70dc62d",
+    "results": [
+      {
+        "nbgpu": 1,
+        "cell_updates": 1.436580E+8
+      },
+      {
+        "nbgpu": 2,
+        "cell_updates": 1.372499E+8
+      },
+      {
+        "nbgpu": 4,
+        "cell_updates": 1.344528E+8
+      },
+      {
+        "nbgpu": 8,
+        "cell_updates": 1.293602E+8
+      },
+      {
+        "nbgpu": 16,
+        "cell_updates": 1.260359E+8
+      },
+      {
+        "nbgpu": 32,
+        "cell_updates": 1.204980E+8
+      },
+      {
+        "nbgpu": 64,
+        "cell_updates": 1.163099E+8
+      },
+      {
+        "nbgpu": 128,
+        "cell_updates": 1.192343E+8
+      }
+    ]
+  }
+]
diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -35,6 +35,7 @@
 extensions = [
     "sphinx_rtd_theme",
     'sphinx_git',
+    'matplotlib.sphinxext.plot_directive',
     "breathe",
     "exhale",
     "m2r2",

diff --git a/doc/source/performances.rst b/doc/source/performances.rst
@@ -6,9 +6,8 @@ We report below the performances obtained on various architectures using Idefix.
 is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in
 Idefix test suite, disabling passive tracers. The test is computed with a 128\ :sup:`3` resolution per
 MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performances measures
-have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU*
-(i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core
-to simplify the comparison with other clusters.
+have been obtained enabling MPI, and we report here the performance *per GPU*, *per GCD* (on Mi250)
+or *per core* (on CPU).
 
 The complete scalability tests are available in Idefix `method paper <https://ui.adsabs.harvard.edu/abs/2023A%26A...677A...9L/abstract>`_.
 The performances mentionned below are updated for each major revision of Idefix, so they might slightly differ from the method paper.
@@ -33,16 +32,14 @@ CPU performances
 | IDRIS/Jean Zay      | Intel Cascade Lake | 0.62                                               |
 +---------------------+--------------------+----------------------------------------------------+
 
-
 GPU performances
 ================
 
-+----------------------+--------------------+----------------------------------------------------+
-| Cluster name         | GPU                | Performances (in 10\ :sup:`6` cell/s/GPU)          |
-+======================+====================+====================================================+
-| IDRIS/Jean Zay       | NVIDIA V100        | 110                                                |
-+----------------------+--------------------+----------------------------------------------------+
-| IDRIS/Jean Zay       | NVIDIA A100        | 194                                                |
-+----------------------+--------------------+----------------------------------------------------+
-| CINES/Adastra        | AMD Mi250          | 250                                                |
-+----------------------+--------------------+----------------------------------------------------+
+.. plot::
+
+   import plot_idefix_bench
+   plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100','mi250x'])
+
+.. note::
+
+    The inter-node communication on Jean Zay is not optimal on A100 nodes. A ticket has been opened with IDRIS support to fix this issue.
diff --git a/doc/source/plot_idefix_bench.py b/doc/source/plot_idefix_bench.py
@@ -0,0 +1,26 @@
+import matplotlib.pyplot as plt
+import json
+
+def do_plot(title, bench_file, gpumodels):
+    with open(bench_file, 'r') as f:
+        benches = json.load(f)
+
+    plt.figure()
+    xmax=0
+    ymax=0
+    for gpumodel in gpumodels:
+        select = [bench for bench in benches if bench['gpumodel'] == gpumodel][-1]
+
+        xs = [r['nbgpu'] for r in select['results']]
+        ys = [r['cell_updates'] for r in select['results']]
+        plt.plot(xs, ys,'o-',label=gpumodel)
+        xmax=max(xmax,max(xs))
+        ymax=max(ymax,max(ys))
+
+    plt.xscale("log", base=2)
+    plt.ylim(0,ymax*1.1)
+    plt.xlim(1,xmax*1.1)
+    plt.legend()
+    plt.xlabel("Number of GPUs/GCDs")
+    plt.ylabel("Performance (cells / second / GPU)")
+    plt.title(title)