diff --git a/.gitignore b/.gitignore index 422647c3..4de7fb76 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ doc/source/_static/* doc/source/_public/* doc/source/api/* doc/source/xml/* +doc/env/* # compiled files **/__pycache__ diff --git a/doc/python_requirements.txt b/doc/python_requirements.txt index 35f72dcf..c75fa02b 100644 --- a/doc/python_requirements.txt +++ b/doc/python_requirements.txt @@ -14,3 +14,4 @@ exhale==0.3.7 m2r2==0.3.2 sphinx-copybutton==0.5.2 #sphinxcontrib-applehelp==1.0.7 +matplotlib==3.10.0 diff --git a/doc/source/bench.json b/doc/source/bench.json new file mode 100644 index 00000000..08a3c804 --- /dev/null +++ b/doc/source/bench.json @@ -0,0 +1,162 @@ +[ + { + "date": "2025-03-04_12:57:12", + "gpumodel": "v100", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "37161676db15115c38fed3f35c94fa447cbac7bd", + "results": [ + { + "nbgpu": 1, + "cell_updates": 1.193720E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.178864E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.155336E+8 + }, + { + "nbgpu": 8, + "cell_updates": 1.014338E+8 + }, + { + "nbgpu": 16, + "cell_updates": 9.855007E+7 + }, + { + "nbgpu": 32, + "cell_updates": 9.012061E+7 + }, + { + "nbgpu": 64, + "cell_updates": 8.538461E+7 + }, + { + "nbgpu": 128, + "cell_updates": 8.531021E+7 + } + ] + }, + { + "date": "2025-03-04_13:07:10", + "gpumodel": "a100", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5", + "results": [ + { + "nbgpu": 1, + "cell_updates": 2.044728E+8 + }, + { + "nbgpu": 2, + "cell_updates": 2.003563E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.963512E+8 + }, + { + "nbgpu": 8, + "cell_updates": 1.933039E+8 + }, + { + "nbgpu": 16, + "cell_updates": 9.759154E+7 + }, + { + "nbgpu": 32, + "cell_updates": 6.369645E+7 + }, + { + "nbgpu": 64, + "cell_updates": 4.629474E+7 + }, + { + "nbgpu": 128, + "cell_updates": 4.580281E+7 + } + ] + }, + { + "date": 
"2025-03-04_13:16:01", + "gpumodel": "h100", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5", + "results": [ + { + "nbgpu": 1, + "cell_updates": 3.079643E+8 + }, + { + "nbgpu": 2, + "cell_updates": 3.012300E+8 + }, + { + "nbgpu": 4, + "cell_updates": 2.944091E+8 + }, + { + "nbgpu": 8, + "cell_updates": 2.837224E+8 + }, + { + "nbgpu": 16, + "cell_updates": 2.827778E+8 + }, + { + "nbgpu": 32, + "cell_updates": 2.822657E+8 + }, + { + "nbgpu": 64, + "cell_updates": 2.767820E+8 + }, + { + "nbgpu": 128, + "cell_updates": 2.767322E+8 + } + ] + }, + { + "date": "2025-03-06_11:21:56", + "gpumodel": "mi250x", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "868be0a87c6fcda665cbb62db7020aeff70dc62d", + "results": [ + { + "nbgpu": 1, + "cell_updates": 1.436580E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.372499E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.344528E+8 + }, + { + "nbgpu": 8, + "cell_updates": 1.293602E+8 + }, + { + "nbgpu": 16, + "cell_updates": 1.260359E+8 + }, + { + "nbgpu": 32, + "cell_updates": 1.204980E+8 + }, + { + "nbgpu": 64, + "cell_updates": 1.163099E+8 + }, + { + "nbgpu": 128, + "cell_updates": 1.192343E+8 + } + ] + } +] diff --git a/doc/source/conf.py b/doc/source/conf.py index 24f9709b..6f0cffa4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -35,6 +35,7 @@ extensions = [ "sphinx_rtd_theme", 'sphinx_git', + 'matplotlib.sphinxext.plot_directive', "breathe", "exhale", "m2r2", diff --git a/doc/source/performances.rst b/doc/source/performances.rst index 84042685..deae1194 100644 --- a/doc/source/performances.rst +++ b/doc/source/performances.rst @@ -6,9 +6,8 @@ We report below the performances obtained on various architectures using Idefix. is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in Idefix test suite, disabling passive tracers. 
The test is computed with a 128\ :sup:`3` resolution per MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performances measures -have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU* -(i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core -to simplify the comparison with other clusters. +have been obtained enabling MPI and we report here the performance *per GPU*, *per GCD* (on Mi250) +or *per core* (on CPU). The complete scalability tests are available in Idefix `method paper `_. The performances mentionned below are updated for each major revision of Idefix, so they might slightly differ from the method paper. @@ -33,16 +32,14 @@ CPU performances | IDRIS/Jean Zay | Intel Cascade Lake | 0.62 | +---------------------+--------------------+----------------------------------------------------+ - GPU performances ================ -+----------------------+--------------------+----------------------------------------------------+ -| Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) | -+======================+====================+====================================================+ -| IDRIS/Jean Zay | NVIDIA V100 | 110 | -+----------------------+--------------------+----------------------------------------------------+ -| IDRIS/Jean Zay | NVIDIA A100 | 194 | -+----------------------+--------------------+----------------------------------------------------+ -| CINES/Adastra | AMD Mi250 | 250 | -+----------------------+--------------------+----------------------------------------------------+ +.. plot:: + + import plot_idefix_bench + plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100','mi250x']) + +.. note:: + + The inter-node communication on Jean Zay is not optimal on A100 nodes. A ticket is opened with IDRIS support to fix this issue. 
diff --git a/doc/source/plot_idefix_bench.py b/doc/source/plot_idefix_bench.py
new file mode 100644
index 00000000..43482eb0
--- /dev/null
+++ b/doc/source/plot_idefix_bench.py
@@ -0,0 +1,52 @@
+import matplotlib.pyplot as plt
+import json
+
+
+def do_plot(title, bench_file, gpumodels):
+    """Plot per-GPU performance versus GPU count for each requested model.
+
+    For every name in ``gpumodels``, one curve is drawn from the last entry of
+    ``bench_file`` (in file order) whose ``gpumodel`` field matches that name.
+
+    Args:
+        title: Title of the figure.
+        bench_file: Path to a JSON file holding a list of benchmark entries,
+            each with a ``gpumodel`` field and a ``results`` list of records
+            carrying ``nbgpu`` and ``cell_updates``.
+        gpumodels: Names of the GPU models to plot, in legend order.
+
+    Raises:
+        IndexError: If a requested model has no entry in ``bench_file``.
+    """
+    # JSON is UTF-8 by specification; do not rely on the locale default.
+    with open(bench_file, 'r', encoding='utf-8') as f:
+        benches = json.load(f)
+
+    plt.figure()
+    xmax = 0
+    ymax = 0
+    for gpumodel in gpumodels:
+        matches = [bench for bench in benches if bench['gpumodel'] == gpumodel]
+        if not matches:
+            # Same exception type as a bare [-1] on an empty list, but the
+            # message now names the missing model and the file searched.
+            raise IndexError(
+                f"no benchmark entry for GPU model {gpumodel!r} in {bench_file}"
+            )
+        # Several runs may be recorded for one model; keep the last listed.
+        select = matches[-1]
+
+        xs = [r['nbgpu'] for r in select['results']]
+        ys = [r['cell_updates'] for r in select['results']]
+        plt.plot(xs, ys, 'o-', label=gpumodel)
+        xmax = max(xmax, max(xs))
+        ymax = max(ymax, max(ys))
+
+    plt.xscale("log", base=2)
+    # Leave 10% headroom past the largest plotted values on both axes.
+    plt.ylim(0, ymax * 1.1)
+    plt.xlim(1, xmax * 1.1)
+    plt.legend()
+    plt.xlabel("Number of GPUs/GCDs")
+    plt.ylabel("Performance (cells / second / GPU)")
+    plt.title(title)