From d0f348a823da191a9b5e1c869c0d25aefb779e50 Mon Sep 17 00:00:00 2001 From: Marc Coiffier Date: Fri, 17 Jan 2025 18:19:42 +0100 Subject: [PATCH 1/7] =?UTF-8?q?G=C3=A9n=C3=A9ration=20automatique=20de=20p?= =?UTF-8?q?lots=20de=20performance=20dans=20la=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/python_requirements.txt | 1 + doc/source/bench-jeanzay.json | 122 ++++++++++++++++++++++++++++++++ doc/source/conf.py | 1 + doc/source/performances.rst | 20 ++++-- doc/source/plot_idefix_bench.py | 14 ++++ 5 files changed, 153 insertions(+), 5 deletions(-) create mode 100755 doc/source/bench-jeanzay.json create mode 100644 doc/source/plot_idefix_bench.py diff --git a/doc/python_requirements.txt b/doc/python_requirements.txt index 35f72dcf..c75fa02b 100644 --- a/doc/python_requirements.txt +++ b/doc/python_requirements.txt @@ -14,3 +14,4 @@ exhale==0.3.7 m2r2==0.3.2 sphinx-copybutton==0.5.2 #sphinxcontrib-applehelp==1.0.7 +matplotlib==3.10.0 diff --git a/doc/source/bench-jeanzay.json b/doc/source/bench-jeanzay.json new file mode 100755 index 00000000..f58d38d4 --- /dev/null +++ b/doc/source/bench-jeanzay.json @@ -0,0 +1,122 @@ +[ + { + "date": "2025-01-17_15:33:13", + "gpumodel": "v100", + "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 1.192208E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.178276E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.151014E+8 + } + ] + }, + { + "date": "2025-01-17_15:35:16", + "gpumodel": "v100", + "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 1.192139E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.178690E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.092092E+8 + }, + { + "nbgpu": 8, + "cell_updates": 7.464571E+7 + }, + { + "nbgpu": 16, + 
"cell_updates": 7.485223E+7 + }, + { + "nbgpu": 32, + "cell_updates": 6.795755E+7 + } + ] + }, + { + "date": "2025-01-17_16:57:02", + "gpumodel": "v100", + "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 1.192784E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.117701E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.087580E+8 + }, + { + "nbgpu": 8, + "cell_updates": 7.472364E+7 + }, + { + "nbgpu": 16, + "cell_updates": 7.554497E+7 + }, + { + "nbgpu": 32, + "cell_updates": 6.669339E+7 + } + ] + }, + { + "date": "2025-01-17_17:09:39", + "gpumodel": "a100", + "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 2.035935E+8 + }, + { + "nbgpu": 2, + "cell_updates": 1.992927E+8 + }, + { + "nbgpu": 4, + "cell_updates": 1.958386E+8 + }, + { + "nbgpu": 8, + "cell_updates": 1.929568E+8 + } + ] + }, + { + "date": "2025-01-17_17:23:13", + "gpumodel": "h100", + "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 3.083581E+8 + } + ] + } +] diff --git a/doc/source/conf.py b/doc/source/conf.py index 24f9709b..6f0cffa4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -35,6 +35,7 @@ extensions = [ "sphinx_rtd_theme", 'sphinx_git', + 'matplotlib.sphinxext.plot_directive', "breathe", "exhale", "m2r2", diff --git a/doc/source/performances.rst b/doc/source/performances.rst index 84042685..985991e3 100644 --- a/doc/source/performances.rst +++ b/doc/source/performances.rst @@ -33,16 +33,26 @@ CPU performances | IDRIS/Jean Zay | Intel Cascade Lake | 0.62 | +---------------------+--------------------+----------------------------------------------------+ - GPU performances ================ +.. 
plot:: + + import plot_idefix_bench + plot_idefix_bench.do_plot('Performance on NVidia V100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'v100') + +.. plot:: + + import plot_idefix_bench + plot_idefix_bench.do_plot('Performance on NVidia A100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'a100') + +.. plot:: + + import plot_idefix_bench + plot_idefix_bench.do_plot('Performance on NVidia H100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'h100') + +----------------------+--------------------+----------------------------------------------------+ | Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) | +======================+====================+====================================================+ -| IDRIS/Jean Zay | NVIDIA V100 | 110 | -+----------------------+--------------------+----------------------------------------------------+ -| IDRIS/Jean Zay | NVIDIA A100 | 194 | -+----------------------+--------------------+----------------------------------------------------+ | CINES/Adastra | AMD Mi250 | 250 | +----------------------+--------------------+----------------------------------------------------+ diff --git a/doc/source/plot_idefix_bench.py b/doc/source/plot_idefix_bench.py new file mode 100644 index 00000000..cd45aa5a --- /dev/null +++ b/doc/source/plot_idefix_bench.py @@ -0,0 +1,14 @@ +import matplotlib.pyplot as plt +import json + +def do_plot(title, bench_file, gpumodel): + with open(bench_file, 'r') as f: + benches = json.load(f) + + select = [bench for bench in benches if bench['gpumodel'] == gpumodel][-1] + xs = [r['nbgpu'] for r in select['results']] + ys = [r['cell_updates'] for r in select['results']] + + plt.xscale("log") + plt.plot(xs, ys) + plt.title(title) From 76e399aadfc86e48add05dc079b9d6131de320de Mon Sep 17 00:00:00 2001 From: Marc Coiffier Date: Fri, 24 Jan 2025 11:56:44 +0100 Subject: [PATCH 2/7] =?UTF-8?q?R=C3=A9cup=C3=A9ration=20des=20derniers=20b?= =?UTF-8?q?enchmarks=20sur=20JeanZay?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/bench-jeanzay.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) mode change 100755 => 100644 doc/source/bench-jeanzay.json diff --git a/doc/source/bench-jeanzay.json b/doc/source/bench-jeanzay.json old mode 100755 new mode 100644 index f58d38d4..6d5fd5ca --- a/doc/source/bench-jeanzay.json +++ b/doc/source/bench-jeanzay.json @@ -80,6 +80,14 @@ { "nbgpu": 32, "cell_updates": 6.669339E+7 + }, + { + "nbgpu": 64, + "cell_updates": 5.497455E+7 + }, + { + "nbgpu": 128, + "cell_updates": 5.440610E+7 } ] }, @@ -104,6 +112,22 @@ { "nbgpu": 8, "cell_updates": 1.929568E+8 + }, + { + "nbgpu": 16, + "cell_updates": 9.408132E+7 + }, + { + "nbgpu": 32, + "cell_updates": 5.937487E+7 + }, + { + "nbgpu": 64, + "cell_updates": 4.159838E+7 + }, + { + "nbgpu": 128, + "cell_updates": 5.061931E+7 } ] }, From 49a56ee5b83d0822831bcb8f41859104bee5ea41 Mon Sep 17 00:00:00 2001 From: Marc Coiffier Date: Fri, 24 Jan 2025 14:33:29 +0100 Subject: [PATCH 3/7] =?UTF-8?q?R=C3=A9cup=C3=A9ration=20de=20benchs=20pour?= =?UTF-8?q?=20H100;=20ajout=20de=20labels=20aux=20axes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/bench-jeanzay.json | 64 +++++++++++++++++++++++++++++++++ doc/source/plot_idefix_bench.py | 7 ++-- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/doc/source/bench-jeanzay.json b/doc/source/bench-jeanzay.json index 6d5fd5ca..7ded6914 100644 --- a/doc/source/bench-jeanzay.json +++ b/doc/source/bench-jeanzay.json @@ -142,5 +142,69 @@ "cell_updates": 3.083581E+8 } ] + }, + { + "date": "2025-01-24_13:02:08", + "gpumodel": "h100", + "idefix_commit": "54482d582361f1cd05c39717902f752b3d04ae6e", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 3.076537E+8 + } + ] + }, + { + "date": "2025-01-24_13:04:02", + "gpumodel": "h100", + "idefix_commit": 
"54482d582361f1cd05c39717902f752b3d04ae6e", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 3.032686E+8 + }, + { + "nbgpu": 2, + "cell_updates": 3.007089E+8 + }, + { + "nbgpu": 4, + "cell_updates": 2.938962E+8 + } + ] + }, + { + "date": "2025-01-24_14:17:30", + "gpumodel": "h100", + "idefix_commit": "54482d582361f1cd05c39717902f752b3d04ae6e", + "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "results": [ + { + "nbgpu": 1, + "cell_updates": 3.074363E+8 + }, + { + "nbgpu": 2, + "cell_updates": 3.009340E+8 + }, + { + "nbgpu": 4, + "cell_updates": 2.938868E+8 + }, + { + "nbgpu": 8, + "cell_updates": 2.833054E+8 + }, + { + "nbgpu": 16, + "cell_updates": 2.822483E+8 + }, + { + "nbgpu": 32, + "cell_updates": 2.817151E+8 + } + ] } ] diff --git a/doc/source/plot_idefix_bench.py b/doc/source/plot_idefix_bench.py index cd45aa5a..ac91b787 100644 --- a/doc/source/plot_idefix_bench.py +++ b/doc/source/plot_idefix_bench.py @@ -8,7 +8,10 @@ def do_plot(title, bench_file, gpumodel): select = [bench for bench in benches if bench['gpumodel'] == gpumodel][-1] xs = [r['nbgpu'] for r in select['results']] ys = [r['cell_updates'] for r in select['results']] - - plt.xscale("log") + + plt.xscale("log", base=2) plt.plot(xs, ys) + plt.ylim(0,max(ys)*1.1) + plt.xlabel("Number of GPUs") + plt.ylabel("Performance (cells / second / GPU)") plt.title(title) From 0cb52eaeecb730f3264344e2b5c6ccd36c18c92e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:34:54 +0000 Subject: [PATCH 4/7] [pre-commit.ci lite] apply automatic fixes --- doc/source/performances.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/performances.rst b/doc/source/performances.rst index 985991e3..6f8c88b2 100644 --- a/doc/source/performances.rst +++ b/doc/source/performances.rst @@ -50,7 +50,7 @@ GPU performances import 
plot_idefix_bench plot_idefix_bench.do_plot('Performance on NVidia H100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'h100') - + +----------------------+--------------------+----------------------------------------------------+ | Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) | +======================+====================+====================================================+ From 4c9985fbdd64d8923626dc7b7c10218d10d14e2d Mon Sep 17 00:00:00 2001 From: Geoffroy Lesur Date: Tue, 4 Mar 2025 15:18:05 +0100 Subject: [PATCH 5/7] add environement to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 422647c3..4de7fb76 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ doc/source/_static/* doc/source/_public/* doc/source/api/* doc/source/xml/* +doc/env/* # compiled files **/__pycache__ From 1d9d101860f1a6561030c2f59e04b27e544f5afe Mon Sep 17 00:00:00 2001 From: Geoffroy Lesur Date: Tue, 4 Mar 2025 16:09:46 +0100 Subject: [PATCH 6/7] use one single plot update performance page --- doc/source/{bench-jeanzay.json => bench.json} | 0 doc/source/performances.rst | 17 +++----------- doc/source/plot_idefix_bench.py | 23 +++++++++++++------ 3 files changed, 19 insertions(+), 21 deletions(-) rename doc/source/{bench-jeanzay.json => bench.json} (100%) diff --git a/doc/source/bench-jeanzay.json b/doc/source/bench.json similarity index 100% rename from doc/source/bench-jeanzay.json rename to doc/source/bench.json diff --git a/doc/source/performances.rst b/doc/source/performances.rst index 6f8c88b2..0cf8f862 100644 --- a/doc/source/performances.rst +++ b/doc/source/performances.rst @@ -6,9 +6,8 @@ We report below the performances obtained on various architectures using Idefix. is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in Idefix test suite, disabling passive tracers. 
The test is computed with a 128\ :sup:`3` resolution per MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performances measures -have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU* -(i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core -to simplify the comparison with other clusters. +have been obtained enabling MPI and we reporte here the performance *per GPU*, *per GCD* (on Mi250) + or *per core* (on CPU). The complete scalability tests are available in Idefix `method paper `_. The performances mentionned below are updated for each major revision of Idefix, so they might slightly differ from the method paper. @@ -39,17 +38,7 @@ GPU performances .. plot:: import plot_idefix_bench - plot_idefix_bench.do_plot('Performance on NVidia V100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'v100') - -.. plot:: - - import plot_idefix_bench - plot_idefix_bench.do_plot('Performance on NVidia A100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'a100') - -.. 
plot:: - - import plot_idefix_bench - plot_idefix_bench.do_plot('Performance on NVidia H100 GPUs at Jean-Zay', 'bench-jeanzay.json', 'h100') + plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100']) +----------------------+--------------------+----------------------------------------------------+ | Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) | diff --git a/doc/source/plot_idefix_bench.py b/doc/source/plot_idefix_bench.py index ac91b787..43482eb0 100644 --- a/doc/source/plot_idefix_bench.py +++ b/doc/source/plot_idefix_bench.py @@ -1,17 +1,26 @@ import matplotlib.pyplot as plt import json -def do_plot(title, bench_file, gpumodel): +def do_plot(title, bench_file, gpumodels): with open(bench_file, 'r') as f: benches = json.load(f) - select = [bench for bench in benches if bench['gpumodel'] == gpumodel][-1] - xs = [r['nbgpu'] for r in select['results']] - ys = [r['cell_updates'] for r in select['results']] + plt.figure() + xmax=0 + ymax=0 + for gpumodel in gpumodels: + select = [bench for bench in benches if bench['gpumodel'] == gpumodel][-1] + + xs = [r['nbgpu'] for r in select['results']] + ys = [r['cell_updates'] for r in select['results']] + plt.plot(xs, ys,'o-',label=gpumodel) + xmax=max(xmax,max(xs)) + ymax=max(ymax,max(ys)) plt.xscale("log", base=2) - plt.plot(xs, ys) - plt.ylim(0,max(ys)*1.1) - plt.xlabel("Number of GPUs") + plt.ylim(0,ymax*1.1) + plt.xlim(1,xmax*1.1) + plt.legend() + plt.xlabel("Number of GPUs/GCDs") plt.ylabel("Performance (cells / second / GPU)") plt.title(title) From b2168dfb0c10b87f04a6d7ce34243e3fdd323a47 Mon Sep 17 00:00:00 2001 From: Geoffroy Lesur Date: Mon, 10 Mar 2025 16:13:46 +0100 Subject: [PATCH 7/7] add benchmark data --- doc/source/bench.json | 162 +++++++++++++----------------------- doc/source/performances.rst | 12 ++- 2 files changed, 62 insertions(+), 112 deletions(-) diff --git a/doc/source/bench.json b/doc/source/bench.json index 7ded6914..08a3c804 100644 --- 
a/doc/source/bench.json +++ b/doc/source/bench.json @@ -1,209 +1,161 @@ [ { - "date": "2025-01-17_15:33:13", + "date": "2025-03-04_12:57:12", "gpumodel": "v100", - "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "37161676db15115c38fed3f35c94fa447cbac7bd", "results": [ { "nbgpu": 1, - "cell_updates": 1.192208E+8 + "cell_updates": 1.193720E+8 }, { "nbgpu": 2, - "cell_updates": 1.178276E+8 + "cell_updates": 1.178864E+8 }, { "nbgpu": 4, - "cell_updates": 1.151014E+8 - } - ] - }, - { - "date": "2025-01-17_15:35:16", - "gpumodel": "v100", - "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", - "results": [ - { - "nbgpu": 1, - "cell_updates": 1.192139E+8 + "cell_updates": 1.155336E+8 }, { - "nbgpu": 2, - "cell_updates": 1.178690E+8 + "nbgpu": 8, + "cell_updates": 1.014338E+8 }, { - "nbgpu": 4, - "cell_updates": 1.092092E+8 + "nbgpu": 16, + "cell_updates": 9.855007E+7 }, { - "nbgpu": 8, - "cell_updates": 7.464571E+7 + "nbgpu": 32, + "cell_updates": 9.012061E+7 }, { - "nbgpu": 16, - "cell_updates": 7.485223E+7 + "nbgpu": 64, + "cell_updates": 8.538461E+7 }, { - "nbgpu": 32, - "cell_updates": 6.795755E+7 + "nbgpu": 128, + "cell_updates": 8.531021E+7 } ] }, { - "date": "2025-01-17_16:57:02", - "gpumodel": "v100", - "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "date": "2025-03-04_13:07:10", + "gpumodel": "a100", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5", "results": [ { "nbgpu": 1, - "cell_updates": 1.192784E+8 + "cell_updates": 2.044728E+8 }, { "nbgpu": 2, - "cell_updates": 1.117701E+8 + "cell_updates": 2.003563E+8 }, { "nbgpu": 4, - "cell_updates": 1.087580E+8 + "cell_updates": 1.963512E+8 }, { 
"nbgpu": 8, - "cell_updates": 7.472364E+7 + "cell_updates": 1.933039E+8 }, { "nbgpu": 16, - "cell_updates": 7.554497E+7 + "cell_updates": 9.759154E+7 }, { "nbgpu": 32, - "cell_updates": 6.669339E+7 + "cell_updates": 6.369645E+7 }, { "nbgpu": 64, - "cell_updates": 5.497455E+7 + "cell_updates": 4.629474E+7 }, { "nbgpu": 128, - "cell_updates": 5.440610E+7 + "cell_updates": 4.580281E+7 } ] }, { - "date": "2025-01-17_17:09:39", - "gpumodel": "a100", - "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "date": "2025-03-04_13:16:01", + "gpumodel": "h100", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5", "results": [ { "nbgpu": 1, - "cell_updates": 2.035935E+8 + "cell_updates": 3.079643E+8 }, { "nbgpu": 2, - "cell_updates": 1.992927E+8 + "cell_updates": 3.012300E+8 }, { "nbgpu": 4, - "cell_updates": 1.958386E+8 + "cell_updates": 2.944091E+8 }, { "nbgpu": 8, - "cell_updates": 1.929568E+8 + "cell_updates": 2.837224E+8 }, { "nbgpu": 16, - "cell_updates": 9.408132E+7 + "cell_updates": 2.827778E+8 }, { "nbgpu": 32, - "cell_updates": 5.937487E+7 + "cell_updates": 2.822657E+8 }, { "nbgpu": 64, - "cell_updates": 4.159838E+7 + "cell_updates": 2.767820E+8 }, { "nbgpu": 128, - "cell_updates": 5.061931E+7 + "cell_updates": 2.767322E+8 } ] }, { - "date": "2025-01-17_17:23:13", - "gpumodel": "h100", - "idefix_commit": "d0e82202ca90d0b664c71582962c2ae255bc6528", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", + "date": "2025-03-06_11:21:56", + "gpumodel": "mi250x", + "idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae", + "bench_commit": "868be0a87c6fcda665cbb62db7020aeff70dc62d", "results": [ { "nbgpu": 1, - "cell_updates": 3.083581E+8 - } - ] - }, - { - "date": "2025-01-24_13:02:08", - "gpumodel": "h100", - "idefix_commit": "54482d582361f1cd05c39717902f752b3d04ae6e", - "bench_commit": 
"a94bc00490b3c62f81f7713327f4388e44a2abea", - "results": [ - { - "nbgpu": 1, - "cell_updates": 3.076537E+8 - } - ] - }, - { - "date": "2025-01-24_13:04:02", - "gpumodel": "h100", - "idefix_commit": "54482d582361f1cd05c39717902f752b3d04ae6e", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", - "results": [ - { - "nbgpu": 1, - "cell_updates": 3.032686E+8 + "cell_updates": 1.436580E+8 }, { "nbgpu": 2, - "cell_updates": 3.007089E+8 + "cell_updates": 1.372499E+8 }, { "nbgpu": 4, - "cell_updates": 2.938962E+8 - } - ] - }, - { - "date": "2025-01-24_14:17:30", - "gpumodel": "h100", - "idefix_commit": "54482d582361f1cd05c39717902f752b3d04ae6e", - "bench_commit": "a94bc00490b3c62f81f7713327f4388e44a2abea", - "results": [ - { - "nbgpu": 1, - "cell_updates": 3.074363E+8 + "cell_updates": 1.344528E+8 }, { - "nbgpu": 2, - "cell_updates": 3.009340E+8 + "nbgpu": 8, + "cell_updates": 1.293602E+8 }, { - "nbgpu": 4, - "cell_updates": 2.938868E+8 + "nbgpu": 16, + "cell_updates": 1.260359E+8 }, { - "nbgpu": 8, - "cell_updates": 2.833054E+8 + "nbgpu": 32, + "cell_updates": 1.204980E+8 }, { - "nbgpu": 16, - "cell_updates": 2.822483E+8 + "nbgpu": 64, + "cell_updates": 1.163099E+8 }, { - "nbgpu": 32, - "cell_updates": 2.817151E+8 + "nbgpu": 128, + "cell_updates": 1.192343E+8 } ] } diff --git a/doc/source/performances.rst b/doc/source/performances.rst index 0cf8f862..deae1194 100644 --- a/doc/source/performances.rst +++ b/doc/source/performances.rst @@ -7,7 +7,7 @@ is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_con Idefix test suite, disabling passive tracers. The test is computed with a 128\ :sup:`3` resolution per MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performances measures have been obtained enabling MPI and we reporte here the performance *per GPU*, *per GCD* (on Mi250) - or *per core* (on CPU). +or *per core* (on CPU). The complete scalability tests are available in Idefix `method paper `_. 
The performances mentionned below are updated for each major revision of Idefix, so they might slightly differ from the method paper. @@ -38,10 +38,8 @@ GPU performances .. plot:: import plot_idefix_bench - plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100']) + plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100','mi250x']) -+----------------------+--------------------+----------------------------------------------------+ -| Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) | -+======================+====================+====================================================+ -| CINES/Adastra | AMD Mi250 | 250 | -+----------------------+--------------------+----------------------------------------------------+ +.. note:: + + The inter-node communication on Jean Zay is not optimal on A100 nodes. A ticket has been opened with IDRIS support to fix this issue.