Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ doc/source/_static/*
doc/source/_public/*
doc/source/api/*
doc/source/xml/*
doc/env/*

# compiled files
**/__pycache__
Expand Down
1 change: 1 addition & 0 deletions doc/python_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ exhale==0.3.7
m2r2==0.3.2
sphinx-copybutton==0.5.2
#sphinxcontrib-applehelp==1.0.7
matplotlib==3.10.0
162 changes: 162 additions & 0 deletions doc/source/bench.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
[
{
"date": "2025-03-04_12:57:12",
"gpumodel": "v100",
"idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
"bench_commit": "37161676db15115c38fed3f35c94fa447cbac7bd",
"results": [
{
"nbgpu": 1,
"cell_updates": 1.193720E+8
},
{
"nbgpu": 2,
"cell_updates": 1.178864E+8
},
{
"nbgpu": 4,
"cell_updates": 1.155336E+8
},
{
"nbgpu": 8,
"cell_updates": 1.014338E+8
},
{
"nbgpu": 16,
"cell_updates": 9.855007E+7
},
{
"nbgpu": 32,
"cell_updates": 9.012061E+7
},
{
"nbgpu": 64,
"cell_updates": 8.538461E+7
},
{
"nbgpu": 128,
"cell_updates": 8.531021E+7
}
]
},
{
"date": "2025-03-04_13:07:10",
"gpumodel": "a100",
"idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
"bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5",
"results": [
{
"nbgpu": 1,
"cell_updates": 2.044728E+8
},
{
"nbgpu": 2,
"cell_updates": 2.003563E+8
},
{
"nbgpu": 4,
"cell_updates": 1.963512E+8
},
{
"nbgpu": 8,
"cell_updates": 1.933039E+8
},
{
"nbgpu": 16,
"cell_updates": 9.759154E+7
},
{
"nbgpu": 32,
"cell_updates": 6.369645E+7
},
{
"nbgpu": 64,
"cell_updates": 4.629474E+7
},
{
"nbgpu": 128,
"cell_updates": 4.580281E+7
}
]
},
{
"date": "2025-03-04_13:16:01",
"gpumodel": "h100",
"idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
"bench_commit": "b536949200e50fac68d8a46d5db38fc8e3f02da5",
"results": [
{
"nbgpu": 1,
"cell_updates": 3.079643E+8
},
{
"nbgpu": 2,
"cell_updates": 3.012300E+8
},
{
"nbgpu": 4,
"cell_updates": 2.944091E+8
},
{
"nbgpu": 8,
"cell_updates": 2.837224E+8
},
{
"nbgpu": 16,
"cell_updates": 2.827778E+8
},
{
"nbgpu": 32,
"cell_updates": 2.822657E+8
},
{
"nbgpu": 64,
"cell_updates": 2.767820E+8
},
{
"nbgpu": 128,
"cell_updates": 2.767322E+8
}
]
},
{
"date": "2025-03-06_11:21:56",
"gpumodel": "mi250x",
"idefix_commit": "2bc09a0d218459f278e2b28506a09e4591b103ae",
"bench_commit": "868be0a87c6fcda665cbb62db7020aeff70dc62d",
"results": [
{
"nbgpu": 1,
"cell_updates": 1.436580E+8
},
{
"nbgpu": 2,
"cell_updates": 1.372499E+8
},
{
"nbgpu": 4,
"cell_updates": 1.344528E+8
},
{
"nbgpu": 8,
"cell_updates": 1.293602E+8
},
{
"nbgpu": 16,
"cell_updates": 1.260359E+8
},
{
"nbgpu": 32,
"cell_updates": 1.204980E+8
},
{
"nbgpu": 64,
"cell_updates": 1.163099E+8
},
{
"nbgpu": 128,
"cell_updates": 1.192343E+8
}
]
}
]
1 change: 1 addition & 0 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
extensions = [
"sphinx_rtd_theme",
'sphinx_git',
'matplotlib.sphinxext.plot_directive',
"breathe",
"exhale",
"m2r2",
Expand Down
23 changes: 10 additions & 13 deletions doc/source/performances.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ We report below the performances obtained on various architectures using Idefix.
is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in
Idefix test suite, disabling passive tracers. The test is computed with a 128\ :sup:`3` resolution per
MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performances measures
have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU*
(i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core
to simplify the comparison with other clusters.
have been obtained enabling MPI and we report here the performance *per GPU*, *per GCD* (on Mi250)
or *per core* (on CPU).

The complete scalability tests are available in Idefix `method paper <https://ui.adsabs.harvard.edu/abs/2023A%26A...677A...9L/abstract>`_.
The performances mentioned below are updated for each major revision of Idefix, so they might slightly differ from the method paper.
Expand All @@ -33,16 +32,14 @@ CPU performances
| IDRIS/Jean Zay | Intel Cascade Lake | 0.62 |
+---------------------+--------------------+----------------------------------------------------+


GPU performances
================

+----------------------+--------------------+----------------------------------------------------+
| Cluster name | GPU | Performances (in 10\ :sup:`6` cell/s/GPU) |
+======================+====================+====================================================+
| IDRIS/Jean Zay | NVIDIA V100 | 110 |
+----------------------+--------------------+----------------------------------------------------+
| IDRIS/Jean Zay | NVIDIA A100 | 194 |
+----------------------+--------------------+----------------------------------------------------+
| CINES/Adastra | AMD Mi250 | 250 |
+----------------------+--------------------+----------------------------------------------------+
.. plot::

import plot_idefix_bench
plot_idefix_bench.do_plot('Performance on NVidia and AMD GPUs', 'bench.json', ['v100','a100','h100','mi250x'])

.. note::

The inter-node communication on Jean Zay is not optimal on A100 nodes. A ticket is opened with IDRIS support to fix this issue.
26 changes: 26 additions & 0 deletions doc/source/plot_idefix_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import matplotlib.pyplot as plt
import json

def do_plot(title, bench_file, gpumodels):
    """Plot benchmark throughput versus GPU count, one curve per GPU model.

    The benchmark file is a JSON list of entries. Each entry is read for its
    ``'gpumodel'`` key and its ``'results'`` list, whose items provide
    ``'nbgpu'`` (x value) and ``'cell_updates'`` (y value). When several
    entries share the same GPU model, the last one in file order is plotted.

    The curves are drawn on a newly created pyplot figure; nothing is
    returned and the figure is neither shown nor saved here.

    Args:
        title: Title of the figure.
        bench_file: Path of the JSON benchmark file.
        gpumodels: Iterable of GPU model names to plot; each one is also
            used as the legend label of its curve.

    Raises:
        IndexError: If ``bench_file`` holds no entry for one of the
            requested GPU models.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(bench_file, 'r', encoding='utf-8') as f:
        benches = json.load(f)

    plt.figure()
    xmax = 0
    ymax = 0
    for gpumodel in gpumodels:
        matching = [bench for bench in benches if bench['gpumodel'] == gpumodel]
        if not matching:
            # Same exception type as the bare ``[-1]`` lookup would raise,
            # but with a message that names the offending model and file.
            raise IndexError(
                f"no benchmark entry for GPU model {gpumodel!r} in {bench_file!r}"
            )
        # Keep the last matching entry in file order.
        select = matching[-1]

        xs = [r['nbgpu'] for r in select['results']]
        ys = [r['cell_updates'] for r in select['results']]
        plt.plot(xs, ys, 'o-', label=gpumodel)
        xmax = max(xmax, max(xs))
        ymax = max(ymax, max(ys))

    plt.xscale("log", base=2)
    # Only force the limits when something was plotted: with no data the
    # bounds would be ylim(0, 0) and an inverted xlim(1, 0) on a log axis.
    if ymax > 0:
        plt.ylim(0, ymax * 1.1)
    if xmax > 0:
        plt.xlim(1, xmax * 1.1)
    plt.legend()
    plt.xlabel("Number of GPUs/GCDs")
    plt.ylabel("Performance (cells / second / GPU)")
    plt.title(title)