From 7bde8621757f9903f1e9ca5b1fce73d740c56e22 Mon Sep 17 00:00:00 2001 From: Paul Romano Date: Thu, 26 Feb 2026 23:16:05 -0600 Subject: [PATCH 1/2] Script for getting photon mass attenuation from NIST 126 --- make_mass_attenuation.py | 73 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 make_mass_attenuation.py diff --git a/make_mass_attenuation.py b/make_mass_attenuation.py new file mode 100644 index 0000000..e5b87b1 --- /dev/null +++ b/make_mass_attenuation.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +"""Obtain X-ray mass attenuation coefficients from the NIST Standard Reference +Database 126 (https://www.nist.gov/pml/x-ray-mass-attenuation-coefficients) and +write them to an HDF5 file. + +For each element Z=1 to 92, the data is scraped from the individual element +pages on the NIST site. The resulting HDF5 file contains 92 datasets, each a +2D array with shape (2, N) where row 0 is photon energy in eV and row 1 is +the mass attenuation coefficient mu/rho in cm^2/g. +""" + +import re +import time +from urllib.request import urlopen, Request + +from lxml import html +import numpy as np +import h5py +from openmc.data import ATOMIC_SYMBOL + + +BASE_URL = 'https://physics.nist.gov/PhysRefData/XrayMassCoef/ElemTab/z{:02}.html' + +# Pattern to match scientific notation floats (e.g. 1.00000E-03, 7.217E+00) +FLOAT_RE = re.compile(r'[0-9]+\.[0-9]+E[+-][0-9]+') + +# ============================================================================== +# SCRAPE DATA FROM NIST SITE AND GENERATE MASS ATTENUATION HDF5 FILE + +print('Generating mass_attenuation.h5...') + +with h5py.File('mass_attenuation.h5', 'w') as f: + + for Z in range(1, 93): + print(f' Processing {ATOMIC_SYMBOL[Z]} (Z={Z})...') + + # Fetch page for this element + url = BASE_URL.format(Z) + req = Request(url, headers={'User-Agent': 'openmc-data/1.0'}) + with urlopen(req) as response: + page = response.read() + + # Extract text within
 tags (the ASCII-formatted table)
+        tree = html.fromstring(page)
+        pre_text = tree.xpath('//pre//text()')
+
+        # The data is in the last text node, after the underline separator.
+        # Split on underlines and take the last part to skip the header.
+        data_text = pre_text[-1]
+        parts = data_text.split('____')
+        data_section = parts[-1] if len(parts) > 1 else data_text
+
+        # Extract all floats in scientific notation -- this cleanly skips
+        # absorption edge labels (K, L1, L2, L3, M1, etc.) and any other text
+        values = np.array([float(x) for x in FLOAT_RE.findall(data_section)])
+
+        if len(values) % 3 != 0:
+            raise ValueError(
+                f'Number of parsed values ({len(values)}) for Z={Z} '
+                f'is not divisible by 3'
+            )
+
+        # Reshape into rows of 3: energy, mu/rho, mu_en/rho
+        table = values.reshape((-1, 3))
+
+        # Create dataset as 2D array: row 0 = energy, row 1 = mu/rho
+        data = np.vstack([1e6 * table[:, 0], table[:, 1]])
+        f.create_dataset(f'{Z:03}', data=data)
+
+        # Be respectful to the NIST server
+        time.sleep(0.5)
+
+print('Done! Wrote mass_attenuation.h5')

From 6661ecb56075407b9bf43fc2999f243f7e5d157f Mon Sep 17 00:00:00 2001
From: Paul Romano 
Date: Tue, 3 Mar 2026 09:17:37 -0600
Subject: [PATCH 2/2] Switch to NIST SRD 8

---
 make_mass_attenuation.py | 110 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 42 deletions(-)

diff --git a/make_mass_attenuation.py b/make_mass_attenuation.py
index e5b87b1..6fe7793 100644
--- a/make_mass_attenuation.py
+++ b/make_mass_attenuation.py
@@ -1,16 +1,18 @@
 #!/usr/bin/env python
-"""Obtain X-ray mass attenuation coefficients from the NIST Standard Reference
-Database 126 (https://www.nist.gov/pml/x-ray-mass-attenuation-coefficients) and
-write them to an HDF5 file.
-
-For each element Z=1 to 92, the data is scraped from the individual element
-pages on the NIST site. The resulting HDF5 file contains 92 datasets, each a
-2D array with shape (2, N) where row 0 is photon energy in eV and row 1 is
-the mass attenuation coefficient mu/rho in cm^2/g.
+"""Obtain photon mass attenuation coefficients for elements Z=1 to 100 from the
+NIST Standard Reference Database 8 (XCOM Photon Cross Sections Database,
+https://www.nist.gov/pml/xcom-photon-cross-sections-database) and write them
+to an HDF5 file.
+
+Data are retrieved by submitting a POST request to the XCOM CGI for each
+element. The resulting HDF5 file contains 100 datasets (keyed by zero-padded
+atomic number, e.g. '001'--'100'), each a 2D array with shape (2, N) where
+row 0 is photon energy in [eV] and row 1 is the mass attenuation coefficient
+mu/rho (Total Attenuation with Coherent Scattering) in [cm^2/g].
 """
 
-import re
 import time
+from urllib.parse import urlencode
 from urllib.request import urlopen, Request
 
 from lxml import html
@@ -19,52 +21,76 @@
 from openmc.data import ATOMIC_SYMBOL
 
 
-BASE_URL = 'https://physics.nist.gov/PhysRefData/XrayMassCoef/ElemTab/z{:02}.html'
-
-# Pattern to match scientific notation floats (e.g. 1.00000E-03, 7.217E+00)
-FLOAT_RE = re.compile(r'[0-9]+\.[0-9]+E[+-][0-9]+')
+XCOM_URL = 'https://physics.nist.gov/cgi-bin/Xcom/xcom3_1'
+
+# POST payload template
+# OutOpt='PIC': all quantities in cm2/g
+# Output='on':  include the standard energy grid
+# NumAdd='1':   number of additional energy input rows (hidden field default)
+# Graph0='on':  select "None" for graph (faster; data table is unaffected)
+PAYLOAD_TEMPLATE = {
+    'ZSym': '',
+    'OutOpt': 'PIC',
+    'Output': 'on',
+    'NumAdd': '1',
+    'Energies': '',
+    'WindowXmin': '0.001',
+    'WindowXmax': '100000',
+    'ResizeFlag': 'on',
+    'Graph0': 'on',
+}
 
 # ==============================================================================
-# SCRAPE DATA FROM NIST SITE AND GENERATE MASS ATTENUATION HDF5 FILE
+# QUERY XCOM AND GENERATE MASS ATTENUATION HDF5 FILE
 
 print('Generating mass_attenuation.h5...')
 
 with h5py.File('mass_attenuation.h5', 'w') as f:
 
-    for Z in range(1, 93):
+    for Z in range(1, 101):
         print(f'  Processing {ATOMIC_SYMBOL[Z]} (Z={Z})...')
 
-        # Fetch page for this element
-        url = BASE_URL.format(Z)
-        req = Request(url, headers={'User-Agent': 'openmc-data/1.0'})
+        # Build and submit POST request for this element
+        payload = urlencode(dict(PAYLOAD_TEMPLATE, ZNum=str(Z))).encode('utf-8')
+        req = Request(XCOM_URL, data=payload, headers={'User-Agent': 'openmc-data/1.0'})
         with urlopen(req) as response:
             page = response.read()
 
-        # Extract text within 
 tags (the ASCII-formatted table)
+        # Parse the HTML results table.
+        # Row structure: rows 0-2 are headers; data rows follow.
+        # Columns: 0=edge label, 1=energy(MeV), 2=coherent, 3=incoherent,
+        #          4=photoelectric, 5=pair(nuclear), 6=pair(electron),
+        #          7=total w/ coherent, 8=total w/o coherent
         tree = html.fromstring(page)
-        pre_text = tree.xpath('//pre//text()')
-
-        # The data is in the last text node, after the underline separator.
-        # Split on underlines and take the last part to skip the header.
-        data_text = pre_text[-1]
-        parts = data_text.split('____')
-        data_section = parts[-1] if len(parts) > 1 else data_text
-
-        # Extract all floats in scientific notation -- this cleanly skips
-        # absorption edge labels (K, L1, L2, L3, M1, etc.) and any other text
-        values = np.array([float(x) for x in FLOAT_RE.findall(data_section)])
-
-        if len(values) % 3 != 0:
-            raise ValueError(
-                f'Number of parsed values ({len(values)}) for Z={Z} '
-                f'is not divisible by 3'
-            )
-
-        # Reshape into rows of 3: energy, mu/rho, mu_en/rho
-        table = values.reshape((-1, 3))
-
-        # Create dataset as 2D array: row 0 = energy, row 1 = mu/rho
-        data = np.vstack([1e6 * table[:, 0], table[:, 1]])
+        trs = tree.xpath('//table//tr')
+
+        energies = []
+        mu_rho = []
+        for tr in trs[3:]:  # skip 3 header rows
+            tds = tr.xpath('.//td')
+            cells = [td.text_content().strip() for td in tds]
+            if len(cells) < 8:
+                continue
+            try:
+                energies.append(1e6*float(cells[1]))  # convert MeV to eV
+                mu_rho.append(float(cells[7]))  # total attenuation w/ coherent
+            except ValueError:
+                continue
+
+        if not energies:
+            raise ValueError(f'No data parsed for Z={Z}')
+
+        # Convert to numpy arrays
+        energies = np.array(energies)
+        mu_rho = np.array(mu_rho)
+
+        # Only include values up to 20 MeV
+        mask = energies <= 20e6
+        energies = energies[mask]
+        mu_rho = mu_rho[mask]
+
+        # Create dataset as 2D array: row 0 = energy (eV), row 1 = mu/rho (cm2/g)
+        data = np.array([energies, mu_rho])
         f.create_dataset(f'{Z:03}', data=data)
 
         # Be respectful to the NIST server