From 7bde8621757f9903f1e9ca5b1fce73d740c56e22 Mon Sep 17 00:00:00 2001
From: Paul Romano <paul.k.romano@gmail.com>
Date: Thu, 26 Feb 2026 23:16:05 -0600
Subject: [PATCH 1/2] Script for getting photon mass attenuation from NIST 126

---
 make_mass_attenuation.py | 73 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 make_mass_attenuation.py
diff --git a/make_mass_attenuation.py b/make_mass_attenuation.py
new file mode 100644
index 0000000..e5b87b1
--- /dev/null
+++ b/make_mass_attenuation.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""Obtain X-ray mass attenuation coefficients from the NIST Standard Reference
+Database 126 (https://www.nist.gov/pml/x-ray-mass-attenuation-coefficients) and
+write them to an HDF5 file.
+
+For each element Z=1 to 92, the data is scraped from the individual element
+pages on the NIST site. The resulting HDF5 file contains 92 datasets, each a
+2D array with shape (2, N) where row 0 is photon energy in eV and row 1 is
+the mass attenuation coefficient mu/rho in cm^2/g.
+"""
+
+import re
+import time
+from urllib.request import urlopen, Request
+
+from lxml import html
+import numpy as np
+import h5py
+from openmc.data import ATOMIC_SYMBOL
+
+
+BASE_URL = 'https://physics.nist.gov/PhysRefData/XrayMassCoef/ElemTab/z{:02}.html'
+
+# Pattern to match scientific notation floats (e.g. 1.00000E-03, 7.217E+00)
+FLOAT_RE = re.compile(r'[0-9]+\.[0-9]+E[+-][0-9]+')
+
+# ==============================================================================
+# SCRAPE DATA FROM NIST SITE AND GENERATE MASS ATTENUATION HDF5 FILE
+
+print('Generating mass_attenuation.h5...')
+
+with h5py.File('mass_attenuation.h5', 'w') as f:
+
+    for Z in range(1, 93):
+        print(f'  Processing {ATOMIC_SYMBOL[Z]} (Z={Z})...')
+
+        # Fetch page for this element
+        url = BASE_URL.format(Z)
+        req = Request(url, headers={'User-Agent': 'openmc-data/1.0'})
+        with urlopen(req) as response:
+            page = response.read()
+
+        # Extract text within <pre> tags (the ASCII-formatted table)
+        tree = html.fromstring(page)
+        pre_text = tree.xpath('//pre//text()')
+
+        # The data is in the last text node, after the underline separator.
+        # Split on underlines and take the last part to skip the header.
+        data_text = pre_text[-1]
+        parts = data_text.split('____')
+        data_section = parts[-1] if len(parts) > 1 else data_text
+
+        # Extract all floats in scientific notation -- this cleanly skips
+        # absorption edge labels (K, L1, L2, L3, M1, etc.) and any other text
+        values = np.array([float(x) for x in FLOAT_RE.findall(data_section)])
+
+        if len(values) % 3 != 0:
+            raise ValueError(
+                f'Number of parsed values ({len(values)}) for Z={Z} '
+                f'is not divisible by 3'
+            )
+
+        # Reshape into rows of 3: energy, mu/rho, mu_en/rho
+        table = values.reshape((-1, 3))
+
+        # Create dataset as 2D array: row 0 = energy, row 1 = mu/rho
+        data = np.vstack([1e6 * table[:, 0], table[:, 1]])
+        f.create_dataset(f'{Z:03}', data=data)
+
+        # Be respectful to the NIST server
+        time.sleep(0.5)
+
+print('Done! Wrote mass_attenuation.h5')

From 6661ecb56075407b9bf43fc2999f243f7e5d157f Mon Sep 17 00:00:00 2001
From: Paul Romano <paul.k.romano@gmail.com>
Date: Tue, 3 Mar 2026 09:17:37 -0600
Subject: [PATCH 2/2] Switch to NIST SRD 8

---
 make_mass_attenuation.py | 110 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 42 deletions(-)

diff --git a/make_mass_attenuation.py b/make_mass_attenuation.py
index e5b87b1..6fe7793 100644
--- a/make_mass_attenuation.py
+++ b/make_mass_attenuation.py
@@ -1,16 +1,18 @@
 #!/usr/bin/env python
-"""Obtain X-ray mass attenuation coefficients from the NIST Standard Reference
-Database 126 (https://www.nist.gov/pml/x-ray-mass-attenuation-coefficients) and
-write them to an HDF5 file.
-
-For each element Z=1 to 92, the data is scraped from the individual element
-pages on the NIST site. The resulting HDF5 file contains 92 datasets, each a
-2D array with shape (2, N) where row 0 is photon energy in eV and row 1 is
-the mass attenuation coefficient mu/rho in cm^2/g.
+"""Obtain photon mass attenuation coefficients for elements Z=1 to 100 from the
+NIST Standard Reference Database 8 (XCOM Photon Cross Sections Database,
+https://www.nist.gov/pml/xcom-photon-cross-sections-database) and write them
+to an HDF5 file.
+
+Data are retrieved by submitting a POST request to the XCOM CGI for each
+element. The resulting HDF5 file contains 100 datasets (keyed by zero-padded
+atomic number, e.g. '001'--'100'), each a 2D array with shape (2, N) where
+row 0 is photon energy in [eV] and row 1 is the mass attenuation coefficient
+mu/rho (Total Attenuation with Coherent Scattering) in [cm^2/g].
 """
 
-import re
 import time
+from urllib.parse import urlencode
 from urllib.request import urlopen, Request
 
 from lxml import html
@@ -19,52 +21,76 @@
 from openmc.data import ATOMIC_SYMBOL
 
 
-BASE_URL = 'https://physics.nist.gov/PhysRefData/XrayMassCoef/ElemTab/z{:02}.html'
-
-# Pattern to match scientific notation floats (e.g. 1.00000E-03, 7.217E+00)
-FLOAT_RE = re.compile(r'[0-9]+\.[0-9]+E[+-][0-9]+')
+XCOM_URL = 'https://physics.nist.gov/cgi-bin/Xcom/xcom3_1'
+
+# POST payload template
+# OutOpt='PIC': all quantities in cm2/g
+# Output='on':  include the standard energy grid
+# NumAdd='1':   number of additional energy input rows (hidden field default)
+# Graph0='on':  select "None" for graph (faster; data table is unaffected)
+PAYLOAD_TEMPLATE = {
+    'ZSym': '',
+    'OutOpt': 'PIC',
+    'Output': 'on',
+    'NumAdd': '1',
+    'Energies': '',
+    'WindowXmin': '0.001',
+    'WindowXmax': '100000',
+    'ResizeFlag': 'on',
+    'Graph0': 'on',
+}
 
 # ==============================================================================
-# SCRAPE DATA FROM NIST SITE AND GENERATE MASS ATTENUATION HDF5 FILE
+# QUERY XCOM AND GENERATE MASS ATTENUATION HDF5 FILE
 
 print('Generating mass_attenuation.h5...')
 
 with h5py.File('mass_attenuation.h5', 'w') as f:
 
-    for Z in range(1, 93):
+    for Z in range(1, 101):
         print(f'  Processing {ATOMIC_SYMBOL[Z]} (Z={Z})...')
 
-        # Fetch page for this element
-        url = BASE_URL.format(Z)
-        req = Request(url, headers={'User-Agent': 'openmc-data/1.0'})
+        # Build and submit POST request for this element
+        payload = urlencode(dict(PAYLOAD_TEMPLATE, ZNum=str(Z))).encode('utf-8')
+        req = Request(XCOM_URL, data=payload, headers={'User-Agent': 'openmc-data/1.0'})
         with urlopen(req) as response:
             page = response.read()
 
-        # Extract text within <pre> tags (the ASCII-formatted table)
+        # Parse the HTML results table.
+        # Row structure: rows 0-2 are headers; data rows follow.
+        # Columns: 0=edge label, 1=energy(MeV), 2=coherent, 3=incoherent,
+        #          4=photoelectric, 5=pair(nuclear), 6=pair(electron),
+        #          7=total w/ coherent, 8=total w/o coherent
         tree = html.fromstring(page)
-        pre_text = tree.xpath('//pre//text()')
-
-        # The data is in the last text node, after the underline separator.
-        # Split on underlines and take the last part to skip the header.
-        data_text = pre_text[-1]
-        parts = data_text.split('____')
-        data_section = parts[-1] if len(parts) > 1 else data_text
-
-        # Extract all floats in scientific notation -- this cleanly skips
-        # absorption edge labels (K, L1, L2, L3, M1, etc.) and any other text
-        values = np.array([float(x) for x in FLOAT_RE.findall(data_section)])
-
-        if len(values) % 3 != 0:
-            raise ValueError(
-                f'Number of parsed values ({len(values)}) for Z={Z} '
-                f'is not divisible by 3'
-            )
-
-        # Reshape into rows of 3: energy, mu/rho, mu_en/rho
-        table = values.reshape((-1, 3))
-
-        # Create dataset as 2D array: row 0 = energy, row 1 = mu/rho
-        data = np.vstack([1e6 * table[:, 0], table[:, 1]])
+        trs = tree.xpath('//table//tr')
+
+        energies = []
+        mu_rho = []
+        for tr in trs[3:]:  # skip 3 header rows
+            tds = tr.xpath('.//td')
+            cells = [td.text_content().strip() for td in tds]
+            if len(cells) < 8:
+                continue
+            try:
+                energies.append(1e6*float(cells[1]))  # convert MeV to eV
+                mu_rho.append(float(cells[7]))  # total attenuation w/ coherent
+            except ValueError:
+                continue
+
+        if not energies:
+            raise ValueError(f'No data parsed for Z={Z}')
+
+        # Convert to numpy arrays
+        energies = np.array(energies)
+        mu_rho = np.array(mu_rho)
+
+        # Only include values up to 20 MeV
+        mask = energies <= 20e6
+        energies = energies[mask]
+        mu_rho = mu_rho[mask]
+
+        # Create dataset as 2D array: row 0 = energy (eV), row 1 = mu/rho (cm2/g)
+        data = np.array([energies, mu_rho])
         f.create_dataset(f'{Z:03}', data=data)
 
         # Be respectful to the NIST server