From 8ea3a5bf7fab54b5830f902736f4b8af464f9920 Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 14:27:31 -0400 Subject: [PATCH 1/7] new generation of mock scripts --- bin/newGenDESILiteMocks.py | 10 + py/qsotools/scripts/generate_mocks.py | 567 ++++++++++++++++++++++++++ setup.py | 32 +- 3 files changed, 593 insertions(+), 16 deletions(-) create mode 100755 bin/newGenDESILiteMocks.py create mode 100644 py/qsotools/scripts/generate_mocks.py diff --git a/bin/newGenDESILiteMocks.py b/bin/newGenDESILiteMocks.py new file mode 100755 index 0000000..02c94a4 --- /dev/null +++ b/bin/newGenDESILiteMocks.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +from qsotools.scripts.generate_mocks import main +import logging + +if __name__ == '__main__': + try: + main() + except Exception as e: + logging.error(e) + exit(1) diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py new file mode 100644 index 0000000..7ee0e83 --- /dev/null +++ b/py/qsotools/scripts/generate_mocks.py @@ -0,0 +1,567 @@ +from os.path import join as ospath_join +from os import makedirs as os_makedirs +import argparse +import logging +from multiprocessing import Pool + +from numba import njit +import numpy as np +from healpy import nside2npix, ang2pix + +from pkg_resources import resource_filename + +import qsotools.mocklib as lm +import qsotools.specops as so +import qsotools.fiducial as fid +from qsotools.io import BinaryQSO, QQFile, PiccaFile +from qsotools.utils import Progress + +PKG_ICDF_Z_TABLE = resource_filename( + 'qsotools', 'tables/invcdf_nz_qso_zmin2.1_zmax4.4.dat') + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("OutputDir", help="Output directory") + parser.add_argument( + "--master-file", + help=("Master file location. Generate mocks with the exact RA, DEC & Z" + " distribution. nmocks option is ignored when this is passed.")) + parser.add_argument( + "--fixed-zforest", type=float, + help="Generate forest at this redshift & turn off redshift evolution.") + parser.add_argument( + "--nmocks", type=int, default=1, + help="Number of mocks to generate.") + + parser.add_argument( + "--save-qqfile", action="store_true", + help=("Saves in quickquasar fileformat. Spectra are not chunked and " + "all pixels are kept. Sets sigma-per-pixel=0, specres=0, " + "keep-nolya-pixels=True and save-full-flux=True")) + parser.add_argument( + "--save-picca", action="store_true", help="Saves in picca fileformat.") + parser.add_argument("--use-optimal-rmat", action="store_true") + parser.add_argument( + "--oversample-rmat", type=int, default=1, + help=("Oversampling factor for resolution matrix. " + "Pass >1 to get finely space response function.")) + + parser.add_argument( + "--seed", type=int, default=332298, + help="Seed to generate random numbers.") + + parser.add_argument( + "--sigma-per-pixel", type=float, default=0.7, + help="Add Gaussian error to mocks with given sigma.") + parser.add_argument( + "--specres", type=int, default=3200, help="Spectral resolution.") + parser.add_argument( + "--pixel-dv", type=float, default=30., + help="Pixel size (km/s) of the log-spaced wave grid.") + parser.add_argument( + "--pixel-dlambda", type=float, default=0.2, + help="Pixel size (A) of the linearly-spaced wave grid.") + parser.add_argument( + "--use-logspaced-wave", action="store_true", + help="Use log spaced array as final grid.") + + parser.add_argument( + "--desi-w1", type=float, default=3500., + help="Lower wavelength of DESI wave grid in A.") + parser.add_argument( + "--desi-w2", type=float, default=10000., + help="Higher wavelength of DESI wave grid in A.") + parser.add_argument( + "--desi-dec", action="store_true", help="Limit dec to (-25, 85).") + + parser.add_argument( + "--fixed-zqso", type=float, + help="Generate QSOs at this redshift only. Overrides all.") + parser.add_argument( + "--use-analytic-cdf", action="store_true", + help="Uses an analytic CDF for quasar redshifts.") + parser.add_argument( + "--z-quasar-min", type=float, default=2.1, + help="Lowest quasar redshift.") + parser.add_argument( + "--z-quasar-max", type=float, default=6.0, + help="Maximum quasar redshift.") + parser.add_argument( + "--z-forest-min", type=float, default=1.9, + help="Lower end of the forest.") + parser.add_argument( + "--z-forest-max", type=float, default=5.3, + help="Upper end of the forest.") + + parser.add_argument( + "--keep-nolya-pixels", action="store_true", + help="Instead of removing pixels, set flux=1 for lambda>L_lya") + parser.add_argument( + "--invcdf-nz", default=PKG_ICDF_Z_TABLE, + help="Table for inverse cdf of n(z).") + + parser.add_argument( + "--chunk-dyn", action="store_true", + help=("Splits spectrum into three chunks " + "if n>2N/3 or into two chunks if n>N/3.")) + parser.add_argument( + "--chunk-fixed", action="store_true", + help="Splits spectrum into 3 chunks at fixed rest frame wavelengths") + + parser.add_argument( + "--nosave", help="Does not save mocks to output when passed", + action="store_true") + + parser.add_argument( + "--gauss", help="Generate Gaussian mocks", action="store_true") + parser.add_argument( + "--save-full-flux", action="store_true", + help="When passed saves flux instead of fluctuations around truth.") + + parser.add_argument( + "--log2ngrid", help="Number of grid points.", + type=int, default=18) + parser.add_argument( + "--griddv", help="Pixel size of the grid in km/s.", + type=float, default=2.) + + # healpix support + parser.add_argument("--hp-nside", help="NSIDE", type=int, default=16) + parser.add_argument( + "--hp-ring", action="store_true", + help="Use RING pixel ordering. Default is NESTED.") + + # parallel support + parser.add_argument("--nproc", type=int, default=None) + parser.add_argument( + "--debug", help="Set logger to DEBUG level.", action="store_true") + + return parser + + +def save_parameters(txt_basefilename, args): + mock_type = "Gaussian Mocks" if args.gauss else "Lognormal Mocks" + z_evo_txt = "ON" if not args.fixed_zforest else "OFF" + catalog_name = args.master_file if args.master_file else "None" + Parameters_txt = ( + "Parameters for these mocks\n" + f"Type : {mock_type}\n" + f"Velocity to Redshift : Logarithmic\n" + f"Errors : {args.sigma_per_pixel:.2f}\n" + f"Specres : {args.specres}\n" + f"LowResPixelSize : {args.pixel_dv}\n" + f"Seed : {args.seed}\n" + f"log2NGrid : {args.log2ngrid}\n" + f"GridPixelSize : {args.griddv}\n" + f"Redshift Evolution : {z_evo_txt}\n" + f"Catalog used : {catalog_name}\n" + ) + + temp_fname = "%s_parameters.txt" % txt_basefilename + logging.info(f"Saving parameteres to {temp_fname}") + toWrite = open(temp_fname, 'w') + toWrite.write(Parameters_txt) + toWrite.close() + + +# Returns observed wavelength centers +def getDESIwavegrid(args): + # Set up DESI observed wavelength grid + if args.use_logspaced_wave: + logging.info( + f"Using logspaced wavelength grid, dv={args.pixel_dv} km/s.") + base = np.exp(args.pixel_dv / fid.LIGHT_SPEED) + npix_desi = int(np.log( + args.desi_w2 / args.desi_w1 + ) * fid.LIGHT_SPEED / args.pixel_dv) + DESI_WAVEGRID = args.desi_w1 * np.power(base, np.arange(npix_desi)) + else: + logging.info( + f"Using linear wavelength grid, dlambda={args.pixel_dlambda} A.") + npix_desi = int((args.desi_w2 - args.desi_w1) / args.pixel_dlambda) + DESI_WAVEGRID = ( + np.arange(npix_desi) * args.pixel_dlambda + args.desi_w1) + + DESI_WAVEEDGES = so.createEdgesFromCenters( + DESI_WAVEGRID, logspacing=args.use_logspaced_wave) + + return DESI_WAVEGRID, DESI_WAVEEDGES + + +def _genRNDDec(RNST, N, dec1_deg, dec2_deg): + asin1 = np.sin(dec1_deg * np.pi / 180.) + asin2 = np.sin(dec2_deg * np.pi / 180.) + rnd_asin = (asin2 - asin1) * RNST.random(N) + asin1 + + return np.arcsin(rnd_asin) * 180. / np.pi + + +# Returns metadata array and number of pixels +def getMetadata(args): + # The METADATA HDU contains a binary table + # with (at least) RA, DEC, Z, TARGETID + meta_dt = np.dtype([ + ('RA', 'f8'), ('DEC', 'f8'), ('Z', 'f8'), ('MOCKID', 'i8'), + ('PIXNUM', 'i4'), ('COADD_EXPTIME', 'f8'), ('FLUX_R', 'f8') + ]) + dt_list = list(meta_dt.names) + dt_list.remove('PIXNUM') + if args.master_file: + logging.info(f"Reading master file: {args.master_file}") + master_file = QQFile(args.master_file) + l1 = master_file.readMetadata() + master_file.close() + + # Remove low redshift quasars + zqso_cut_index = master_file.metadata['Z'] > args.z_quasar_min + zqso_cut_index &= master_file.metadata['Z'] < args.z_quasar_max + master_file.metadata = master_file.metadata[zqso_cut_index] + args.nmocks = master_file.metadata.size + + # Add pixnum field to metadata + metadata = np.zeros(args.nmocks, dtype=meta_dt) + for mcol in list(set(l1) & set(dt_list)): + metadata[mcol] = master_file.metadata[mcol] + + logging.info(f"Number of mocks to generate: {args.nmocks}") + else: + logging.info("Generating random metadata.") + metadata = np.zeros(args.nmocks, dtype=meta_dt) + metadata['MOCKID'] = np.arange(args.nmocks) + # Use the same seed for all process to generate the same metadata + RNST = np.random.default_rng(args.seed) + # Generate coords in degrees + metadata['RA'] = RNST.random(args.nmocks) * 360. + # metadata['DEC'] = (RNST.random(args.nmocks)-0.5) * 180. + + dec1, dec2 = (-25., 85.) if args.desi_dec else (-90., 90.) + metadata['DEC'] = _genRNDDec(RNST, args.nmocks, dec1, dec2) + + if args.fixed_zqso: + metadata['Z'] = args.fixed_zqso + else: + # Read inverse cumulative distribution function + # Generate uniform random numbers + # Use inverse CDF to map these to QSO redshifts + + GenZ = lm.RedshiftGenerator( + args.invcdf_nz, args.z_quasar_min, args.z_quasar_max, + args.use_analytic_cdf) + metadata['Z'] = GenZ.generate(RNST, args.nmocks) + + logging.info(f"Number of nside for heal pixels: {args.hp_nside}") + if args.hp_nside: + npixels = nside2npix(args.hp_nside) + # when lonlat=True: RA first, Dec later + # when lonlat=False (Default): Dec first, RA later + metadata['PIXNUM'] = ang2pix( + args.hp_nside, metadata['RA'], metadata['DEC'], + nest=not args.hp_ring, lonlat=True) + else: + npixels = 1 + metadata['PIXNUM'] = 0 + + header = {'HPXNSIDE': args.hp_nside, 'HPXNEST': not args.hp_ring} + mstrfname = ospath_join(args.OutputDir, "master.fits") + qqfile = QQFile(mstrfname, 'rw') + qqfile.writeMetadata(metadata, header) + qqfile.close() + logging.info(f"Saved master metadata to {mstrfname}") + + return metadata, npixels + + +def setResolutionMatrix(wave, args, ndiags=11): + assert args.save_picca + assert args.fixed_zqso + + Ngrid = wave.size + Rint = args.specres + dv = args.pixel_dv + if args.use_optimal_rmat: + logging.info("Using optimal resolution matrix.") + logging.info("Calculating correlation function.") + z = np.median(wave) / fid.LYA_WAVELENGTH - 1 + _, xi = lm.lognPowerSpGH(z, numvpoints=2**16, corr=True) + xi = xi.ravel() + xi = np.fft.fftshift(xi) + logging.info("Calculating optimal rmatrix.") + return so.getOptimalResolutionMatrix(Ngrid, xi, Rint, dv) + else: + logging.info("Using Gaussian resolution matrix.") + rmat = so.getGaussianResolutionMatrix(wave, Rint) + + if args.oversample_rmat > 1: + rmat = so.getOversampledRMat(rmat, args.oversample_rmat) + return rmat + + +def chunkHelper(i, waves, fluxes, errors, z_qso, args): + wave_c, flux_c, err_c = waves[i], fluxes[i], errors[i] + + if args.chunk_dyn: + wave_c, flux_c, err_c = so.chunkDynamic( + wave_c, flux_c, err_c, len(wave_c)) + if args.chunk_fixed: + NUMBER_OF_CHUNKS = 3 + FIXED_CHUNK_EDGES = np.linspace( + fid.LYA_FIRST_WVL, fid.LYA_LAST_WVL, num=NUMBER_OF_CHUNKS + 1) + wave_c, flux_c, err_c = so.divideIntoChunks( + wave_c, flux_c, err_c, z_qso[i], FIXED_CHUNK_EDGES) + else: + wave_c = [wave_c] + flux_c = [flux_c] + err_c = [err_c] + + return wave_c, flux_c, err_c + + +def save_data( + wave, fmocks, emocks, fnames, z_qso, dec, ra, args, rmat, picca, tid): + if picca: + fnames = [] + for (w, f, e) in zip(wave, fmocks, emocks): + fname = picca.writeSpectrum( + tid, w, f, e, args.specres, z_qso, ra, dec, rmat.T, + islinbin=not args.use_logspaced_wave, + oversampling=args.oversample_rmat) + fnames.append(fname) + else: + for (w, f, e, fname) in zip(wave, fmocks, emocks, fnames): + mfile = BinaryQSO(ospath_join(args.OutputDir, fname), 'w') + mfile.save( + w, f, e, len(w), z_qso, dec, ra, 0., + args.specres, args.pixel_dv) + + return fnames + + +def saveQQFile(ipix, meta1, wave, fluxes, args): + P = ipix // 100 + dir1 = ospath_join(args.OutputDir, f"{P}") + dir2 = ospath_join(dir1, f"{ipix}") + os_makedirs(dir1, exist_ok=True) + os_makedirs(dir2, exist_ok=True) + fname = ospath_join( + dir2, f"lya-transmission-{args.hp_nside}-{ipix}.fits.gz") + + qqfile = QQFile(fname, 'rw') + header = {'HPXNSIDE': args.hp_nside, 'HPXNEST': not args.hp_ring} + qqfile.writeAll(meta1, header, wave, fluxes) + + return fname + + +@njit +def remove_above_lya_absoprtion(wave, fluxes, z_qso): + nmocks = fluxes.shape[0] + # Remove absorption above Lya + nonlya_ind = wave > fid.LYA_WAVELENGTH * (1 + z_qso) + for i in range(nmocks): + fluxes[i][nonlya_ind[i]] = 1 + + return fluxes + + +class MockGenerator(object): + def __init__(self, args): + self.args = args + self.TURNOFF_ZEVO = args.fixed_zforest is not None + # Set up DESI observed wavelength grid + self.DESI_WAVEGRID, self.DESI_WAVEEDGES = getDESIwavegrid(args) + + self.sett_txt = '_gaussian' if self.args.gauss else '_lognormal' + if self.args.gauss: + self.mean_flux_function = fid.meanFluxFG08 + else: + self.mean_flux_function = lm.lognMeanFluxGH + + def generate_wfe_hpx(self, ipix, nmocks): + """New seed is seed + healpix no""" + lya_m = lm.LyaMocks( + self.args.seed + ipix, + N_CELLS=2**self.args.log2ngrid, + DV_KMS=self.args.griddv, + GAUSSIAN_MOCKS=self.args.gauss, + REDSHIFT_ON=not self.TURNOFF_ZEVO + ) + + if self.TURNOFF_ZEVO: + lya_m.setCentralRedshift(self.args.fixed_zforest) + else: + lya_m.setCentralRedshift(3.0) + + wave, fluxes, errors = lya_m.resampledMocks( + nmocks, + err_per_final_pixel=self.args.sigma_per_pixel, + spectrograph_resolution=self.args.specres, + obs_wave_edges=self.DESI_WAVEEDGES, + keep_empty_bins=self.args.keep_nolya_pixels + ) + + return wave, fluxes, errors + + def divide_by_mean_flux(self, wave, fluxes, errors): + if self.args.save_full_flux: + return fluxes, errors + + if self.TURNOFF_ZEVO: + spectrum_z = self.args.fixed_zforest + else: + spectrum_z = np.array( + wave, dtype=np.double) / fid.LYA_WAVELENGTH - 1 + true_mean_flux = self.mean_flux_function(spectrum_z) + + fluxes = fluxes / true_mean_flux - 1 + errors /= true_mean_flux + + return fluxes, errors + + def cut_lya_forest_region(self, wave, fluxes, errors, z_qso): + nmocks = fluxes.shape[0] + if not self.args.keep_nolya_pixels: + lya_ind = np.logical_and( + wave >= fid.LYA_FIRST_WVL * (1 + z_qso), + wave <= fid.LYA_LAST_WVL * (1 + z_qso)) + forst_bnd = np.logical_and( + wave >= fid.LYA_WAVELENGTH * (1 + self.args.z_forest_min), + wave <= fid.LYA_WAVELENGTH * (1 + self.args.z_forest_max)) + lya_ind = np.logical_and(lya_ind, forst_bnd) + + waves = [wave[lya_ind[i]] for i in range(nmocks)] + fluxes = [fluxes[i][lya_ind[i]] for i in range(nmocks)] + errors = [errors[i][lya_ind[i]] for i in range(nmocks)] + else: + waves = [wave for i in range(nmocks)] + + return waves, fluxes, errors + + def save_nonqq_files( + self, imock, waves, fluxes, errors, z_qso, meta1, pcfile): + wave_c, flux_c, err_c = chunkHelper( + imock, waves, fluxes, errors, z_qso, self.args) + + nchunks = len(wave_c) + nid = meta1['MOCKID'][imock] + z_qso_i = z_qso[imock] + dec, ra = meta1['DEC'][imock], meta1['RA'][imock] + mockid = meta1['MOCKID'][imock] + + if not self.args.save_picca: + def _get_bq_fname(nc): + return (f"desilite_seed{self.args.seed}_id{nid}_{nc}" + f"_z{z_qso_i:.1f}{self.sett_txt}.dat") + + fname = [_get_bq_fname(nc) for nc in range(nchunks)] + else: + assert nchunks == 1 + RESOMAT = setResolutionMatrix(wave_c[0], self.args) + fname = None + + if not self.args.nosave: + fname = save_data( + wave_c, flux_c, err_c, fname, z_qso_i, dec, ra, + self.args, RESOMAT, pcfile, mockid) + + return fname + + def __call__(self, ipix_meta): + ipix, meta1 = ipix_meta + nmocks = meta1['MOCKID'].size + z_qso = meta1['Z'][:, None] + + if nmocks == 0: + return [] + + wave, fluxes, errors = self.generate_wfe_hpx(ipix, nmocks) + + # Remove absorption above Lya + fluxes = remove_above_lya_absoprtion(wave, fluxes, z_qso) + + # Divide by mean flux if required + fluxes, errors = self.divide_by_mean_flux(wave, fluxes, errors) + + # If save-qqfile option is passed, do not save as BinaryQSO files + # This also means no chunking or removing pixels + if not self.args.nosave and self.args.save_qqfile: + fname = saveQQFile(ipix, meta1, wave, fluxes, self.args) + + return [fname] + + # Cut Lyman-alpha forest region and convert wave to waves = [wave] + waves, fluxes, errors = self.cut_lya_forest_region( + wave, fluxes, errors, z_qso) + + if self.args.save_picca: + pcfname = ospath_join(self.args.OutputDir, f"delta-{ipix}.fits.gz") + pcfile = PiccaFile(pcfname, 'rw') + else: + pcfile = None + + all_fnames = [] + + for i in range(nmocks): + fname = self.save_nonqq_files( + i, waves, fluxes, errors, z_qso, meta1, pcfile) + + if fname: + all_fnames.extend(fname) + + return all_fnames + + +def main(): + parser = get_parser() + args = parser.parse_args() + + # Create/Check directory + os_makedirs(args.OutputDir, exist_ok=True) + + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + + metadata, npixels = getMetadata(args) + + if args.save_qqfile: + args.sigma_per_pixel = 0 + args.specres = 0 + args.desi_w1 = 3500.0 + args.z_forest_min = 0 + args.keep_nolya_pixels = True + args.save_full_flux = True + + sett_txt = '_gaussian' if args.gauss else '_lognormal' + # sett_txt += '_noz' if args.without_z_evo else '' + + txt_basefilename = f"{args.OutputDir}/desilite_seed{args.seed}{sett_txt}" + + save_parameters(txt_basefilename, args) + + metadata.sort(order='PIXNUM') + logging.info("Metadata sorted.") + + u_pix, s = np.unique(metadata['PIXNUM'], return_index=True) + split_meta = np.split(metadata, s[1:]) + logging.info( + f"Length of split metadata {len(split_meta)} vs npixels {npixels}.") + pcounter = Progress(len(split_meta)) + + filename_list = [] + with Pool(processes=args.nproc) as pool: + imap_it = pool.imap(MockGenerator(args), zip(u_pix, split_meta)) + for fname in imap_it: + filename_list.extend(fname) + pcounter.increase() + + # Save the list of files in a txt + temp_fname = ospath_join(args.OutputDir, "file_list_qso.txt") + # "%s_filelist.txt" % txt_basefilename + logging.info(f"Saving chunk spectra file list as {temp_fname}") + toWrite = open(temp_fname, 'w') + toWrite.write("%d\n" % len(filename_list)) + for f in filename_list: + toWrite.write(f + "\n") + toWrite.close() + + logging.info("DONE!") diff --git a/setup.py b/setup.py index 1235958..5a010a2 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,24 @@ -from setuptools import setup +from setuptools import setup, find_namespace_packages import os -binscripts = [os.path.join("bin", f) for f in os.listdir("bin") if f.endswith(".py")] + +binscripts = [os.path.join("bin", f) for f in os.listdir("bin") + if not f.startswith(".")] +with open("requirements.txt") as file_reqs: + requirements = file_reqs.read().splitlines() setup( name="qsotools", - version="1.1", - packages=['qsotools'], + version="1.2", + packages=find_namespace_packages(where='py'), package_dir={'': 'py/'}, + scripts=binscripts, + install_requires=requirements, package_data={"qsotools": ["tables/*"]}, include_package_data=True, - scripts=binscripts, - - # install_requires=["docutils>=0.3"], - - # metadata to display on PyPI - author = "Naim Goksel Karacayli", - author_email = "naimgoksel.karacayli@yale.edu", - description=("Python scripts to generate mock Lyman-alpha forest," - " read & reduce quasar spectra from KODIAQ, XQ-100 & SQUAD."), - url="https://bitbucket.org/naimgk/qsotools", # project home page, if any - - # could also include long_description, download_url, etc. + author="Naim Goksel Karacayli", + author_email="ngokselk@gmail.com", + description=( + "Python scripts to generate mock Lyman-alpha forest," + " read & reduce quasar spectra from KODIAQ, XQ-100 & SQUAD."), + url="https://bitbucket.org/naimgk/qsotools" ) From e6b249913c534055e85fd68cb4caad847470c160 Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 15:06:19 -0400 Subject: [PATCH 2/7] metadata for qqfile is updated --- py/qsotools/io.py | 40 +++++++++++++++++---------- py/qsotools/scripts/generate_mocks.py | 9 ++---- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/py/qsotools/io.py b/py/qsotools/io.py index 4ab7099..99e59b2 100644 --- a/py/qsotools/io.py +++ b/py/qsotools/io.py @@ -1517,6 +1517,11 @@ class QQFile(): 3 TRANSMISSION ImageHDU 8 (1864, 571) float32 4 DLA BinTableHDU 17 1864R x 4C [long, double, double, double] """ + meta_dt = np.dtype([ + ('RA', 'f8'), ('DEC', 'f8'), ('Z', 'f8'), ('MOCKID', 'i8'), + ('COADD_EXPTIME', 'f8'), ('FLUX_R', 'f8'), ('TSNR2_LRG', 'f8') + ]) + def __init__(self, fname, rw='r', clobber=True): self.fname = fname self.rw = rw @@ -1549,7 +1554,8 @@ def readAll(self): def getColList(colnames): l1 = [] - def add_colname(radec): + + def add_colname(radec, l1): if f'TARGET_{radec}' in colnames: l1.append(f'TARGET_{radec}') elif radec in colnames: @@ -1557,8 +1563,10 @@ def add_colname(radec): else: raise Exception(f"Missing metadata info: {radec}") - add_colname('RA') - add_colname('DEC') + return l1 + + l1 = add_colname('RA', l1) + l1 = add_colname('DEC', l1) l1.append('Z') # Check must-have columns are present @@ -1581,26 +1589,28 @@ def readMetadata(self): metadata_str = 'QSO_CAT' else: metadata_str = 1 - logging.warning("Metadata not found by hduname. Using extension 1.") + logging.warning( + "Metadata not found by hduname. Using extension 1.") metahdu = self.fitsfile[metadata_str] colnames = metahdu.get_colnames() - meta_dt = np.dtype([('RA','f8'), ('DEC','f8'), ('Z','f8'),('MOCKID','i8'), \ - ('COADD_EXPTIME','f8'), ('FLUX_R','f8')]) + self.metadata = np.zeros(metahdu.get_nrows(), dtype=QQFile.meta_dt) - self.metadata = np.zeros(metahdu.get_nrows(), dtype=meta_dt) - - for mcol, fcol in zip(['RA', 'DEC', 'Z', 'MOCKID'], QQFile.getColList(colnames)): + for mcol, fcol in zip( + ['RA', 'DEC', 'Z', 'MOCKID'], QQFile.getColList(colnames) + ): self.metadata[mcol] = metahdu[fcol].read() + def read_xcols(xcol, l1): + if xcol in colnames: + xres = metahdu[xcol].read() + l1.append(xcol) + return xres, l1 + l1 = ['RA', 'DEC', 'Z', 'MOCKID'] - if 'COADD_EXPTIME' in colnames: - self.metadata['COADD_EXPTIME'] = metahdu['COADD_EXPTIME'].read() - l1.append('COADD_EXPTIME') - if 'FLUX_R' in colnames: - self.metadata['FLUX_R'] = metahdu['FLUX_R'].read() - l1.append('FLUX_R') + for xcol in ['COADD_EXPTIME', 'FLUX_R', 'TSNR2_LRG']: + self.metadata[xcol], l1 = read_xcols(xcol, l1) self.nqso = self.metadata.size diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index 7ee0e83..b06cb4d 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -202,12 +202,9 @@ def _genRNDDec(RNST, N, dec1_deg, dec2_deg): def getMetadata(args): # The METADATA HDU contains a binary table # with (at least) RA, DEC, Z, TARGETID - meta_dt = np.dtype([ - ('RA', 'f8'), ('DEC', 'f8'), ('Z', 'f8'), ('MOCKID', 'i8'), - ('PIXNUM', 'i4'), ('COADD_EXPTIME', 'f8'), ('FLUX_R', 'f8') - ]) - dt_list = list(meta_dt.names) - dt_list.remove('PIXNUM') + meta_dt = np.dtype(QQFile.meta_dt.descr + [('PIXNUM', 'i4')]) + + dt_list = list(QQFile.meta_dt.names) if args.master_file: logging.info(f"Reading master file: {args.master_file}") master_file = QQFile(args.master_file) From 9eb72515953f1f75c7733719f7d222c19f448cd7 Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 15:22:59 -0400 Subject: [PATCH 3/7] memory saving steps --- py/qsotools/scripts/generate_mocks.py | 38 +++++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index b06cb4d..1742541 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -375,7 +375,7 @@ def __init__(self, args): else: self.mean_flux_function = lm.lognMeanFluxGH - def generate_wfe_hpx(self, ipix, nmocks): + def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): """New seed is seed + healpix no""" lya_m = lm.LyaMocks( self.args.seed + ipix, @@ -390,13 +390,35 @@ def generate_wfe_hpx(self, ipix, nmocks): else: lya_m.setCentralRedshift(3.0) - wave, fluxes, errors = lya_m.resampledMocks( - nmocks, - err_per_final_pixel=self.args.sigma_per_pixel, - spectrograph_resolution=self.args.specres, - obs_wave_edges=self.DESI_WAVEEDGES, - keep_empty_bins=self.args.keep_nolya_pixels - ) + nwave = self.DESI_WAVEEDGES.size - 1 + n_iter = int(nmocks / n_one_iter) + n_gen_mocks = 0 + + wave = np.array(self.DESI_WAVEGRID, dtype=np.float32) + fluxes = np.empty((nmocks, nwave), dtype=np.float32) + errors = np.empty_like(fluxes) + + for it in range(n_iter + 1): + rem_mocks = nmocks - n_gen_mocks + + if rem_mocks <= 0: + break + + nthis_mock = min(n_one_iter, rem_mocks) + n_gen_mocks += nthis_mock + + _, _f, _e = lya_m.resampledMocks( + nthis_mock, + err_per_final_pixel=self.args.sigma_per_pixel, + spectrograph_resolution=self.args.specres, + obs_wave_edges=self.DESI_WAVEEDGES, + keep_empty_bins=self.args.keep_nolya_pixels + ) + + _slice = np.s_[n_gen_mocks:n_gen_mocks + nthis_mock] + + fluxes[_slice] = _f.astype(np.float32) + errors[_slice] = _e.astype(np.float32) return wave, fluxes, errors From 0bc45e873b23cd92fdd1337e167bde18f85f473b Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 15:40:29 -0400 Subject: [PATCH 4/7] grammar fix --- py/qsotools/scripts/generate_mocks.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index 1742541..7bc7eee 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -352,10 +352,10 @@ def saveQQFile(ipix, meta1, wave, fluxes, args): @njit -def remove_above_lya_absoprtion(wave, fluxes, z_qso): +def remove_above_lya_absorption(wave, fluxes, z_qso): nmocks = fluxes.shape[0] # Remove absorption above Lya - nonlya_ind = wave > fid.LYA_WAVELENGTH * (1 + z_qso) + nonlya_ind = wave > (fid.LYA_WAVELENGTH * (1 + z_qso)) for i in range(nmocks): fluxes[i][nonlya_ind[i]] = 1 @@ -390,15 +390,15 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): else: lya_m.setCentralRedshift(3.0) - nwave = self.DESI_WAVEEDGES.size - 1 - n_iter = int(nmocks / n_one_iter) + nwave = self.DESI_WAVEGRID.size + n_iter = int(nmocks / n_one_iter) + 1 n_gen_mocks = 0 wave = np.array(self.DESI_WAVEGRID, dtype=np.float32) fluxes = np.empty((nmocks, nwave), dtype=np.float32) errors = np.empty_like(fluxes) - for it in range(n_iter + 1): + for _ in range(n_iter): rem_mocks = nmocks - n_gen_mocks if rem_mocks <= 0: @@ -417,8 +417,8 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): _slice = np.s_[n_gen_mocks:n_gen_mocks + nthis_mock] - fluxes[_slice] = _f.astype(np.float32) - errors[_slice] = _e.astype(np.float32) + fluxes[_slice, :] = _f.astype(np.float32) + errors[_slice, :] = _e.astype(np.float32) return wave, fluxes, errors @@ -497,7 +497,7 @@ def __call__(self, ipix_meta): wave, fluxes, errors = self.generate_wfe_hpx(ipix, nmocks) # Remove absorption above Lya - fluxes = remove_above_lya_absoprtion(wave, fluxes, z_qso) + fluxes = remove_above_lya_absorption(wave, fluxes, z_qso) # Divide by mean flux if required fluxes, errors = self.divide_by_mean_flux(wave, fluxes, errors) From c67bad0d34b9c3cadf40a806edd7e4fc3fa9ca20 Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 18:59:15 -0400 Subject: [PATCH 5/7] fix --- bin/newGenDESILiteMocks.py | 7 +------ py/qsotools/mocklib.py | 10 ++++++---- py/qsotools/scripts/generate_mocks.py | 21 ++++++++++++--------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/bin/newGenDESILiteMocks.py b/bin/newGenDESILiteMocks.py index 02c94a4..cbaa70f 100755 --- a/bin/newGenDESILiteMocks.py +++ b/bin/newGenDESILiteMocks.py @@ -1,10 +1,5 @@ #!/usr/bin/env python from qsotools.scripts.generate_mocks import main -import logging if __name__ == '__main__': - try: - main() - except Exception as e: - logging.error(e) - exit(1) + main() diff --git a/py/qsotools/mocklib.py b/py/qsotools/mocklib.py index 9568fc0..ebbabff 100644 --- a/py/qsotools/mocklib.py +++ b/py/qsotools/mocklib.py @@ -424,10 +424,12 @@ def qsoMock(self, qso, spectrograph_resolution=None, const_error=None): return qso - def resampledMocks(self, howmany, err_per_final_pixel=0, spectrograph_resolution=None, \ - resample_dv=None, obs_wave_edges=None, delta_z=None, \ - keep_empty_bins=False): - wave = fid.LYA_WAVELENGTH * (1. + self.z_values) + def resampledMocks( + self, howmany, err_per_final_pixel=0, spectrograph_resolution=None, + resample_dv=None, obs_wave_edges=None, delta_z=None, + keep_empty_bins=False + ): + wave = fid.LYA_WAVELENGTH * (1. + self.z_values) self.generateMocks(howmany, spectrograph_resolution) fluxes = self.delta_F diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index 7bc7eee..dde6638 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -344,6 +344,9 @@ def saveQQFile(ipix, meta1, wave, fluxes, args): fname = ospath_join( dir2, f"lya-transmission-{args.hp_nside}-{ipix}.fits.gz") + if args.nosave: + return fname + qqfile = QQFile(fname, 'rw') header = {'HPXNSIDE': args.hp_nside, 'HPXNEST': not args.hp_ring} qqfile.writeAll(meta1, header, wave, fluxes) @@ -405,6 +408,7 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): break nthis_mock = min(n_one_iter, rem_mocks) + _slice = np.s_[n_gen_mocks:n_gen_mocks + nthis_mock] n_gen_mocks += nthis_mock _, _f, _e = lya_m.resampledMocks( @@ -415,10 +419,8 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): keep_empty_bins=self.args.keep_nolya_pixels ) - _slice = np.s_[n_gen_mocks:n_gen_mocks + nthis_mock] - - fluxes[_slice, :] = _f.astype(np.float32) - errors[_slice, :] = _e.astype(np.float32) + fluxes[_slice] = _f.astype(np.float32) + errors[_slice] = _e.astype(np.float32) return wave, fluxes, errors @@ -429,8 +431,8 @@ def divide_by_mean_flux(self, wave, fluxes, errors): if self.TURNOFF_ZEVO: spectrum_z = self.args.fixed_zforest else: - spectrum_z = np.array( - wave, dtype=np.double) / fid.LYA_WAVELENGTH - 1 + spectrum_z = wave / fid.LYA_WAVELENGTH - 1 + true_mean_flux = self.mean_flux_function(spectrum_z) fluxes = fluxes / true_mean_flux - 1 @@ -458,13 +460,14 @@ def cut_lya_forest_region(self, wave, fluxes, errors, z_qso): return waves, fluxes, errors def save_nonqq_files( - self, imock, waves, fluxes, errors, z_qso, meta1, pcfile): + self, imock, waves, fluxes, errors, z_qso, meta1, pcfile + ): wave_c, flux_c, err_c = chunkHelper( imock, waves, fluxes, errors, z_qso, self.args) nchunks = len(wave_c) nid = meta1['MOCKID'][imock] - z_qso_i = z_qso[imock] + z_qso_i = z_qso[imock, 0] dec, ra = meta1['DEC'][imock], meta1['RA'][imock] mockid = meta1['MOCKID'][imock] @@ -504,7 +507,7 @@ def __call__(self, ipix_meta): # If save-qqfile option is passed, do not save as BinaryQSO files # This also means no chunking or removing pixels - if not self.args.nosave and self.args.save_qqfile: + if self.args.save_qqfile: fname = saveQQFile(ipix, meta1, wave, fluxes, self.args) return [fname] From 707aa2d1134671e15060e3fa73ac630758ce7882 Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 19:11:45 -0400 Subject: [PATCH 6/7] if self.args.save_qqfile, errors is not allocated --- py/qsotools/scripts/generate_mocks.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index dde6638..97a880b 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -378,7 +378,7 @@ def __init__(self, args): else: self.mean_flux_function = lm.lognMeanFluxGH - def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): + def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=100): """New seed is seed + healpix no""" lya_m = lm.LyaMocks( self.args.seed + ipix, @@ -399,7 +399,10 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): wave = np.array(self.DESI_WAVEGRID, dtype=np.float32) fluxes = np.empty((nmocks, nwave), dtype=np.float32) - errors = np.empty_like(fluxes) + if self.args.save_qqfile: + errors = None + else: + errors = np.empty_like(fluxes) for _ in range(n_iter): rem_mocks = nmocks - n_gen_mocks @@ -420,6 +423,10 @@ def generate_wfe_hpx(self, ipix, nmocks, n_one_iter=5000): ) fluxes[_slice] = _f.astype(np.float32) + + if self.args.save_qqfile: + continue + errors[_slice] = _e.astype(np.float32) return wave, fluxes, errors From 235387ac3a2d740a6074e33ebe19663c6140439a Mon Sep 17 00:00:00 2001 From: Naim Goksel Karacayli Date: Mon, 13 Mar 2023 19:20:59 -0400 Subject: [PATCH 7/7] progress increases by mocks generated --- py/qsotools/scripts/generate_mocks.py | 12 +++---- py/qsotools/utils.py | 46 +++++++++++++++++---------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/py/qsotools/scripts/generate_mocks.py b/py/qsotools/scripts/generate_mocks.py index 97a880b..dd4c91d 100644 --- a/py/qsotools/scripts/generate_mocks.py +++ b/py/qsotools/scripts/generate_mocks.py @@ -502,7 +502,7 @@ def __call__(self, ipix_meta): z_qso = meta1['Z'][:, None] if nmocks == 0: - return [] + return [], nmocks wave, fluxes, errors = self.generate_wfe_hpx(ipix, nmocks) @@ -517,7 +517,7 @@ def __call__(self, ipix_meta): if self.args.save_qqfile: fname = saveQQFile(ipix, meta1, wave, fluxes, self.args) - return [fname] + return [fname], nmocks # Cut Lyman-alpha forest region and convert wave to waves = [wave] waves, fluxes, errors = self.cut_lya_forest_region( @@ -538,7 +538,7 @@ def __call__(self, ipix_meta): if fname: all_fnames.extend(fname) - return all_fnames + return all_fnames, nmocks def main(): @@ -574,14 +574,14 @@ def main(): split_meta = np.split(metadata, s[1:]) logging.info( f"Length of split metadata {len(split_meta)} vs npixels {npixels}.") - pcounter = Progress(len(split_meta)) + pcounter = Progress(metadata.size) filename_list = [] with Pool(processes=args.nproc) as pool: imap_it = pool.imap(MockGenerator(args), zip(u_pix, split_meta)) - for fname in imap_it: + for fname, nmocks in imap_it: filename_list.extend(fname) - pcounter.increase() + pcounter.increase(nmocks) # Save the list of files in a txt temp_fname = ospath_join(args.OutputDir, "file_list_qso.txt") diff --git a/py/qsotools/utils.py b/py/qsotools/utils.py index b85ed97..ea6f956 100644 --- a/py/qsotools/utils.py +++ b/py/qsotools/utils.py @@ -4,15 +4,17 @@ import numpy as np + def decomposePiccaFname(picca_fname): - i1 = picca_fname.rfind('[')+1 + i1 = picca_fname.rfind('[') + 1 i2 = picca_fname.rfind(']') - basefname = picca_fname[:i1-1] + basefname = picca_fname[:i1 - 1] hdunum = int(picca_fname[i1:i2]) return (basefname, hdunum) + def getPiccaFList(fnames_spectra): decomp_list = [decomposePiccaFname(fl.rstrip()) for fl in fnames_spectra] decomp_list.sort(key=lambda x: x[0]) @@ -23,8 +25,12 @@ def getPiccaFList(fnames_spectra): return new_fnames + class Progress(object): - """Utility class to log progress. Initialize with total number of operations.""" + """Utility class to log progress. + Initialize with total number of operations. + """ + def __init__(self, total, percThres=5): self.i = 0 self.total = total @@ -32,18 +38,23 @@ def __init__(self, total, percThres=5): self.last_progress = 0 self.start_time = time.time() - def increase(self): - self.i+=1 - curr_progress = int(100*self.i/self.total) - print_condition = (curr_progress-self.last_progress >= self.percThres) or (self.i == 0) + def increase(self, inc=1): + self.i += inc + curr_progress = int(100 * self.i / self.total) + print_condition = (self.i == 0) or ( + curr_progress - self.last_progress >= self.percThres + ) if print_condition: - etime = (time.time()-self.start_time)/60 # min - logging.info(f"Progress: {curr_progress}%. Elapsed time {etime:.1f} mins.") + etime = (time.time() - self.start_time) / 60 # min + logging.info( + f"Progress: {curr_progress}%. Elapsed time {etime:.1f} mins.") self.last_progress = curr_progress + class SubsampleCov(object): """docstring for SubsampleCov""" + def __init__(self, nbins, nsamples, is_weighted=False): self.nbins = nbins self.nsamples = nsamples @@ -68,14 +79,15 @@ def addMeasurement(self, xvec, wvec=None): if (wvec is not None) and self.is_weighted: self.all_weights[self.isample] += wvec - self.isample = (self.isample+1)%self.nsamples + self.isample = (self.isample + 1) % self.nsamples def _normalize(self): if self.is_weighted: self.all_measurements /= self.all_weights + np.finfo(float).eps - self.all_weights /= np.sum(self.all_weights, axis=0) + np.finfo(float).eps + self.all_weights /= np.sum( + self.all_weights, axis=0) + np.finfo(float).eps else: - self.all_weights = np.ones(self.nsamples)/self.nsamples + self.all_weights = np.ones(self.nsamples) / self.nsamples self.is_normalized = True @@ -83,7 +95,7 @@ def getMean(self): if not self.is_normalized: self._normalize() - mean_xvec = np.sum(self.all_measurements*self.all_weights, axis=0) + mean_xvec = np.sum(self.all_measurements * self.all_weights, axis=0) return mean_xvec @@ -95,16 +107,18 @@ def getMeanNCov(self, bias_correct=False): mean_xvec = self.getMean() # remove one measurement, then renormalize - jack_i = (mean_xvec - self.all_measurements*self.all_weights)/(1-self.all_weights) + jack_i = ( + mean_xvec - self.all_measurements * self.all_weights + ) / (1 - self.all_weights) mean_jack = np.mean(jack_i, axis=0) if bias_correct: - bias_jack = (self.nsamples-1) * (mean_jack-mean_xvec) + bias_jack = (self.nsamples - 1) * (mean_jack - mean_xvec) mean_xvec -= bias_jack xdiff = jack_i - mean_jack - cov = np.dot(xdiff.T, xdiff)*(self.nsamples-1)/self.nsamples + cov = np.dot(xdiff.T, xdiff) * (self.nsamples - 1) / self.nsamples return mean_xvec, cov