From 05f4e266d573bc2da52a9ce6e32ddeb20cbaa770 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 29 Mar 2022 21:21:56 -0700 Subject: [PATCH 01/53] :sparkles: Developing PerturbationMatrix class --- src/psfmachine/perturbation.py | 263 +++++++++++++++++++++++++++++++++ tests/test_perturbation.py | 71 +++++++++ 2 files changed, 334 insertions(+) create mode 100644 src/psfmachine/perturbation.py create mode 100644 tests/test_perturbation.py diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py new file mode 100644 index 0000000..4a5b0c8 --- /dev/null +++ b/src/psfmachine/perturbation.py @@ -0,0 +1,263 @@ +"""Classes to deal with perturbation matrices""" + +from dataclasses import dataclass +import numpy as np +from scipy import sparse +from typing import Optional +import matplotlib.pyplot as plt + + +@dataclass +class PerturbationMatrix: + """ + Class to handle perturbation matrices in PSFMachine + """ + + time: np.ndarray + other_vectors: Optional = None + poly_order: int = 3 + bin_or_downsample: str = "bin" + focus: bool = False + cbvs: bool = False + segments: bool = True + clean: bool = True + + def __post_init__(self): + self.vectors = np.vstack( + [self.time ** idx for idx in range(self.poly_order + 1)] + ).T + if self.cbvs: + self._get_cbvs() + if self.focus: + self._get_focus_change() + self._validate() + if self.segments: + self._cut_segments() + if self.clean: + self._clean_vectors() + self.prior_mu = np.ones(self.shape[1]) + self.prior_sigma = np.ones(self.shape[1]) * 1e4 + + def __repr__(self): + return f"PerturbationMatrix {self.shape}" + + @property + def breaks(self): + return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1 + + def _validate(self): + # Check that the shape is correct etc + # Check the priors are right + if self.other_vectors is not None: + if isinstance(self.other_vectors, (list, np.ndarray)): + self.other_vectors = np.atleast_2d(self.other_vectors) + if self.other_vectors.shape[0] != len(self.time): + if self.other_vectors.shape[1] == len(self.time): + self.other_vectors = self.other_vectors.T + else: + raise ValueError("Must pass other vectors in the right shape") + else: + raise ValueError("Must pass a list as other vectors") + self.vectors = np.hstack([self.vectors, self.other_vectors]) + return + + def _cut_segments(self): + x = np.array_split(np.arange(len(self.time)), self.breaks) + masks = np.asarray( + [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] + ).T + self.vectors = np.hstack( + [ + self.vectors[:, idx][:, None] * masks + for idx in range(self.vectors.shape[1]) + ] + ) + + def _get_focus_change(self, exptime=100): + focus = np.asarray( + [ + np.exp(-exptime * (self.time - self.time[b])) + for b in np.hstack([0, self.breaks]) + ] + ) + focus *= np.asarray( + [ + ((self.time - self.time[b]) >= 0).astype(float) + for b in np.hstack([0, self.breaks]) + ] + ) + self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]]) + return + + def _clean_vectors(self): + """Remove time polynomial from other vectors""" + nvec = self.poly_order + 1 + if self.focus: + nvec += 1 + if self.cbvs: + nvec += self.ncbvs + s = nvec * (len(self.breaks) + 1) + X = self.vectors[:, :s] + w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) + self.vectors[:, s:] -= X.dot(w) + return + + def plot(self): + fig, ax = plt.subplots() + ax.plot(self.time, self.vectors + np.arange(self.shape[1]) * 0.1, c="k") + ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") + return fig + + def _get_cbvs(self): + self.ncbvs = 0 + # use lightkurve to get CBVs for a given time????????? + # Might need channel information maybe + # self.ncbvs = .... + # self.vectors = np.hstack([self.vectors, cbvs]) + raise NotImplementedError + + def fit(self, y, ye=None, mask=None): + if mask is None: + mask = np.ones(len(y), bool) + elif not isinstance(mask, np.ndarray): + raise ValueError("Pass a mask of booleans") + if ye is None: + ye = np.ones(len(y)) + if mask.sum() == 0: + raise ValueError("All points in perturbation matrix are masked") + X = self.matrix[mask] + if (y.ndim == 1) and (ye.ndim == 1): + sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = X.T.dot(y[mask] / ye[mask] ** 2) + self.prior_mu / self.prior_sigma ** 2 + w = np.linalg.solve(sigma_w_inv, B) + elif (y.ndim == 2) and (ye.ndim == 1): + sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = ( + X.T.dot(y[mask] / ye[mask, None] ** 2) + + self.prior_mu[:, None] / self.prior_sigma[:, None] ** 2 + ) + w = np.linalg.solve(sigma_w_inv, B) + elif (y.ndim == 2) and (ye.ndim == 2): + w = [] + for y1, ye1 in zip(y.T, ye.T): + sigma_w_inv = X.T.dot(X.multiply(1 / ye1[mask, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = ( + X.T.dot(y1[mask] / ye1[mask] ** 2) + + self.prior_mu / self.prior_sigma ** 2 + ) + w.append(np.linalg.solve(sigma_w_inv, B)) + w = np.asarray(w).T + else: + raise ValueError("Can not parse input dimensions") + return w + + def dot(self, weights): + return np.asarray(self.matrix.dot(weights)) + + @property + def matrix(self): + return sparse.csr_matrix(self.vectors) + + @property + def shape(self): + return self.vectors.shape + + def bin_vectors(self, var): + """ + Bins down an input variable to the same time resolution as `self` + """ + return var + + def to_low_res(self, resolution=20, method="downsample"): + """ + Convert to a lower resolution matrix + Parameters + ---------- + resolution: int + Number of points to either bin down to or downsample to + """ + + if method is "downsample": + points = [] + b = np.hstack([0, self.breaks, self.shape[0] - 1]) + for b1, b2 in zip(b[:-1], b[1:]): + p = np.arange(b1, b2, resolution) + if p[-1] != b2: + p = np.hstack([p, b2]) + points.append(p) + points = np.hstack(points) + + def bin_func(x): + """ + Bins down an input variable to the same time resolution as `self` + """ + if x.shape[0] == self.shape[0]: + return x[points] + else: + raise ValueError("Wrong size to bin") + + # Make new object, turn off all additional vectors and cleaning + # It will inherrit those from the "other_vectors" variable + low_res_pm = PerturbationMatrix( + time=bin_func(self.time), + other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]), + segments=False, + clean=False, + focus=False, + cbvs=False, + ) + low_res_pm.bin_vectors = bin_func + return low_res_pm + + +# @dataclass +# class PerturbationMatrix3d(PerturbationMatrix): +# """" +# Makes a perturbation matrix that accounts for spatial extent +# """" +# time +# other_vectors +# dx +# dy +# ntime_knots +# knot_radius +# knot_spacing +# +# def __post_init__(self): +# self.time_matrix = PerturbationMatrix(self.time, ....) +# self._get_cartesian_matrix(dx, dy) +# +# def __repr__(self): +# return f'PerturbationMatrix {self.shape}' +# +# @property +# def shape(self): +# +# def _get_cartesian_matrix(self, dx, dy): +# ... +# self.cartesian_matrix = +# +# @property +# def matrix(self): +# # Merge cartesian and time +# +# def to_low_res(self): +# return PerturbationMatrix3d(self.time_matrix.to_low_res....) +# +# def from_time_matrix(self, perturbationmatrix): +# return PerturbationMatrix3d(....) +# +# +# A = PerturbationMatrix3d(time=time, dx=dx, dy=dy, other_vectors=[], focus=True, bin_style='bin') +# w = A.to_low_res.fit(y, ye, mask) +# model = A.dot(w) +# +# +# mac.mean_model +# mac.perturbed_model diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py new file mode 100644 index 0000000..452753b --- /dev/null +++ b/tests/test_perturbation.py @@ -0,0 +1,71 @@ +"""test perturbation matrix""" +import numpy as np +from scipy import sparse +import pytest +from psfmachine.perturbation import PerturbationMatrix + + +def test_perturbation_matrix(): + time = np.arange(0, 10, 0.1) + p = PerturbationMatrix(time=time, focus=False) + assert p.vectors.shape == (100, 4) + p = PerturbationMatrix(time=time, focus=True) + assert p.vectors.shape == (100, 5) + + with pytest.raises(ValueError): + p = PerturbationMatrix( + time=time, other_vectors=np.random.normal(size=(2, 10)), focus=False + ) + p = PerturbationMatrix(time=time, other_vectors=1, focus=False) + p = PerturbationMatrix( + time=time, other_vectors=np.random.normal(size=(2, 100)), focus=False + ) + p = PerturbationMatrix( + time=time, other_vectors=np.random.normal(size=(100, 2)), focus=False + ) + assert p.vectors.shape == (100, 6) + time = np.hstack([np.arange(0, 10, 0.1), np.arange(15, 25, 0.1)]) + p = PerturbationMatrix(time=time, focus=False) + assert p.vectors.shape == (200, 8) + p = PerturbationMatrix(time=time, focus=True) + assert p.vectors.shape == (200, 10) + + assert p.matrix.shape == p.vectors.shape + assert sparse.issparse(p.matrix) + + y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01 + w = p.fit(y, ye) + assert w.shape[0] == p.shape[1] + assert np.isfinite(w).all() + model = p.dot(w) + assert np.sum((y - model) ** 2 / (ye ** 2)) / (200) < 1.5 + + # Fit multiple vectors + y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones(200) * 0.01 + w = p.fit(y, ye) + assert w.shape == (p.shape[1], 30) + assert np.isfinite(w).all() + model = p.dot(w) + assert np.all( + np.sum((y - model) ** 2 / (ye[:, None] ** 2), axis=0) / (200 - 1) < 1.5 + ) + + # Fit multiple vectors and multiple errors + y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones((200, 30)) * 0.01 + w = p.fit(y, ye) + assert w.shape == (p.shape[1], 30) + assert np.isfinite(w).all() + model = p.dot(w) + assert np.all(np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (200 - 1) < 1.5) + + y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01 + assert len(p.bin_vectors(y)) == len(y) + p_low = p.to_low_res(resolution=20) + assert len(p_low.bin_vectors(y)) == 12 + assert p_low.bin_vectors(y[:, None]).shape == (12, 1) + with pytest.raises(ValueError): + p_low.bin_vectors(y[:-4]) + + w = p_low.fit(p_low.bin_vectors(y)) + model = p.dot(w) + assert model.shape[0] == 200 From 172fbdadacbf0f2e27f413120fccec512f0c3bd9 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 29 Mar 2022 21:29:40 -0700 Subject: [PATCH 02/53] :white_check_mark: fixing flake8 --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 4a5b0c8..801c4f1 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -183,7 +183,7 @@ def to_low_res(self, resolution=20, method="downsample"): Number of points to either bin down to or downsample to """ - if method is "downsample": + if method == "downsample": points = [] b = np.hstack([0, self.breaks, self.shape[0] - 1]) for b1, b2 in zip(b[:-1], b[1:]): From fb31d8b138d52bc5bc59fbb46b671b414c47a9c0 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Thu, 31 Mar 2022 12:22:35 -0700 Subject: [PATCH 03/53] :sparkles: added a 3D matrix --- src/psfmachine/machine.py | 6 +- src/psfmachine/perturbation.py | 220 ++++++++++++++++++++++----------- src/psfmachine/utils.py | 42 +++---- tests/test_perturbation.py | 48 ++++++- 4 files changed, 212 insertions(+), 104 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index f878d7b..ea52eb4 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -887,7 +887,7 @@ def build_time_model(self, plot=False, downsample=False, split_time_model=False) dy, n_knots=self.n_time_knots, radius=self.time_radius, - spacing=self.cartesian_knot_spacing, + knot_spacing_type=self.cartesian_knot_spacing, ) A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr") # Cartesian spline with time dependence @@ -1049,7 +1049,7 @@ def plot_time_model(self, segment=0): dy, n_knots=self.n_time_knots, radius=self.time_radius, - spacing=self.cartesian_knot_spacing, + knot_spacing_type=self.cartesian_knot_spacing, ) # if self.seg_splits.shape[0] == 2 and segment == 0: # seg_mask = np.ones(time_binned.shape[0], dtype=bool) @@ -1576,7 +1576,7 @@ def fit_model(self, fit_va=False): dy, n_knots=self.n_time_knots, radius=self.time_radius, - spacing=self.cartesian_knot_spacing, + knot_spacing_type=self.cartesian_knot_spacing, ) A_cp3 = sparse.hstack([A_cp, A_cp, A_cp, A_cp], format="csr") diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 801c4f1..35938de 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -4,25 +4,33 @@ import numpy as np from scipy import sparse from typing import Optional +from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt -@dataclass -class PerturbationMatrix: +class PerturbationMatrix(object): """ Class to handle perturbation matrices in PSFMachine """ - time: np.ndarray - other_vectors: Optional = None - poly_order: int = 3 - bin_or_downsample: str = "bin" - focus: bool = False - cbvs: bool = False - segments: bool = True - clean: bool = True + def __init__( + self, + time, + other_vectors=None, + poly_order=3, + focus=False, + cbvs=False, + segments=True, + clean=True, + ): - def __post_init__(self): + self.time = time + self.other_vectors = other_vectors + self.poly_order = poly_order + self.focus = focus + self.cbvs = cbvs + self.segments = segments + self.clean = clean self.vectors = np.vstack( [self.time ** idx for idx in range(self.poly_order + 1)] ).T @@ -35,11 +43,17 @@ def __post_init__(self): self._cut_segments() if self.clean: self._clean_vectors() - self.prior_mu = np.ones(self.shape[1]) - self.prior_sigma = np.ones(self.shape[1]) * 1e4 def __repr__(self): - return f"PerturbationMatrix {self.shape}" + return f"PerturbationMatrix" + + @property + def prior_mu(self): + return np.ones(self.shape[1]) + + @property + def prior_sigma(self): + return np.ones(self.shape[1]) * 1e4 @property def breaks(self): @@ -174,6 +188,27 @@ def bin_vectors(self, var): """ return var + def _get_downsample_func(self, resolution): + points = [] + b = np.hstack([0, self.breaks, len(self.time) - 1]) + for b1, b2 in zip(b[:-1], b[1:]): + p = np.arange(b1, b2, resolution) + if p[-1] != b2: + p = np.hstack([p, b2]) + points.append(p) + points = np.hstack(points) + + def bin_func(x): + """ + Bins down an input variable to the same time resolution as `self` + """ + if x.shape[0] == len(self.time): + return x[points] + else: + raise ValueError("Wrong size to bin") + + return bin_func + def to_low_res(self, resolution=20, method="downsample"): """ Convert to a lower resolution matrix @@ -184,23 +219,7 @@ def to_low_res(self, resolution=20, method="downsample"): """ if method == "downsample": - points = [] - b = np.hstack([0, self.breaks, self.shape[0] - 1]) - for b1, b2 in zip(b[:-1], b[1:]): - p = np.arange(b1, b2, resolution) - if p[-1] != b2: - p = np.hstack([p, b2]) - points.append(p) - points = np.hstack(points) - - def bin_func(x): - """ - Bins down an input variable to the same time resolution as `self` - """ - if x.shape[0] == self.shape[0]: - return x[points] - else: - raise ValueError("Wrong size to bin") + bin_func = self._get_downsample_func(resolution) # Make new object, turn off all additional vectors and cleaning # It will inherrit those from the "other_vectors" variable @@ -216,48 +235,99 @@ def bin_func(x): return low_res_pm -# @dataclass -# class PerturbationMatrix3d(PerturbationMatrix): -# """" -# Makes a perturbation matrix that accounts for spatial extent +class PerturbationMatrix3D(PerturbationMatrix): + """3D perturbation matrix in time, row, column + + NOTE: Radius seems like a bad way of parameterizing something in -cartesian- space?! + """ + + def __init__( + self, + time, + dx, + dy, + other_vectors=None, + poly_order=3, + nknots=10, + radius=8, + focus=False, + cbvs=False, + segments=True, + clean=True, + ): + self.dx = dx + self.dy = dy + self.nknots = nknots + self.radius = radius + self.cartesian_matrix = _make_A_cartesian( + self.dx, + self.dy, + n_knots=self.nknots, + radius=self.radius, + knot_spacing_type="linear", + ) + super().__init__( + time=time, + other_vectors=other_vectors, + poly_order=poly_order, + focus=focus, + cbvs=cbvs, + segments=segments, + clean=clean, + ) + + def __repr__(self): + return f"PerturbationMatrix3D" + + @property + def shape(self): + return ( + self.cartesian_matrix.shape[0] * self.time.shape[0], + self.cartesian_matrix.shape[1] * self.vectors.shape[1], + ) + + @property + def matrix(self): + C = sparse.hstack( + [ + sparse.vstack( + [self.cartesian_matrix * vector[idx] for idx in range(len(vector))], + format="csr", + ) + for vector in self.vectors.T + ], + format="csr", + ) + return C + + def to_low_res(self, resolution=20, method="downsample"): + """ + Convert to a lower resolution matrix + Parameters + ---------- + resolution: int + Number of points to either bin down to or downsample to + """ + + if method == "downsample": + bin_func = self._get_downsample_func(resolution) + + # Make new object, turn off all additional vectors and cleaning + # It will inherrit those from the "other_vectors" variable + low_res_pm = PerturbationMatrix3D( + time=bin_func(self.time), + dx=self.dx, + dy=self.dy, + nknots=self.nknots, + radius=self.radius, + other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]), + segments=False, + clean=False, + focus=False, + cbvs=False, + ) + low_res_pm.bin_vectors = bin_func + return low_res_pm + + # """" -# time -# other_vectors -# dx -# dy -# ntime_knots -# knot_radius -# knot_spacing -# -# def __post_init__(self): -# self.time_matrix = PerturbationMatrix(self.time, ....) -# self._get_cartesian_matrix(dx, dy) -# -# def __repr__(self): -# return f'PerturbationMatrix {self.shape}' -# -# @property -# def shape(self): -# -# def _get_cartesian_matrix(self, dx, dy): -# ... -# self.cartesian_matrix = -# -# @property -# def matrix(self): -# # Merge cartesian and time -# -# def to_low_res(self): -# return PerturbationMatrix3d(self.time_matrix.to_low_res....) -# -# def from_time_matrix(self, perturbationmatrix): -# return PerturbationMatrix3d(....) -# -# -# A = PerturbationMatrix3d(time=time, dx=dx, dy=dy, other_vectors=[], focus=True, bin_style='bin') -# w = A.to_low_res.fit(y, ye, mask) -# model = A.dot(w) -# -# -# mac.mean_model -# mac.perturbed_model diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index 9356680..4850e43 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -196,33 +196,25 @@ def _make_A_polar(phi, r, cut_r=6, rmin=1, rmax=18, n_r_knots=12, n_phi_knots=15 return X1 -def _make_A_cartesian(x, y, n_knots=10, radius=3.0, spacing="sqrt"): - if spacing == "sqrt": - x_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) - x_knots = np.sign(x_knots) * x_knots ** 2 - else: - x_knots = np.linspace(-radius, radius, n_knots) - x_spline = sparse.csr_matrix( - np.asarray( - dmatrix( - "bs(x, knots=knots, degree=3, include_intercept=True)", - {"x": list(x), "knots": x_knots}, - ) - ) - ) - if spacing == "sqrt": - y_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) - y_knots = np.sign(y_knots) * y_knots ** 2 - else: - y_knots = np.linspace(-radius, radius, n_knots) - y_spline = sparse.csr_matrix( - np.asarray( - dmatrix( - "bs(x, knots=knots, degree=3, include_intercept=True)", - {"x": list(y), "knots": y_knots}, +def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt"): + def spline(v): + if knot_spacing_type == "sqrt": + knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) + knots = np.sign(knots) * knots ** 2 + else: + knots = np.linspace(-radius, radius, n_knots) + return sparse.csr_matrix( + np.asarray( + dmatrix( + "bs(x, knots=knots, degree=3, include_intercept=True)", + {"x": list(v), "knots": knots}, + ) ) ) - ) + + x_spline = spline(x) + y_spline = spline(y) + X = sparse.hstack( [x_spline.multiply(y_spline[:, idx]) for idx in range(y_spline.shape[1])], format="csr", diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index 452753b..ab9b749 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -2,7 +2,7 @@ import numpy as np from scipy import sparse import pytest -from psfmachine.perturbation import PerturbationMatrix +from psfmachine.perturbation import PerturbationMatrix, PerturbationMatrix3D def test_perturbation_matrix(): @@ -69,3 +69,49 @@ def test_perturbation_matrix(): w = p_low.fit(p_low.bin_vectors(y)) model = p.dot(w) assert model.shape[0] == 200 + + +def test_perturbation_matrix3d(): + time = np.arange(0, 10, 1) + # 13 x 13 pixels, evenly spaced in x and y + dx, dy = np.mgrid[:13, :13] - 6 + 0.01 + dx, dy = dx.ravel(), dy.ravel() + + # ntime x npixels + flux = ( + np.random.normal(0, 0.01, size=(10, 169)) + + 2 * dx[None, :] ** 2 * 3 * dy[None, :] + ) + flux_err = np.ones((10, 169)) * 0.01 + + p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5) + assert p3.cartesian_matrix.shape == (169, 81) + assert p3.vectors.shape == (10, 4) + assert p3.matrix.shape == (1690, 324) + assert p3.shape == (1690, 324) + w = p3.fit(flux.ravel(), flux_err.ravel()) + assert w.shape[0] == 324 + model = p3.dot(w).reshape(flux.shape) + chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( + p3.shape[0] - p3.shape[1] - 1 + ) + assert chi < 1.5 + + time = np.arange(0, 100, 1) + flux = ( + np.random.normal(0, 0.01, size=(100, 169)) + + 2 * dx[None, :] ** 2 * 3 * dy[None, :] + ) + flux_err = np.ones((100, 169)) * 0.01 + + p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5) + assert p3.to_low_res(resolution=20).shape == (169 * 6, 81 * 4) + assert p3.to_low_res(resolution=20).vectors.shape == (6, 4) + + p3l = p3.to_low_res(resolution=10) + w = p3l.fit(p3l.bin_vectors(flux).ravel(), p3l.bin_vectors(flux_err).ravel()) + model = p3.dot(w).reshape(flux.shape) + chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( + p3.shape[0] - p3.shape[1] - 1 + ) + assert chi < 1.5 From d37a540f96d4839f86c9ae203142c6a24725d9ea Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Fri, 1 Apr 2022 17:04:52 -0700 Subject: [PATCH 04/53] :art: fairly good sketch --- src/psfmachine/perturbation.py | 265 ++++++++++++++++++--------------- tests/test_perturbation.py | 99 ++++++------ 2 files changed, 199 insertions(+), 165 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 35938de..dc12a98 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -1,9 +1,7 @@ """Classes to deal with perturbation matrices""" -from dataclasses import dataclass import numpy as np from scipy import sparse -from typing import Optional from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt @@ -11,6 +9,23 @@ class PerturbationMatrix(object): """ Class to handle perturbation matrices in PSFMachine + + Parameters + ---------- + time : np.ndarray + Array of time values + other_vectors: list or np.ndarray + Other detrending vectors (e.g. centroids) + poly_order: int + Polynomial order to use for detrending, default 3 + focus : bool + Whether to correct focus using a simple exponent model + segments: bool + Whether to fit portions of data where there is a significant time break as separate segments + resolution: int + How many cadences to bin down via `bin_method` + bin_method: str + How to bin the data under the hood. Default is by mean binning. """ def __init__( @@ -21,7 +36,8 @@ def __init__( focus=False, cbvs=False, segments=True, - clean=True, + resolution=10, + bin_method="bin", ): self.time = time @@ -30,7 +46,8 @@ def __init__( self.focus = focus self.cbvs = cbvs self.segments = segments - self.clean = clean + self.resolution = resolution + self.bin_method = bin_method self.vectors = np.vstack( [self.time ** idx for idx in range(self.poly_order + 1)] ).T @@ -41,15 +58,14 @@ def __init__( self._validate() if self.segments: self._cut_segments() - if self.clean: - self._clean_vectors() + self._clean_vectors() def __repr__(self): - return f"PerturbationMatrix" + return "PerturbationMatrix" @property def prior_mu(self): - return np.ones(self.shape[1]) + return np.zeros(self.shape[1]) @property def prior_sigma(self): @@ -130,75 +146,55 @@ def _get_cbvs(self): # self.vectors = np.hstack([self.vectors, cbvs]) raise NotImplementedError - def fit(self, y, ye=None, mask=None): - if mask is None: - mask = np.ones(len(y), bool) - elif not isinstance(mask, np.ndarray): - raise ValueError("Pass a mask of booleans") - if ye is None: - ye = np.ones(len(y)) - if mask.sum() == 0: - raise ValueError("All points in perturbation matrix are masked") - X = self.matrix[mask] - if (y.ndim == 1) and (ye.ndim == 1): - sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag( - 1 / self.prior_sigma ** 2 - ) - B = X.T.dot(y[mask] / ye[mask] ** 2) + self.prior_mu / self.prior_sigma ** 2 - w = np.linalg.solve(sigma_w_inv, B) - elif (y.ndim == 2) and (ye.ndim == 1): - sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag( - 1 / self.prior_sigma ** 2 - ) - B = ( - X.T.dot(y[mask] / ye[mask, None] ** 2) - + self.prior_mu[:, None] / self.prior_sigma[:, None] ** 2 - ) - w = np.linalg.solve(sigma_w_inv, B) - elif (y.ndim == 2) and (ye.ndim == 2): - w = [] - for y1, ye1 in zip(y.T, ye.T): - sigma_w_inv = X.T.dot(X.multiply(1 / ye1[mask, None] ** 2)) + np.diag( - 1 / self.prior_sigma ** 2 - ) - B = ( - X.T.dot(y1[mask] / ye1[mask] ** 2) - + self.prior_mu / self.prior_sigma ** 2 - ) - w.append(np.linalg.solve(sigma_w_inv, B)) - w = np.asarray(w).T - else: - raise ValueError("Can not parse input dimensions") - return w + def fit(self, flux, flux_err=None): + if flux_err is None: + flux_err = np.ones(len(flux_err)) - def dot(self, weights): - return np.asarray(self.matrix.dot(weights)) + y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() + X = self.matrix + sigma_w_inv = X.T.dot(X.multiply(1 / ye[:, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = X.T.dot(y / ye ** 2) + self.prior_mu / self.prior_sigma ** 2 + self.weights = np.linalg.solve(sigma_w_inv, B) + return self.weights + + def model(self, time_indices=None): + if time_indices is None: + time_indices = np.ones(len(self.time), bool) + return self.vectors[time_indices].dot(self.weights) @property def matrix(self): - return sparse.csr_matrix(self.vectors) + return sparse.csr_matrix(self.bin_func(self.vectors)) @property def shape(self): return self.vectors.shape - def bin_vectors(self, var): + def bin_func(self, var, **kwargs): """ Bins down an input variable to the same time resolution as `self` """ - return var + if self.bin_method.lower() == "downsample": + func = self._get_downsample_func() + elif self.bin_method.lower() == "bin": + func = self._get_bindown_func() + else: + raise NotImplementedError + return func(var, **kwargs) - def _get_downsample_func(self, resolution): + def _get_downsample_func(self): points = [] b = np.hstack([0, self.breaks, len(self.time) - 1]) for b1, b2 in zip(b[:-1], b[1:]): - p = np.arange(b1, b2, resolution) + p = np.arange(b1, b2, self.resolution) if p[-1] != b2: p = np.hstack([p, b2]) points.append(p) - points = np.hstack(points) + points = np.unique(np.hstack(points)) - def bin_func(x): + def func(x, quad=False): """ Bins down an input variable to the same time resolution as `self` """ @@ -207,38 +203,68 @@ def bin_func(x): else: raise ValueError("Wrong size to bin") - return bin_func - - def to_low_res(self, resolution=20, method="downsample"): - """ - Convert to a lower resolution matrix - Parameters - ---------- - resolution: int - Number of points to either bin down to or downsample to - """ + return func - if method == "downsample": - bin_func = self._get_downsample_func(resolution) - - # Make new object, turn off all additional vectors and cleaning - # It will inherrit those from the "other_vectors" variable - low_res_pm = PerturbationMatrix( - time=bin_func(self.time), - other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]), - segments=False, - clean=False, - focus=False, - cbvs=False, + def _get_bindown_func(self): + b = np.hstack([0, self.breaks, len(self.time) - 1]) + points = np.hstack( + [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])] ) - low_res_pm.bin_vectors = bin_func - return low_res_pm + points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))] + points = np.sort(np.unique(np.hstack([points, self.breaks]))) + + def func(x, quad=False): + """ + Bins down an input variable to the same time resolution as `self` + """ + if x.shape[0] == len(self.time): + if not quad: + return np.asarray( + [i.mean(axis=0) for i in np.array_split(x, points)] + ) + else: + return ( + np.asarray( + [ + np.sum(i ** 2, axis=0) / (len(i) ** 2) + for i in np.array_split(x, points) + ] + ) + ** 0.5 + ) + else: + raise ValueError("Wrong size to bin") + return func -class PerturbationMatrix3D(PerturbationMatrix): - """3D perturbation matrix in time, row, column - NOTE: Radius seems like a bad way of parameterizing something in -cartesian- space?! +class PerturbationMatrix3D(PerturbationMatrix): + """Class to handle 3D perturbation matrices in PSFMachine + + Parameters + ---------- + time : np.ndarray + Array of time values + dx: np.ndarray + Pixel positions in x separation from source center + dy : np.ndaray + Pixel positions in y separation from source center + other_vectors: list or np.ndarray + Other detrending vectors (e.g. centroids) + poly_order: int + Polynomial order to use for detrending, default 3 + nknots: int + Number of knots for the cartesian spline + radius: float + Radius out to which to calculate the cartesian spline + focus : bool + Whether to correct focus using a simple exponent model + segments: bool + Whether to fit portions of data where there is a significant time break as separate segments + resolution: int + How many cadences to bin down via `bin_method` + bin_method: str + How to bin the data under the hood. Default is by mean binning. """ def __init__( @@ -253,7 +279,8 @@ def __init__( focus=False, cbvs=False, segments=True, - clean=True, + resolution=10, + bin_method="downsample", ): self.dx = dx self.dy = dy @@ -273,11 +300,12 @@ def __init__( focus=focus, cbvs=cbvs, segments=segments, - clean=clean, + resolution=resolution, + bin_method=bin_method, ) def __repr__(self): - return f"PerturbationMatrix3D" + return "PerturbationMatrix3D" @property def shape(self): @@ -294,40 +322,43 @@ def matrix(self): [self.cartesian_matrix * vector[idx] for idx in range(len(vector))], format="csr", ) - for vector in self.vectors.T + for vector in self.bin_func(self.vectors).T ], format="csr", ) return C - def to_low_res(self, resolution=20, method="downsample"): - """ - Convert to a lower resolution matrix - Parameters - ---------- - resolution: int - Number of points to either bin down to or downsample to - """ - - if method == "downsample": - bin_func = self._get_downsample_func(resolution) - - # Make new object, turn off all additional vectors and cleaning - # It will inherrit those from the "other_vectors" variable - low_res_pm = PerturbationMatrix3D( - time=bin_func(self.time), - dx=self.dx, - dy=self.dy, - nknots=self.nknots, - radius=self.radius, - other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]), - segments=False, - clean=False, - focus=False, - cbvs=False, + def model(self, time_indices=None): + """We build the matrix for every frame""" + if time_indices is None: + time_indices = np.arange(len(self.time)) + time_indices = np.atleast_1d(time_indices) + if isinstance(time_indices[0], bool): + time_indices = np.where(time_indices[0])[0] + return np.asarray( + [ + sparse.hstack( + [ + sparse.vstack( + self.cartesian_matrix * vector[time_index], + format="csr", + ) + for vector in (self.vectors).T + ], + format="csr", + ).dot(self.weights) + for time_index in time_indices + ] ) - low_res_pm.bin_vectors = bin_func - return low_res_pm - -# """" + def plot_model(self, time_index=0): + if not hasattr(self, "weights"): + raise ValueError("Run `fit` first.") + fig, ax = plt.subplots() + ax.scatter(self.dx, self.dy, c=self.model(time_index)[0]) + ax.set( + xlabel=r"$\delta$x", + ylabel=r"$\delta$y", + title=f"Perturbation Model [Cadence {time_index}]", + ) + return fig diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index ab9b749..de8fd99 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -30,45 +30,38 @@ def test_perturbation_matrix(): p = PerturbationMatrix(time=time, focus=True) assert p.vectors.shape == (200, 10) - assert p.matrix.shape == p.vectors.shape + assert p.matrix.shape == (200 / 10, 10) assert sparse.issparse(p.matrix) + res = 10 + p = PerturbationMatrix(time=time, focus=False, resolution=res) y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01 w = p.fit(y, ye) assert w.shape[0] == p.shape[1] assert np.isfinite(w).all() - model = p.dot(w) - assert np.sum((y - model) ** 2 / (ye ** 2)) / (200) < 1.5 - - # Fit multiple vectors - y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones(200) * 0.01 - w = p.fit(y, ye) - assert w.shape == (p.shape[1], 30) - assert np.isfinite(w).all() - model = p.dot(w) - assert np.all( - np.sum((y - model) ** 2 / (ye[:, None] ** 2), axis=0) / (200 - 1) < 1.5 - ) - - # Fit multiple vectors and multiple errors - y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones((200, 30)) * 0.01 - w = p.fit(y, ye) - assert w.shape == (p.shape[1], 30) - assert np.isfinite(w).all() - model = p.dot(w) - assert np.all(np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (200 - 1) < 1.5) + model = p.model() + assert model.shape == y.shape + chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) + assert chi < 3 y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01 - assert len(p.bin_vectors(y)) == len(y) - p_low = p.to_low_res(resolution=20) - assert len(p_low.bin_vectors(y)) == 12 - assert p_low.bin_vectors(y[:, None]).shape == (12, 1) - with pytest.raises(ValueError): - p_low.bin_vectors(y[:-4]) + for bin_method in ["downsample", "bin"]: + s = 200 / res + 1 if bin_method == "downsample" else 200 / res + p = PerturbationMatrix( + time=time, focus=False, resolution=res, bin_method=bin_method + ) + assert len(p.bin_func(y)) == s + assert len(p.bin_func(ye, quad=True)) == s + with pytest.raises(ValueError): + p.bin_func(y[:-4]) - w = p_low.fit(p_low.bin_vectors(y)) - model = p.dot(w) - assert model.shape[0] == 200 + w = p.fit(y, ye) + model = p.model() + assert model.shape[0] == 200 + chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) + assert chi < 3 + + # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1) def test_perturbation_matrix3d(): @@ -84,14 +77,18 @@ def test_perturbation_matrix3d(): ) flux_err = np.ones((10, 169)) * 0.01 - p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5) + p3 = PerturbationMatrix3D( + time=time, dx=dx, dy=dy, nknots=4, radius=5, resolution=5, poly_order=1 + ) assert p3.cartesian_matrix.shape == (169, 81) - assert p3.vectors.shape == (10, 4) - assert p3.matrix.shape == (1690, 324) - assert p3.shape == (1690, 324) - w = p3.fit(flux.ravel(), flux_err.ravel()) - assert w.shape[0] == 324 - model = p3.dot(w).reshape(flux.shape) + assert p3.vectors.shape == (10, 2) + assert p3.shape == (1690, 81 * 2) + assert p3.matrix.shape == (169 * 3, 81 * 2) + w = p3.fit(flux, flux_err) + assert w.shape[0] == 81 * 2 + model = p3.model() + assert model.shape == flux.shape + chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( p3.shape[0] - p3.shape[1] - 1 ) @@ -104,14 +101,20 @@ def test_perturbation_matrix3d(): ) flux_err = np.ones((100, 169)) * 0.01 - p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5) - assert p3.to_low_res(resolution=20).shape == (169 * 6, 81 * 4) - assert p3.to_low_res(resolution=20).vectors.shape == (6, 4) - - p3l = p3.to_low_res(resolution=10) - w = p3l.fit(p3l.bin_vectors(flux).ravel(), p3l.bin_vectors(flux_err).ravel()) - model = p3.dot(w).reshape(flux.shape) - chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( - p3.shape[0] - p3.shape[1] - 1 - ) - assert chi < 1.5 + for bin_method in ["downsample", "bin"]: + p3 = PerturbationMatrix3D( + time=time, + dx=dx, + dy=dy, + nknots=4, + radius=5, + poly_order=2, + bin_method=bin_method, + ) + w = p3.fit(flux, flux_err) + model = p3.model() + assert model.shape == flux.shape + chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( + p3.shape[0] - p3.shape[1] - 1 + ) + assert chi < 3 From 884f24d6922f9b82df347f1cce5e7337c667d880 Mon Sep 17 00:00:00 2001 From: Christina Hedges <14965634+christinahedges@users.noreply.github.com> Date: Tue, 5 Apr 2022 11:45:08 -0700 Subject: [PATCH 05/53] Update src/psfmachine/perturbation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jorge Martínez-Palomera --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index dc12a98..e5d5856 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -134,7 +134,7 @@ def _clean_vectors(self): def plot(self): fig, ax = plt.subplots() - ax.plot(self.time, self.vectors + np.arange(self.shape[1]) * 0.1, c="k") +ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1) ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") return fig From 18cca59b31742fa6fb96987545cbfd329231581f Mon Sep 17 00:00:00 2001 From: Christina Hedges <14965634+christinahedges@users.noreply.github.com> Date: Tue, 5 Apr 2022 11:45:22 -0700 Subject: [PATCH 06/53] Update src/psfmachine/perturbation.py --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index e5d5856..f7186c4 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -211,7 +211,7 @@ def _get_bindown_func(self): [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])] ) points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))] - points = np.sort(np.unique(np.hstack([points, self.breaks]))) + points = np.unique(np.hstack([points, self.breaks])) def func(x, quad=False): """ From 3df9bea76bba0dc75a84f38a3bfa05a467194675 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 5 Apr 2022 12:06:35 -0700 Subject: [PATCH 07/53] fixing identation and _clean_vectors when segments=True --- src/psfmachine/perturbation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index f7186c4..2f78eb9 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -126,7 +126,10 @@ def _clean_vectors(self): nvec += 1 if self.cbvs: nvec += self.ncbvs - s = nvec * (len(self.breaks) + 1) + if self.segments: + s = nvec * (len(self.breaks) + 1) + else: + s = nvec X = self.vectors[:, :s] w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) self.vectors[:, s:] -= X.dot(w) @@ -134,7 +137,7 @@ def _clean_vectors(self): def plot(self): fig, ax = plt.subplots() -ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1) + ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1) ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") return fig From b64b9cc20dbf927394089fbaef38714833e20800 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 5 Apr 2022 13:56:24 -0700 Subject: [PATCH 08/53] :zap: faster! --- src/psfmachine/perturbation.py | 46 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 2f78eb9..2d2e0a8 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -59,6 +59,7 @@ def __init__( if self.segments: self._cut_segments() self._clean_vectors() + self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) def __repr__(self): return "PerturbationMatrix" @@ -167,10 +168,6 @@ def model(self, time_indices=None): time_indices = np.ones(len(self.time), bool) return self.vectors[time_indices].dot(self.weights) - @property - def matrix(self): - return sparse.csr_matrix(self.bin_func(self.vectors)) - @property def shape(self): return self.vectors.shape @@ -196,6 +193,7 @@ def _get_downsample_func(self): p = np.hstack([p, b2]) points.append(p) points = np.unique(np.hstack(points)) + self.nbins = len(points) def func(x, quad=False): """ @@ -215,6 +213,7 @@ def _get_bindown_func(self): ) points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))] points = np.unique(np.hstack([points, self.breaks])) + self.nbins = len(points) + 1 def func(x, quad=False): """ @@ -307,6 +306,19 @@ def __init__( bin_method=bin_method, ) + self._cartesian_stacked = sparse.hstack( + [self.cartesian_matrix for idx in range(self.vectors.shape[1])], + format="csr", + ) + repeat1d = np.repeat( + self.bin_func(self.vectors), self.cartesian_matrix.shape[1], axis=1 + ) + repeat2d = np.repeat(repeat1d, self.cartesian_matrix.shape[0], axis=0) + self.matrix = sparse.vstack([self._cartesian_stacked] * self.nbins).multiply( + repeat2d + ) + self.matrix.eliminate_zeros() + def __repr__(self): return "PerturbationMatrix3D" @@ -317,20 +329,6 @@ def shape(self): self.cartesian_matrix.shape[1] * self.vectors.shape[1], ) - @property - def matrix(self): - C = sparse.hstack( - [ - sparse.vstack( - [self.cartesian_matrix * vector[idx] for idx in range(len(vector))], - format="csr", - ) - for vector in self.bin_func(self.vectors).T - ], - format="csr", - ) - return C - def model(self, time_indices=None): """We build the matrix for every frame""" if time_indices is None: @@ -338,17 +336,11 @@ def model(self, time_indices=None): time_indices = np.atleast_1d(time_indices) if isinstance(time_indices[0], bool): time_indices = np.where(time_indices[0])[0] + return np.asarray( [ - sparse.hstack( - [ - sparse.vstack( - self.cartesian_matrix * vector[time_index], - format="csr", - ) - for vector in (self.vectors).T - ], - format="csr", + self._cartesian_stacked.multiply( + np.repeat(self.vectors[time_index], self.cartesian_matrix.shape[1]) ).dot(self.weights) for time_index in time_indices ] From 2d55aa174dcdab11a5f9bbd49da0f36dc3d09900 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 5 Apr 2022 16:11:48 -0700 Subject: [PATCH 09/53] :art: adding pixel masking --- src/psfmachine/perturbation.py | 29 +++++++++++++++++++++++++++-- tests/test_perturbation.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 2d2e0a8..4d35cc4 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -314,8 +314,10 @@ def __init__( self.bin_func(self.vectors), self.cartesian_matrix.shape[1], axis=1 ) repeat2d = np.repeat(repeat1d, self.cartesian_matrix.shape[0], axis=0) - self.matrix = sparse.vstack([self._cartesian_stacked] * self.nbins).multiply( - repeat2d + self.matrix = ( + sparse.vstack([self._cartesian_stacked] * self.nbins) + .multiply(repeat2d) + .tocsr() ) self.matrix.eliminate_zeros() @@ -329,6 +331,29 @@ def shape(self): self.cartesian_matrix.shape[1] * self.vectors.shape[1], ) + def fit(self, flux, flux_err=None, pixel_mask=None): + if pixel_mask is not None: + if not isinstance(pixel_mask, np.ndarray): + raise ValueError("`pixel_mask` must be an `np.ndarray`") + if not pixel_mask.shape[0] == flux.shape[-1]: + raise ValueError( + f"`pixel_mask` must be shape {flux.shape[-1]} (npixels)" + ) + else: + pixel_mask = np.ones(flux.shape[-1], bool) + if flux_err is None: + flux_err = np.ones(len(flux_err)) + + y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() + k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel() + X = self.matrix + sigma_w_inv = X[k].T.dot(X[k].multiply(1 / ye[k, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = X[k].T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2 + self.weights = np.linalg.solve(sigma_w_inv, B) + return self.weights + def model(self, time_indices=None): """We build the matrix for every frame""" if time_indices is None: diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index de8fd99..d5f87f5 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -118,3 +118,35 @@ def test_perturbation_matrix3d(): p3.shape[0] - p3.shape[1] - 1 ) assert chi < 3 + + # Add in one bad pixel + flux[:, 100] = 1e5 + pixel_mask = np.ones(169, bool) + pixel_mask[100] = False + + for bin_method in ["downsample", "bin"]: + p3 = PerturbationMatrix3D( + time=time, + dx=dx, + dy=dy, + nknots=4, + radius=5, + poly_order=2, + bin_method=bin_method, + ) + w = p3.fit(flux, flux_err) + model = p3.model() + chi = np.sum( + (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2 + / (flux_err[:, pixel_mask] ** 2) + ) / (p3.shape[0] - p3.shape[1] - 1) + # Without the pixel masking the model doesn't fit + assert chi > 3 + w = p3.fit(flux, flux_err, pixel_mask=pixel_mask) + model = p3.model() + chi = np.sum( + (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2 + / (flux_err[:, pixel_mask] ** 2) + ) / (p3.shape[0] - p3.shape[1] - 1) + # with pixel masking, it should fit + assert chi < 3 From 7310ed354f744071f2668b92c5a5cf88c31b3ed7 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 5 Apr 2022 16:37:29 -0700 Subject: [PATCH 10/53] :memo: added docstrings --- src/psfmachine/perturbation.py | 168 ++++++++++++++++++++++++--------- tests/test_perturbation.py | 18 ++-- 2 files changed, 138 insertions(+), 48 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 4d35cc4..6620eff 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -1,6 +1,8 @@ """Classes to deal with perturbation matrices""" import numpy as np +import numpy.typing as npt +from typing import Optional from scipy import sparse from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt @@ -25,19 +27,19 @@ class PerturbationMatrix(object): resolution: int How many cadences to bin down via `bin_method` bin_method: str - How to bin the data under the hood. Default is by mean binning. + How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin' """ def __init__( self, - time, - other_vectors=None, - poly_order=3, - focus=False, - cbvs=False, - segments=True, - resolution=10, - bin_method="bin", + time: npt.ArrayLike, + other_vectors: Optional[list] = None, + poly_order: int = 3, + focus: bool = False, + cbvs: bool = False, + segments: bool = True, + resolution: int = 10, + bin_method: str = "bin", ): self.time = time @@ -77,8 +79,10 @@ def breaks(self): return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1 def _validate(self): - # Check that the shape is correct etc - # Check the priors are right + """ + Checks that the `other_vectors` are the right dimensions. + """ + if self.other_vectors is not None: if isinstance(self.other_vectors, (list, np.ndarray)): self.other_vectors = np.atleast_2d(self.other_vectors) @@ -93,6 +97,11 @@ def _validate(self): return def _cut_segments(self): + """ + Cuts the data into "segments" wherever there is a break. Breaks are defined + as anywhere where there is a gap in data of more than 5 times the median + time between observations. + """ x = np.array_split(np.arange(len(self.time)), self.breaks) masks = np.asarray( [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] @@ -104,7 +113,15 @@ def _cut_segments(self): ] ) - def _get_focus_change(self, exptime=100): + def _get_focus_change(self, exptime: float = 100): + """ + Finds a simple model for the focus change + + Parameters + ---------- + exptime: float + The timescale for an exponential decay. + """ focus = np.asarray( [ np.exp(-exptime * (self.time - self.time[b])) @@ -117,6 +134,7 @@ def _get_focus_change(self, exptime=100): for b in np.hstack([0, self.breaks]) ] ) + focus[focus < 1e-10] = 0 self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]]) return @@ -150,20 +168,51 @@ def _get_cbvs(self): # self.vectors = np.hstack([self.vectors, cbvs]) raise NotImplementedError - def fit(self, flux, flux_err=None): + def _fit_linalg(self, y, ye, k=None): + """Hidden method to fit data with linalg""" + if k is None: + k = np.ones(y.shape[0], bool) + X = self.matrix[k] + sigma_w_inv = X.T.dot(X.multiply(1 / ye[k, None] ** 2)) + np.diag( + 1 / self.prior_sigma ** 2 + ) + B = X.T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2 + return np.linalg.solve(sigma_w_inv, B) + + def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None): + """ + Fits flux to find the best fit model weights. Optionally will include flux errors. + Sets the `self.weights` attribute with best fit weights. + + Parameters + ---------- + flux: npt.ArrayLike + Array of flux values. Should have shape ntimes. + flux: npt.ArrayLike + Optional flux errors. Should have shape ntimes. + """ if flux_err is None: flux_err = np.ones(len(flux_err)) y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() - X = self.matrix - sigma_w_inv = X.T.dot(X.multiply(1 / ye[:, None] ** 2)) + np.diag( - 1 / self.prior_sigma ** 2 - ) - B = X.T.dot(y / ye ** 2) + self.prior_mu / self.prior_sigma ** 2 - self.weights = np.linalg.solve(sigma_w_inv, B) + self.weights = self._fit_linalg(y, ye) return self.weights - def model(self, time_indices=None): + def model(self, time_indices: Optional[list] = None): + """Returns the best fit model at given `time_indices`. + + Parameters + ---------- + time_indices: list + Optionally pass a list of integers. Model will be evaluated at those indices. + + Returns + ------- + model: npt.ArrayLike + Array of values with the same shape as the `flux` used in `self.fit` + """ + if not hasattr(self, "weights"): + raise ValueError("Run `fit` first.") if time_indices is None: time_indices = np.ones(len(self.time), bool) return self.vectors[time_indices].dot(self.weights) @@ -175,6 +224,12 @@ def shape(self): def bin_func(self, var, **kwargs): """ Bins down an input variable to the same time resolution as `self` + + Parameters + ---------- + var: npt.ArrayLike + Array of values with at least 1 dimension. The first dimension must be + the same shape as `self.time` """ if self.bin_method.lower() == "downsample": func = self._get_downsample_func() @@ -185,6 +240,7 @@ def bin_func(self, var, **kwargs): return func(var, **kwargs) def _get_downsample_func(self): + """Builds a function to lower the resolution of the data through downsampling""" points = [] b = np.hstack([0, self.breaks, len(self.time) - 1]) for b1, b2 in zip(b[:-1], b[1:]): @@ -207,6 +263,7 @@ def func(x, quad=False): return func def _get_bindown_func(self): + """Builds a function to lower the resolution of the data through binning""" b = np.hstack([0, self.breaks, len(self.time) - 1]) points = np.hstack( [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])] @@ -271,18 +328,18 @@ class PerturbationMatrix3D(PerturbationMatrix): def __init__( self, - time, - dx, - dy, - other_vectors=None, - poly_order=3, - nknots=10, - radius=8, - focus=False, - cbvs=False, - segments=True, - resolution=10, - bin_method="downsample", + time: npt.ArrayLike, + dx: npt.ArrayLike, + dy: npt.ArrayLike, + other_vectors: Optional[list] = None, + poly_order: int = 3, + nknots: int = 10, + radius: float = 8, + focus: bool = False, + cbvs: bool = False, + segments: bool = True, + resolution: int = 10, + bin_method: str = "downsample", ): self.dx = dx self.dy = dy @@ -331,7 +388,26 @@ def shape(self): self.cartesian_matrix.shape[1] * self.vectors.shape[1], ) - def fit(self, flux, flux_err=None, pixel_mask=None): + def fit( + self, + flux: npt.ArrayLike, + flux_err: Optional[npt.ArrayLike] = None, + pixel_mask: Optional[npt.ArrayLike] = None, + ): + """ + Fits flux to find the best fit model weights. Optionally will include flux errors. + Sets the `self.weights` attribute with best fit weights. + + Parameters + ---------- + flux: npt.ArrayLike + Array of flux values. Should have shape ntimes x npixels. + flux_err: npt.ArrayLike + Optional flux errors. Should have shape ntimes x npixels. + pixel_mask: npt.ArrayLike + Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked. + Should have shape npixels. + """ if pixel_mask is not None: if not isinstance(pixel_mask, np.ndarray): raise ValueError("`pixel_mask` must be an `np.ndarray`") @@ -346,16 +422,24 @@ def fit(self, flux, flux_err=None, pixel_mask=None): y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel() - X = self.matrix - sigma_w_inv = X[k].T.dot(X[k].multiply(1 / ye[k, None] ** 2)) + np.diag( - 1 / self.prior_sigma ** 2 - ) - B = X[k].T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2 - self.weights = np.linalg.solve(sigma_w_inv, B) - return self.weights + self.weights = self._fit_linalg(y, ye, k=k) + return - def model(self, time_indices=None): - """We build the matrix for every frame""" + def model(self, time_indices: Optional[list] = None): + """Returns the best fit model at given `time_indices`. + + Parameters + ---------- + time_indices: list + Optionally pass a list of integers. Model will be evaluated at those indices. + + Returns + ------- + model: npt.ArrayLike + Array of values with the same shape as the `flux` used in `self.fit` + """ + if not hasattr(self, "weights"): + raise ValueError("Run `fit` first") if time_indices is None: time_indices = np.arange(len(self.time)) time_indices = np.atleast_1d(time_indices) diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index d5f87f5..c3f93f5 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -36,7 +36,8 @@ def test_perturbation_matrix(): res = 10 p = PerturbationMatrix(time=time, focus=False, resolution=res) y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01 - w = p.fit(y, ye) + p.fit(y, ye) + w = p.weights assert w.shape[0] == p.shape[1] assert np.isfinite(w).all() model = p.model() @@ -55,7 +56,8 @@ def test_perturbation_matrix(): with pytest.raises(ValueError): p.bin_func(y[:-4]) - w = p.fit(y, ye) + p.fit(y, ye) + w = p.weights model = p.model() assert model.shape[0] == 200 chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) @@ -84,7 +86,8 @@ def test_perturbation_matrix3d(): assert p3.vectors.shape == (10, 2) assert p3.shape == (1690, 81 * 2) assert p3.matrix.shape == (169 * 3, 81 * 2) - w = p3.fit(flux, flux_err) + p3.fit(flux, flux_err) + w = p3.weights assert w.shape[0] == 81 * 2 model = p3.model() assert model.shape == flux.shape @@ -111,7 +114,8 @@ def test_perturbation_matrix3d(): poly_order=2, bin_method=bin_method, ) - w = p3.fit(flux, flux_err) + p3.fit(flux, flux_err) + w = p3.weights model = p3.model() assert model.shape == flux.shape chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / ( @@ -134,7 +138,8 @@ def test_perturbation_matrix3d(): poly_order=2, bin_method=bin_method, ) - w = p3.fit(flux, flux_err) + p3.fit(flux, flux_err) + w = p3.weights model = p3.model() chi = np.sum( (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2 @@ -142,7 +147,8 @@ def test_perturbation_matrix3d(): ) / (p3.shape[0] - p3.shape[1] - 1) # Without the pixel masking the model doesn't fit assert chi > 3 - w = p3.fit(flux, flux_err, pixel_mask=pixel_mask) + p3.fit(flux, flux_err, pixel_mask=pixel_mask) + w = p3.weights model = p3.model() chi = np.sum( (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2 From 34d943cf7562883de11f4b16d8acac2fae8bed75 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 5 Apr 2022 19:25:40 -0700 Subject: [PATCH 11/53] smooth vector fx --- src/psfmachine/utils.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index 4850e43..cafe3c7 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -6,6 +6,7 @@ from scipy import sparse from patsy import dmatrix +from scipy.ndimage import gaussian_filter1d import pyia # size_limit is 1GB @@ -552,3 +553,41 @@ def get_breaks(time): """ dts = np.diff(time) return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)]) + + +def smooth_vector(v, splits=None, filter_size=13, mode="mirror"): + """ + Smooth out a vector + + Parameters + ---------- + v : numpy.ndarray or list of numpy.ndarray + Arrays to be smoothen in the last axis + splits : list of index + List of index where `v` has discontinuity + filter_size : int + Filter window size + mode : str + The `mode` parameter determines how the input array is extended + beyond its boundaries. Options are {'reflect', 'constant', 'nearest', 'mirror', + 'wrap'}. Default is 'mirror' + """ + v_smooth = [] + if isinstance(v, list): + v = np.asarray(v) + else: + v = np.atleast_2d(v) + + if splits is None: + splits = [0, v.shape[1]] + + for i in range(1, len(splits)): + v_smooth.append( + gaussian_filter1d( + v[:, splits[i - 1] : splits[i]], + filter_size, + mode=mode, + axis=1, + ) + ) + return np.hstack(v_smooth) From 7648325d2b4f5279b0a7845dda1880f79a9a7068 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 5 Apr 2022 19:25:52 -0700 Subject: [PATCH 12/53] improved plots and docstrings --- src/psfmachine/perturbation.py | 75 ++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 6620eff..a4717fc 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -6,6 +6,7 @@ from scipy import sparse from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt +from matplotlib import cm class PerturbationMatrix(object): @@ -23,11 +24,13 @@ class PerturbationMatrix(object): focus : bool Whether to correct focus using a simple exponent model segments: bool - Whether to fit portions of data where there is a significant time break as separate segments + Whether to fit portions of data where there is a significant time break as + separate segments resolution: int How many cadences to bin down via `bin_method` bin_method: str - How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin' + How to bin the data under the hood. Default is by mean binning. Options are + 'downsample' and 'bin' """ def __init__( @@ -155,9 +158,37 @@ def _clean_vectors(self): return def plot(self): - fig, ax = plt.subplots() - ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1) - ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") + """ + Creates a diagnostic plot showing the vector components. Will show the full + (line) and bindown (dots) version of the components. + + """ + fig, ax = plt.subplots(1, 2, figsize=(12, 4)) + fig.suptitle("Vectors") + bin_time = self.bin_func(self.time) + bin_vec = self.bin_func(self.vectors) + colors = cm.get_cmap('Set1', self.vectors.shape[1]) + for k in range(self.vectors.shape[1]): + ax[0].plot(self.time, self.vectors[:, k] + k * 0.1, color=colors(k)) + ax[0].plot( + bin_time, + bin_vec[:, k] + k * 0.1, + marker=".", + lw=0, + color=colors(k), + ) + ax[0].set(xlabel="Time", ylabel="Vector", yticks=[]) + + im = ax[1].imshow( + self.vectors.T, + origin="lower", + aspect="auto", + interpolation="nearest", + vmin=-1, + vmax=1, + ) + ax[1].set(xlabel="Time Index", yticks=range(self.vectors.shape[1])) + plt.colorbar(im, ax=ax[1]) return fig def _get_cbvs(self): @@ -319,7 +350,8 @@ class PerturbationMatrix3D(PerturbationMatrix): focus : bool Whether to correct focus using a simple exponent model segments: bool - Whether to fit portions of data where there is a significant time break as separate segments + Whether to fit portions of data where there is a significant time break as + separate segments resolution: int How many cadences to bin down via `bin_method` bin_method: str @@ -395,8 +427,8 @@ def fit( pixel_mask: Optional[npt.ArrayLike] = None, ): """ - Fits flux to find the best fit model weights. Optionally will include flux errors. - Sets the `self.weights` attribute with best fit weights. + Fits flux to find the best fit model weights. Optionally will include flux + errors. Sets the `self.weights` attribute with best fit weights. Parameters ---------- @@ -405,8 +437,8 @@ def fit( flux_err: npt.ArrayLike Optional flux errors. Should have shape ntimes x npixels. pixel_mask: npt.ArrayLike - Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked. - Should have shape npixels. + Pixel mask to apply. Values that are `True` will be used in the fit. + Values that are `False` will be masked. Should have shape npixels. """ if pixel_mask is not None: if not isinstance(pixel_mask, np.ndarray): @@ -455,11 +487,28 @@ def model(self, time_indices: Optional[list] = None): ] ) - def plot_model(self, time_index=0): + def plot_model(self, time_index: int = 0): + """Creates a diagnostic plot with the perturbation model at a given cadences + + Parameters + ---------- + time_index: int + Time index (cadence) to show in the figure. + """ if not hasattr(self, "weights"): raise ValueError("Run `fit` first.") - fig, ax = plt.subplots() - ax.scatter(self.dx, self.dy, c=self.model(time_index)[0]) + fig, ax = plt.subplots(figsize=(6, 5)) + im = ax.scatter( + self.dx, + self.dy, + c=self.model(time_index)[0], + s=2, + cmap="viridis", + vmin=0.8, + vmax=1.2, + ) + cbar = fig.colorbar(im, ax=ax, shrink=0.7) + cbar.set_label("Perturbation Value") ax.set( xlabel=r"$\delta$x", ylabel=r"$\delta$y", From b330ee6e754d761554a9b5d97404f7fd73bf8804 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 5 Apr 2022 19:40:04 -0700 Subject: [PATCH 13/53] machine uses PerturbationMatrix3D --- src/psfmachine/machine.py | 511 +++++++++++++------------------------- 1 file changed, 173 insertions(+), 338 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index ea52eb4..cbcf20c 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -18,8 +18,10 @@ _combine_A, threshold_bin, get_breaks, + smooth_vector, ) from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP +from .perturbation import PerturbationMatrix3D __all__ = ["Machine"] @@ -770,9 +772,7 @@ def _time_bin(self, npoints=200, downsample=False): # if poscorr_filter_size == 0 then the smoothing will fail. we take care of # it by setting every value < .5 to 0.1 which leads to no smoothing # (residuals < 1e-4) - self.poscorr_filter_size = ( - 0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size - ) + for i in range(1, len(splits)): pc1_smooth.extend( gaussian_filter1d( @@ -823,7 +823,13 @@ def _time_bin(self, npoints=200, downsample=False): return ta, tm, fm_raw, fm, fem - def build_time_model(self, plot=False, downsample=False, split_time_model=False): + def build_time_model( + self, + plot=False, + bin_method="bin", + split_time_segments=False, + focus_component=False, + ): """ Builds a time model that moves the PRF model to account for the scene movement due to velocity aberration. It has two methods to choose from using the @@ -837,259 +843,160 @@ def build_time_model(self, plot=False, downsample=False, split_time_model=False) ---------- plot: boolean Plot a diagnostic figure. - downsample: boolean + bin_method: boolean If True the `time` and `pos_corr` arrays will be downsampled instead of binned. - split_time_model : boolean, list/array of ints + split_time_segments : boolean, list/array of ints If `True` will split the light curve into segments to fit different time models with a commong pixel normalization. If `False` will fit the full time series as one segment. If list or numpy array of ints, will use the index values as segment breaks. """ - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - ( - time_original, - time_binned, - flux_binned_raw, - flux_binned, - flux_err_binned, - poscorr1_smooth, - poscorr2_smooth, - poscorr1_binned, - poscorr2_binned, - ) = self._time_bin(npoints=self.n_time_points, downsample=downsample) - self.pos_corr1_smooth = poscorr1_smooth - self.pos_corr2_smooth = poscorr2_smooth - else: - ( - time_original, - time_binned, - flux_binned_raw, - flux_binned, - flux_err_binned, - ) = self._time_bin(npoints=self.n_time_points, downsample=downsample) - - self._whitened_time = time_original - self.downsample_time_knots = downsample - # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.uncontaminated_source_mask.multiply(self.dra), - self.uncontaminated_source_mask.multiply(self.ddec), + # create the time and space basis + _whitened_time = (self.time - self.time.mean()) / ( + self.time.max() - self.time.mean() + ) + dx, dy, uncontaminated_pixels = ( + self.source_mask.multiply(self.dra), + self.source_mask.multiply(self.ddec), + self.source_mask.multiply(self.uncontaminated_source_mask.todense()), ) dx = dx.data * u.deg.to(u.arcsecond) dy = dy.data * u.deg.to(u.arcsecond) + # uncontaminated pixels mask + uncontaminated_pixels = uncontaminated_pixels.data - A_c = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - knot_spacing_type=self.cartesian_knot_spacing, + # add other vectors if asked, centroids or poscorrs + splits = get_breaks(self.time) + self.poscorr_filter_size = ( + 0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size ) - A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr") - # Cartesian spline with time dependence - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - # Cartesian spline with poscor dependence - A3 = _combine_A(A2, poscorr=[poscorr1_binned, poscorr2_binned]) + if self.time_corrector == "polynomial": + other_vectors = None else: - # Cartesian spline with time dependence - A3 = _combine_A(A2, time=time_binned) - - prior_sigma = np.ones(A3.shape[1]) * 10 - prior_mu = np.zeros(A3.shape[1]) - - # fit the time model for each segment - # use user input - if isinstance(split_time_model, (np.ndarray, list)): - # we make sure first and last index are included and sorted - splits = np.unique(np.append(split_time_model, [0, len(self.time)])) - elif isinstance(split_time_model, bool): - # we find the splits in data - if split_time_model: - splits = get_breaks(self.time) - else: - splits = np.array([0, len(self.time)]) - - seg_time_model_w, pix_mask_k = [], [] - # we iterate over segements - for bk in range(len(splits) - 1): - # find the right mask that select the binned times andd flux to fit the - # time model - seg_mask = (time_binned[:, 0] >= time_original[splits[bk]]) & ( - time_binned[:, 0] < time_original[splits[bk + 1] - 1] - ) - # need to rebuild the A3 DM to use the rigth times/poscorrs - A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr") - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - A3 = _combine_A( - A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]] - ) - else: - A3 = _combine_A(A2, time=time_binned[seg_mask]) + if self.time_corrector == "pos_corr" and hasattr(self, "pos_corr1"): + mpc1 = np.nanmedian(self.pos_corr1, axis=0) + mpc2 = np.nanmedian(self.pos_corr2, axis=0) + # other_vectors = + elif self.time_corrector == "centroid" and hasattr(self, "ra_centroid"): + mpc1 = self.ra_centroid.to("arcsec").value / 4 + mpc2 = self.dec_centroid.to("arcsec").value / 4 - # No saturated pixels, 1e5 is a hardcoded value for Kepler. - k = ( - (flux_binned_raw < 1e5).all(axis=0)[None, :] - * np.ones(flux_binned_raw[seg_mask].shape, bool) - ).ravel() - # No faint pixels, 100 is a hardcoded value for Kepler. - k &= ( - (flux_binned_raw[seg_mask] > 100).all(axis=0)[None, :] - * np.ones(flux_binned_raw[seg_mask].shape, bool) - ).ravel() - # No huge variability - k &= ( - (np.abs(flux_binned[seg_mask] - 1) < 1).all(axis=0)[None, :] - * np.ones(flux_binned[seg_mask].shape, bool) - ).ravel() - # No nans - k &= np.isfinite(flux_binned[seg_mask].ravel()) & np.isfinite( - flux_err_binned[seg_mask].ravel() + # we smooth the vectors + mpc1_smooth, mpc2_smooth = smooth_vector( + [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size ) + # we combine them as the first order + other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth] + + # create a 3D perturbation matrix + P3 = PerturbationMatrix3D( + time=_whitened_time, + dx=dx, + dy=dy, + segments=split_time_segments, + focus=focus_component, + resolution=self.n_time_points, + other_vectors=other_vectors, + bin_method=bin_method, + ) - # we solve the segment by using `seg_mask` on all `*_binned` variables - for count in [0, 1, 2]: - sigma_w_inv = A3[k].T.dot( - ( - A3.multiply(1 / flux_err_binned[seg_mask].ravel()[:, None] ** 2) - ).tocsr()[k] - ) - sigma_w_inv += np.diag(1 / prior_sigma ** 2) - # Fit the flux - 1 - B = A3[k].T.dot( - ( - (flux_binned[seg_mask].ravel() - 1) - / flux_err_binned[seg_mask].ravel() ** 2 - )[k] - ) - B += prior_mu / (prior_sigma ** 2) - time_model_w = np.linalg.solve(sigma_w_inv, B) - res = flux_binned[seg_mask] - A3.dot(time_model_w).reshape( - flux_binned[seg_mask].shape + # get uncontaminated pixel norm-flux + flux, flux_err = np.array( + [ + np.array( + [ + self.source_mask.multiply(self.flux[idx]).data, + self.source_mask.multiply(self.flux_err[idx]).data, + ] ) - res = np.ma.masked_array(res, (~k).reshape(flux_binned[seg_mask].shape)) - bad_targets = sigma_clip(res, sigma=5).mask - bad_targets = ( - np.ones(flux_binned[seg_mask].shape, bool) & bad_targets.any(axis=0) - ).ravel() - # k &= ~sigma_clip(flux_binned.ravel() - A3.dot(time_model_w)).mask - k &= ~bad_targets - # we save the weights and pixel mask of each segement for later use - seg_time_model_w.append(time_model_w) - pix_mask_k.append(k) - - self.seg_splits = splits - self.time_model_w = np.asarray(seg_time_model_w) - self._time_masked = np.asarray(pix_mask_k, dtype=object) + for idx in range(self.nt) + ] + ).transpose((1, 0, 2)) + flux_norm = flux / np.nanmean(flux, axis=0) + flux_err_norm = flux_err / np.nanmean(flux, axis=0) + + # bindown flux arrays + flux_binned_raw = P3.bin_func(flux) + flux_binned = P3.bin_func(flux_norm) + flux_err_binned = P3.bin_func(flux_err_norm) + + # No saturated pixels, 1e5 is a hardcoded value for Kepler. + k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones( + flux_binned_raw.shape, bool + ) + # No faint pixels, 100 is a hardcoded value for Kepler. + k &= (flux_binned_raw > 100).all(axis=0)[None, :] * np.ones( + flux_binned_raw.shape, bool + ) + # No huge variability + k &= (np.abs(flux_binned - 1) < 1).all(axis=0)[None, :] * np.ones( + flux_binned.shape, bool + ) + # No nans + k &= np.isfinite(flux_binned) & np.isfinite(flux_err_binned) + k = np.all(k, axis=0) + # combine good-behaved pixels with uncontaminated pixels + k &= uncontaminated_pixels + + # iterate to remvoe outliers + for count in [0, 1, 2]: + # need to add pixel_mask=k + P3.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k) + res = flux_binned - P3.matrix.dot(P3.weights).reshape(flux_binned.shape) + # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape)) + res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T) + bad_targets = sigma_clip(res, sigma=5).mask + bad_targets = bad_targets.any(axis=0) + k &= ~bad_targets + + # bookkeeping + self.flux_binned = flux_binned + self._time_masked_pix = k + self.P3 = P3 if plot: return self.plot_time_model() return - def plot_time_model(self, segment=0): + def _get_perturbed_model(self, time_indices=None): """ - Diagnostic plot of time model. + Computes the perturbed model at given times + """ + if time_indices is None: + time_indices = np.arange(len(self.time)) + time_indices = np.atleast_1d(time_indices) + if isinstance(time_indices[0], bool): + time_indices = np.where(time_indices[0])[0] + + perturbed_model = [] + for tdx in time_indices: + X = self.mean_model.copy() + X.data *= self.P3.model(time_indices=tdx).ravel() + perturbed_model.append(X) + self.perturbed_model = perturbed_model - Parameters - ---------- - segment : int - Which light curve segment will be plotted, default is first one. + return + + # raise NotImplementedError + + def plot_time_model(self): + """ + Diagnostic plot of time model. Returns ------- fig : matplotlib.Figure Figure. """ - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - ( - time_original, - time_binned, - flux_binned_raw, - flux_binned, - flux_err_binned, - poscorr1_smooth, - poscorr2_smooth, - poscorr1_binned, - poscorr2_binned, - ) = self._time_bin( - npoints=self.n_time_points, downsample=self.downsample_time_knots - ) - else: - ( - time_original, - time_binned, - flux_binned_raw, - flux_binned, - flux_err_binned, - ) = self._time_bin( - npoints=self.n_time_points, downsample=self.downsample_time_knots - ) - - # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.uncontaminated_source_mask.multiply(self.dra), - self.uncontaminated_source_mask.multiply(self.ddec), - ) - dx = dx.data * u.deg.to(u.arcsecond) - dy = dy.data * u.deg.to(u.arcsecond) - - A_c = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - knot_spacing_type=self.cartesian_knot_spacing, - ) - # if self.seg_splits.shape[0] == 2 and segment == 0: - # seg_mask = np.ones(time_binned.shape[0], dtype=bool) - # else: - seg_mask = (time_binned[:, 0] >= time_original[self.seg_splits[segment]]) & ( - time_binned[:, 0] < time_original[self.seg_splits[segment + 1] - 1] - ) - # find the right mask that select the binned times andd flux - A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr") - - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - # Cartesian spline with poscor dependence - A3 = _combine_A( - A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]] - ) - else: - # Cartesian spline with time dependence - A3 = _combine_A(A2, time=time_binned[seg_mask]) - - model = ( - A3.dot(self.time_model_w[segment]).reshape(flux_binned[seg_mask].shape) + 1 - ) - fig, ax = plt.subplots(2, 2, figsize=(7, 6), facecolor="w") - k1 = ( - self._time_masked[segment] - .reshape(flux_binned[seg_mask].shape)[0] - .astype(bool) - ) - k2 = ( - self._time_masked[segment] - .reshape(flux_binned[seg_mask].shape)[-1] - .astype(bool) + model_binned = self.P3.matrix.dot(self.P3.weights).reshape( + self.flux_binned.shape ) + fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w") + # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool) im = ax[0, 0].scatter( - dx[k1], - dy[k1], - c=flux_binned[seg_mask][0][k1], + self.P3.dx[self._time_masked_pix], + self.P3.dy[self._time_masked_pix], + c=self.flux_binned[0, self._time_masked_pix], s=3, vmin=0.5, vmax=1.5, @@ -1097,9 +1004,9 @@ def plot_time_model(self, segment=0): rasterized=True, ) ax[0, 1].scatter( - dx[k2], - dy[k2], - c=flux_binned[seg_mask][-1][k2], + self.P3.dx[self._time_masked_pix], + self.P3.dy[self._time_masked_pix], + c=self.flux_binned[-1, self._time_masked_pix], s=3, vmin=0.5, vmax=1.5, @@ -1107,9 +1014,9 @@ def plot_time_model(self, segment=0): rasterized=True, ) ax[1, 0].scatter( - dx[k1], - dy[k1], - c=model[0][k1], + self.P3.dx[self._time_masked_pix], + self.P3.dy[self._time_masked_pix], + c=model_binned[0, self._time_masked_pix], s=3, vmin=0.5, vmax=1.5, @@ -1117,9 +1024,9 @@ def plot_time_model(self, segment=0): rasterized=True, ) ax[1, 1].scatter( - dx[k2], - dy[k2], - c=model[-1][k2], + self.P3.dx[self._time_masked_pix], + self.P3.dy[self._time_masked_pix], + c=model_binned[-1, self._time_masked_pix], s=3, vmin=0.5, vmax=1.5, @@ -1553,89 +1460,39 @@ def fit_model(self, fit_va=False): ) self.model_flux = np.zeros(self.flux.shape) * np.nan - self.ws = np.zeros((self.nt, self.mean_model.shape[0])) self.werrs = np.zeros((self.nt, self.mean_model.shape[0])) if fit_va: - if not hasattr(self, "time_model_w"): - raise ValueError( - "Please use `build_time_model` before fitting with velocity aberration." - ) - - # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.source_mask.multiply(self.dra), - self.source_mask.multiply(self.ddec), - ) - dx = dx.data * u.deg.to(u.arcsecond) - dy = dy.data * u.deg.to(u.arcsecond) - - A_cp = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - knot_spacing_type=self.cartesian_knot_spacing, - ) - A_cp3 = sparse.hstack([A_cp, A_cp, A_cp, A_cp], format="csr") - self.ws_va = np.zeros((self.nt, self.mean_model.shape[0])) self.werrs_va = np.zeros((self.nt, self.mean_model.shape[0])) - for tdx in tqdm( - range(self.nt), - desc=f"Fitting {self.nsources} Sources (w. VA)", - disable=self.quiet, - ): - X = self.mean_model.copy() - X = X.T + for tdx in tqdm( + range(self.nt), + desc=f"Fitting {self.nsources} Sources (w. VA)", + disable=self.quiet, + ): + X = self.mean_model.copy() + X = X.T - sigma_w_inv = X.T.dot( - X.multiply(1 / self.flux_err[tdx][:, None] ** 2) - ).toarray() - sigma_w_inv += np.diag(1 / (prior_sigma ** 2)) - B = X.T.dot((self.flux[tdx] / self.flux_err[tdx] ** 2)) - B += prior_mu / (prior_sigma ** 2) - self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B)) - self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 - - # Divide through by expected velocity aberration - X = self.mean_model.copy() - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - # use median pos_corr - t_mult = np.hstack( - np.array( - [ - 1, - self.pos_corr1_smooth[tdx], - self.pos_corr2_smooth[tdx], - self.pos_corr1_smooth[tdx] * self.pos_corr2_smooth[tdx], - ] - )[:, None] - * np.ones(A_cp3.shape[1] // 4) - ) - else: - # use time - t_mult = np.hstack( - (self._whitened_time[tdx] ** np.arange(4))[:, None] - * np.ones(A_cp3.shape[1] // 4) + sigma_w_inv = X.T.dot( + X.multiply(1 / self.flux_err[tdx][:, None] ** 2) + ).toarray() + sigma_w_inv += np.diag(1 / (prior_sigma ** 2)) + B = X.T.dot((self.flux[tdx] / self.flux_err[tdx] ** 2)) + B += prior_mu / (prior_sigma ** 2) + self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B)) + self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 + self.model_flux[tdx] = X.dot(self.ws[tdx]) + + if fit_va: + if not hasattr(self, "P3"): + raise ValueError( + "Please use `build_time_model` before fitting with velocity " + "aberration." ) - # we make sure to use the `time_model_w` that correspond to the segment - # we are computing - seg_num = np.where( - [ - (tdx >= self.seg_splits[k]) & (tdx < self.seg_splits[k + 1]) - for k in range(len(self.seg_splits) - 1) - ] - )[0] - X.data *= ( - A_cp3.multiply(t_mult).dot(self.time_model_w[seg_num].ravel()) + 1 - ) - X = X.T + + X.data *= self.P3.model(time_indices=tdx).ravel() sigma_w_inv = X.T.dot( X.multiply(1 / self.flux_err[tdx][:, None] ** 2) @@ -1647,38 +1504,16 @@ def fit_model(self, fit_va=False): self.werrs_va[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 self.model_flux[tdx] = X.dot(self.ws_va[tdx]) - nodata = np.asarray(self.mean_model.sum(axis=1))[:, 0] == 0 - self.ws[:, nodata] *= np.nan - self.werrs[:, nodata] *= np.nan + # check bad estimates + nodata = np.asarray(self.mean_model.sum(axis=1))[:, 0] == 0 + # These sources are poorly estimated + # nodata |= (self.mean_model.max(axis=1) > 1).toarray()[:, 0] + self.ws[:, nodata] *= np.nan + self.werrs[:, nodata] *= np.nan + if fit_va: self.ws_va[:, nodata] *= np.nan self.werrs_va[:, nodata] *= np.nan - else: - - X = self.mean_model.copy() - X = X.T - f = self.flux - fe = self.flux_err - - for tdx in tqdm( - range(self.nt), - desc=f"Fitting {self.nsources} Sources (No VA)", - disable=self.quiet, - ): - sigma_w_inv = X.T.dot(X.multiply(1 / fe[tdx][:, None] ** 2)).toarray() - sigma_w_inv += np.diag(1 / (prior_sigma ** 2)) - B = X.T.dot((f[tdx] / fe[tdx] ** 2)) - B += prior_mu / (prior_sigma ** 2) - self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B)) - self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 - self.model_flux[tdx] = X.dot(self.ws[tdx]) - - nodata = np.asarray(self.source_mask.sum(axis=1))[:, 0] == 0 - # These sources are poorly estimated - nodata |= (self.mean_model.max(axis=1) > 1).toarray()[:, 0] - self.ws[:, nodata] *= np.nan - self.werrs[:, nodata] *= np.nan - return # aperture photometry functions From cc99962fbb574b5ac53f97ac256f5470c1ae7af9 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 5 Apr 2022 20:04:42 -0700 Subject: [PATCH 14/53] demoving _time_bin method --- src/psfmachine/machine.py | 205 -------------------------------------- 1 file changed, 205 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index cbcf20c..11a5ff2 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -618,211 +618,6 @@ def _get_centroids(self): return - def _time_bin(self, npoints=200, downsample=False): - """Bin the flux data down in time. If using `pos_corr`s as corrector, it will - return also the binned and smooth versions of the pos_coors vectors. - - Parameters - ---------- - npoints: int - How many points should be in each time bin - downsample: bool - If True, the arrays will be downsampled instead of bin-averaged - - Returns - ------- - time_original: np.ndarray - The time array of the data, whitened - time_binned: np.ndarray - The binned time array - flux_binned_raw: np.ndarray - The binned flux, raw - flux_binned: np.ndarray - The binned flux, whitened by the mean of the flux in time - flux_err_binned:: np.ndarray - The binned flux error, whitened by the mean of the flux - pc1_smooth and pc2_smooth: np.ndarray - Smooth version of the median poscorr vectors 1 and 2 used for time correction. - pc1_bin and pc2_bin: np.ndarray - Binned version of poscorrs vectors 1 and 2 used for setting knots. - """ - - # Where there are break points in the time array - splits = get_breaks(self.time) - # if using poscorr, find and add discontinuity in poscorr data - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - if self.time_corrector == "pos_corr": - # take the scene-median poscorr - mpc1 = np.nanmedian(self.pos_corr1, axis=0) - mpc2 = np.nanmedian(self.pos_corr2, axis=0) - else: - # if usig centroids need to convert to pixels - mpc1 = self.ra_centroid.to("arcsec").value / 4 - mpc2 = self.dec_centroid.to("arcsec").value / 4 - - # find poscorr discontinuity in each axis - grads1 = np.gradient(mpc1, self.time) - grads2 = np.gradient(mpc2, self.time) - # the 7-sigma here is hardcoded and found to work ok - splits1 = np.where(grads1 > 7 * grads1.std())[0] - splits2 = np.where(grads2 > 7 * grads2.std())[0] - # merging breaks - splits = np.unique(np.concatenate([splits, splits1[1::2], splits2[1::2]])) - del grads1, grads2, splits1, splits2 - - # the adition is to set the first knot after breaks @ 1% of the sequence lenght - splits_a = splits[:-1] + int(self.nt * 0.01) - splits_b = splits[1:] - dsplits = (splits_b - splits_a) // npoints - # this isteration is to avoid knots right at poscorr/time discontinuity by - # iterating over segments between splits and creating evenly spaced knots - # within them - breaks = [] - for spdx in range(len(splits_a)): - if not downsample: - breaks.append(splits_a[spdx] + np.arange(0, dsplits[spdx]) * npoints) - else: - # if downsample, the knots are spaced by npoints untill the end of the - # segment - breaks.append( - np.linspace( - splits_a[spdx], - splits_b[spdx] - int(self.nt * 0.01), - dsplits[spdx], - dtype=int, - ) - ) - breaks = np.unique(np.hstack(breaks)) - - if not downsample: - # time averaged between breaks - tm = np.vstack( - [t1.mean(axis=0) for t1 in np.array_split(self.time, breaks)] - ).ravel() - # whiten the time array - ta = (self.time - tm.mean()) / (tm.max() - tm.mean()) - # find the time index for each segments between breaks to then average flux - ms = [ - np.in1d(np.arange(self.nt), i) - for i in np.array_split(np.arange(self.nt), breaks) - ] - # Average Pixel flux values - fm = np.asarray( - [ - ( - sparse.csr_matrix(self.uncontaminated_source_mask) - .multiply(self.flux[ms[tdx]].mean(axis=0)) - .data - ) - for tdx in range(len(ms)) - ] - ) - # average flux err values at knots - fem = np.asarray( - [ - ( - sparse.csr_matrix(self.uncontaminated_source_mask) - .multiply((self.flux_err ** 2)[ms[tdx]].sum(axis=0) ** 0.5) - .data - / ms[tdx].sum() - ) - for tdx in range(len(ms)) - ] - ) - else: - # use breaks as downsample points - # when working with short light curves, need to make sure the last break - # point isn't the lenght of the time. - if breaks[-1] == self.nt: - breaks[-1] -= 1 - tm = self.time[breaks] - ta = (self.time - tm.mean()) / (tm.max() - tm.mean()) - fm = np.asarray( - [ - self.uncontaminated_source_mask.multiply(self.flux[idx]).data - for idx in breaks - ] - ) - fem = np.asarray( - [ - self.uncontaminated_source_mask.multiply(self.flux_err[idx]).data - for idx in breaks - ] - ) - - fm_raw = fm.copy() - fem /= np.nanmean(fm, axis=0) - fm /= np.nanmean(fm, axis=0) - - tm = ((tm - tm.mean()) / (tm.max() - tm.mean()))[:, None] * np.ones(fm.shape) - - # poscor - if hasattr(self, "pos_corr1") and self.time_corrector in [ - "pos_corr", - "centroid", - ]: - # we smooth the poscorr with a Gaussian kernel and 12 cadence window - # (6hr-CDPP) to not introduce too much noise, the smoothing is aware - # of focus-change breaks - pc1_smooth = [] - pc2_smooth = [] - # if poscorr_filter_size == 0 then the smoothing will fail. we take care of - # it by setting every value < .5 to 0.1 which leads to no smoothing - # (residuals < 1e-4) - - for i in range(1, len(splits)): - pc1_smooth.extend( - gaussian_filter1d( - mpc1[splits[i - 1] : splits[i]], - self.poscorr_filter_size, - mode="mirror", - ) - ) - pc2_smooth.extend( - gaussian_filter1d( - mpc2[splits[i - 1] : splits[i]], - self.poscorr_filter_size, - mode="mirror", - ) - ) - pc1_smooth = np.array(pc1_smooth) - pc2_smooth = np.array(pc2_smooth) - pc1_smooth = (pc1_smooth - pc1_smooth.mean()) / ( - pc1_smooth.max() - pc1_smooth.mean() - ) - pc2_smooth = (pc2_smooth - pc2_smooth.mean()) / ( - pc2_smooth.max() - pc2_smooth.mean() - ) - - # do poscorr binning - if not downsample: - pc1_bin = np.vstack( - [np.median(t1, axis=0) for t1 in np.array_split(pc1_smooth, breaks)] - ).ravel()[:, None] * np.ones(fm.shape) - pc2_bin = np.vstack( - [np.median(t1, axis=0) for t1 in np.array_split(pc2_smooth, breaks)] - ).ravel()[:, None] * np.ones(fm.shape) - else: - pc1_bin = pc1_smooth[breaks][:, None] * np.ones(fm.shape) - pc2_bin = pc2_smooth[breaks][:, None] * np.ones(fm.shape) - - return ( - ta, - tm, - fm_raw, - fm, - fem, - pc1_smooth, - pc2_smooth, - pc1_bin, - pc2_bin, - ) - - return ta, tm, fm_raw, fm, fem - def build_time_model( self, plot=False, From 5e2de6869aa13d7ac567df29309355d59d68d737 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 6 Apr 2022 12:54:21 -0700 Subject: [PATCH 15/53] normalize poscorr/centr and fixing pytest to work with new perturbation API --- src/psfmachine/machine.py | 28 +++++++++++++---------- tests/test_machine.py | 47 +++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 11a5ff2..1fadc5b 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd from scipy import sparse -from scipy.ndimage import gaussian_filter1d import astropy.units as u from tqdm import tqdm import matplotlib.pyplot as plt @@ -12,10 +11,8 @@ from .utils import ( _make_A_polar, - _make_A_cartesian, solve_linear_model, sparse_lessthan, - _combine_A, threshold_bin, get_breaks, smooth_vector, @@ -638,14 +635,14 @@ def build_time_model( ---------- plot: boolean Plot a diagnostic figure. - bin_method: boolean - If True the `time` and `pos_corr` arrays will be downsampled instead of - binned. - split_time_segments : boolean, list/array of ints + bin_method: string + Type of bin method, options are "bin" and "downsample". + split_time_segments : boolean If `True` will split the light curve into segments to fit different time - models with a commong pixel normalization. If `False` will fit the full - time series as one segment. If list or numpy array of ints, will use the - index values as segment breaks. + models with a common pixel normalization. If `False` will fit the full + time series as one segment. + focus_component: boolean + Add a component that models th focus change at the begining of a segment. """ # create the time and space basis _whitened_time = (self.time - self.time.mean()) / ( @@ -677,11 +674,18 @@ def build_time_model( mpc1 = self.ra_centroid.to("arcsec").value / 4 mpc2 = self.dec_centroid.to("arcsec").value / 4 - # we smooth the vectors + # smooth the vectors mpc1_smooth, mpc2_smooth = smooth_vector( [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size ) - # we combine them as the first order + # normalize components + mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / ( + mpc1_smooth.max() - mpc1_smooth.mean() + ) + mpc2_smooth = (mpc2_smooth - mpc2_smooth.mean()) / ( + mpc2_smooth.max() - mpc2_smooth.mean() + ) + # combine them as the first order other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth] # create a 3D perturbation matrix diff --git a/tests/test_machine.py b/tests/test_machine.py index 00d3c02..ebfadfb 100644 --- a/tests/test_machine.py +++ b/tests/test_machine.py @@ -120,19 +120,13 @@ def test_compute_aperture_photometry(): @pytest.mark.remote_data def test_poscorr_smooth(): machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False) - machine._get_source_mask() - machine.poscorr_filter_size = 0.4 - ( - time_original, - time_binned, - flux_binned_raw, - flux_binned, - flux_err_binned, - poscorr1_smooth, - poscorr2_smooth, - poscorr1_binned, - poscorr2_binned, - ) = machine._time_bin(npoints=3) + machine.build_shape_model(plot=False) + # no segments + machine.time_corrector = "pos_corr" + machine.poscorr_filter_size = 1 + machine.build_time_model( + split_time_segments=False, bin_method="bin", focus_component=False + ) median_pc1 = np.nanmedian(machine.pos_corr1, axis=0) median_pc2 = np.nanmedian(machine.pos_corr2, axis=0) @@ -143,24 +137,29 @@ def test_poscorr_smooth(): median_pc2.max() - median_pc2.mean() ) - assert np.isclose(poscorr1_smooth, median_pc1, atol=1e-3).all() - assert np.isclose(poscorr2_smooth, median_pc2, atol=1e-3).all() + assert np.isclose(machine.P3.other_vectors[:, 0], median_pc1, atol=0.5).all() + assert np.isclose(machine.P3.other_vectors[:, 1], median_pc2, atol=0.5).all() @pytest.mark.remote_data def test_segment_time_model(): # testing segment with the current test dataset we have that only has 10 cadences # isn't the best, but we can still do some sanity checks. - machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False, n_time_points=3) + machine = TPFMachine.from_TPFs( + tpfs, apply_focus_mask=False, n_time_points=3, time_corrector="polynomial" + ) machine.build_shape_model(plot=False) # no segments - machine.build_time_model(split_time_model=False, downsample=True) - assert machine.time_model_w.shape[0] == machine.seg_splits.shape[0] - 1 - assert machine.time_model_w.shape[0] == machine._time_masked.shape[0] + machine.build_time_model( + split_time_segments=False, bin_method="bin", focus_component=False + ) + assert machine.P3.vectors.shape == (10, 4) - # need tighter knot spacing - machine.n_time_points = 2 + # fake 2 time breaks + machine.time[4:] += 0.5 + machine.time[7:] += 0.41 # user defined segments - machine.build_time_model(split_time_model=[5], downsample=True) - assert machine.time_model_w.shape[0] == machine.seg_splits.shape[0] - 1 - assert machine.time_model_w.shape[0] == machine._time_masked.shape[0] + machine.build_time_model( + split_time_segments=True, bin_method="bin", focus_component=False + ) + assert machine.P3.vectors.shape == (10, 4 * 3) From cceeb2bae5f1cd63a45a325c6aa42158585a2153 Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 11 Apr 2022 10:58:23 -0700 Subject: [PATCH 16/53] smooth_vector API improved --- src/psfmachine/machine.py | 5 +++-- src/psfmachine/utils.py | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 1fadc5b..f120976 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -659,7 +659,6 @@ def build_time_model( uncontaminated_pixels = uncontaminated_pixels.data # add other vectors if asked, centroids or poscorrs - splits = get_breaks(self.time) self.poscorr_filter_size = ( 0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size ) @@ -676,7 +675,9 @@ def build_time_model( # smooth the vectors mpc1_smooth, mpc2_smooth = smooth_vector( - [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size + [mpc1, mpc2], + time=self.time, + filter_size=self.poscorr_filter_size, ) # normalize components mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / ( diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index cafe3c7..e2796d0 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -555,7 +555,7 @@ def get_breaks(time): return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)]) -def smooth_vector(v, splits=None, filter_size=13, mode="mirror"): +def smooth_vector(v, time=None, filter_size=13, mode="mirror"): """ Smooth out a vector @@ -563,8 +563,8 @@ def smooth_vector(v, splits=None, filter_size=13, mode="mirror"): ---------- v : numpy.ndarray or list of numpy.ndarray Arrays to be smoothen in the last axis - splits : list of index - List of index where `v` has discontinuity + time : numpy.ndarray + Time array of same shape of `v` last axis used to find data discontinuity. filter_size : int Filter window size mode : str @@ -578,8 +578,22 @@ def smooth_vector(v, splits=None, filter_size=13, mode="mirror"): else: v = np.atleast_2d(v) - if splits is None: + if time is None: splits = [0, v.shape[1]] + elif isinstance(time, np.ndarray): + splits = get_breaks(time) + # find poscorr discontinuity in each axis + grads = np.gradient(v, time, axis=1) + # the 7-sigma here is hardcoded and found to work ok + splits = np.unique( + np.concatenate( + [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + ) + ) + else: + raise ValueError( + "`time` optional argument invalid, pass None or an array like." + ) for i in range(1, len(splits)): v_smooth.append( From c3cdfb1c3c507dcfcfd7d625162a9a8cc19df9a0 Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 11 Apr 2022 12:38:10 -0700 Subject: [PATCH 17/53] gaussian smooth -> spline smooth --- src/psfmachine/machine.py | 15 +----- src/psfmachine/utils.py | 105 ++++++++++++++++++++++++++++++-------- 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index f120976..2196b5a 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -14,8 +14,7 @@ solve_linear_model, sparse_lessthan, threshold_bin, - get_breaks, - smooth_vector, + spline_smooth, ) from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP from .perturbation import PerturbationMatrix3D @@ -134,8 +133,6 @@ def __init__( time_corrector: string The type of time corrector that will be used to build the time model, default is a "polynomial" for a polynomial in time, it can also be "pos_corr" - poscorr_filter_size: int - Standard deviation for Gaussian kernel to be used to smooth the pos_corrs cartesian_knot_spacing: string Defines the type of spacing between knots in cartessian space to generate the design matrix, options are "linear" or "sqrt". @@ -167,10 +164,6 @@ def __init__( self.cut_r = cut_r self.sparse_dist_lim = sparse_dist_lim * u.arcsecond self.time_corrector = "polynomial" - # if pos_corr, then we can smooth the vector with a Gaussian kernel of size - # poscorr_filter_size, if this is < 0.5 -> no smoothing, default is 12 - # beacause of 6hr-CDPP - self.poscorr_filter_size = 12 self.cartesian_knot_spacing = "sqrt" # disble tqdm prgress bar when running in HPC self.quiet = False @@ -659,9 +652,6 @@ def build_time_model( uncontaminated_pixels = uncontaminated_pixels.data # add other vectors if asked, centroids or poscorrs - self.poscorr_filter_size = ( - 0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size - ) if self.time_corrector == "polynomial": other_vectors = None else: @@ -674,10 +664,9 @@ def build_time_model( mpc2 = self.dec_centroid.to("arcsec").value / 4 # smooth the vectors - mpc1_smooth, mpc2_smooth = smooth_vector( + mpc1_smooth, mpc2_smooth = spline_smooth( [mpc1, mpc2], time=self.time, - filter_size=self.poscorr_filter_size, ) # normalize components mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / ( diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index e2796d0..54eeeb7 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -7,6 +7,7 @@ from scipy import sparse from patsy import dmatrix from scipy.ndimage import gaussian_filter1d +from scipy import interpolate import pyia # size_limit is 1GB @@ -552,38 +553,42 @@ def get_breaks(time): An array of indexes with the break positions """ dts = np.diff(time) - return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)]) + return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time) - 1]) -def smooth_vector(v, time=None, filter_size=13, mode="mirror"): +def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"): """ - Smooth out a vector + Applies a Gaussian smoothing to a curve. Parameters ---------- - v : numpy.ndarray or list of numpy.ndarray + y : numpy.ndarray or list of numpy.ndarray Arrays to be smoothen in the last axis - time : numpy.ndarray - Time array of same shape of `v` last axis used to find data discontinuity. + x : numpy.ndarray + Time array of same shape of `y` last axis used to find data discontinuity. filter_size : int Filter window size mode : str The `mode` parameter determines how the input array is extended beyond its boundaries. Options are {'reflect', 'constant', 'nearest', 'mirror', 'wrap'}. Default is 'mirror' + + Returns + ------- + y_smooth : numpy.ndarray + Smooth array. """ - v_smooth = [] - if isinstance(v, list): - v = np.asarray(v) + if isinstance(y, list): + y = np.asarray(y) else: - v = np.atleast_2d(v) + y = np.atleast_2d(y) - if time is None: - splits = [0, v.shape[1]] - elif isinstance(time, np.ndarray): - splits = get_breaks(time) + if x is None: + splits = [0, y.shape[1]] + elif isinstance(x, np.ndarray): + splits = get_breaks(x) # find poscorr discontinuity in each axis - grads = np.gradient(v, time, axis=1) + grads = np.gradient(y, x, axis=1) # the 7-sigma here is hardcoded and found to work ok splits = np.unique( np.concatenate( @@ -591,17 +596,75 @@ def smooth_vector(v, time=None, filter_size=13, mode="mirror"): ) ) else: - raise ValueError( - "`time` optional argument invalid, pass None or an array like." - ) + raise ValueError("`x` optional argument invalid, pass None or an array like.") + y_smooth = [] for i in range(1, len(splits)): - v_smooth.append( + y_smooth.append( gaussian_filter1d( - v[:, splits[i - 1] : splits[i]], + y[:, splits[i - 1] : splits[i]], filter_size, mode=mode, axis=1, ) ) - return np.hstack(v_smooth) + return np.hstack(y_smooth) + + +def spline_smooth(y, x, weights=None, degree=3, do_splits=True): + """ + Applies a spline smoothing to a curve. + + Parameters + ---------- + y : numpy.ndarray or list of numpy.ndarray + Arrays to be smoothen in the last axis + x : numpy.ndarray + Time array of same shape of `y` last axis. + weights : numpy.ndarray or list of numpy.ndarray + Optional array of weights the same shape as `y`. + degree : int + Degree of the psline fit, default is 3. + do_splits : boolean + Do the splines per segments with splits defined by data discontinuity. + + Returns + ------- + y_smooth : numpy.ndarray + Smooth array. + """ + if isinstance(y, list): + y = np.asarray(y) + else: + y = np.atleast_2d(y) + + if do_splits: + splits = get_breaks(x) + # find poscorr discontinuity in each axis + grads = np.gradient(y, x, axis=1) + # the 7-sigma here is hardcoded and found to work ok + splits = np.unique( + np.concatenate( + [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + ) + ) + else: + splits = [0, -1] + if weights is None: + s = 5e-4 + weights = np.ones_like(y) + else: + if isinstance(weights, list): + weights = np.asarray(weights) + else: + weights = np.atleast_2d(weights) + # s = weights.shape[-1] - np.sqrt(2 * weights.shape[-1]) + s = 1.9e-10 + if y.shape != weights.shape: + raise ValueError("Inconsistent shape between `v` and `weights`") + + y_smooth = [] + for y_, w in zip(y, weights): + tck = interpolate.splrep(x, y_, w=w, s=s, xb=x[splits], k=degree) + y_smooth.append(interpolate.splev(x, tck, der=0)) + return np.array(y_smooth) From e5f3f8e4d558fc8d690630529aea7efb4295a3fd Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 11 Apr 2022 12:42:45 -0700 Subject: [PATCH 18/53] matching arguments between `perturbation` and `machine` --- src/psfmachine/machine.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 2196b5a..cbe7d52 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -612,8 +612,8 @@ def build_time_model( self, plot=False, bin_method="bin", - split_time_segments=False, - focus_component=False, + segments=False, + focus=False, ): """ Builds a time model that moves the PRF model to account for the scene movement @@ -630,11 +630,11 @@ def build_time_model( Plot a diagnostic figure. bin_method: string Type of bin method, options are "bin" and "downsample". - split_time_segments : boolean + segments : boolean If `True` will split the light curve into segments to fit different time models with a common pixel normalization. If `False` will fit the full time series as one segment. - focus_component: boolean + focus: boolean Add a component that models th focus change at the begining of a segment. """ # create the time and space basis @@ -683,8 +683,8 @@ def build_time_model( time=_whitened_time, dx=dx, dy=dy, - segments=split_time_segments, - focus=focus_component, + segments=segments, + focus=focus, resolution=self.n_time_points, other_vectors=other_vectors, bin_method=bin_method, From 84a064c022a80a6945e7241bf029a8797ac4df53 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 15:43:25 -0700 Subject: [PATCH 19/53] :sparkles: Added PCA method --- src/psfmachine/perturbation.py | 67 ++++++++++++++++++++-------------- tests/test_perturbation.py | 20 ++++++++++ 2 files changed, 60 insertions(+), 27 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 6620eff..5e81009 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -6,6 +6,7 @@ from scipy import sparse from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt +from fbpca import pca class PerturbationMatrix(object): @@ -36,7 +37,6 @@ def __init__( other_vectors: Optional[list] = None, poly_order: int = 3, focus: bool = False, - cbvs: bool = False, segments: bool = True, resolution: int = 10, bin_method: str = "bin", @@ -46,20 +46,20 @@ def __init__( self.other_vectors = other_vectors self.poly_order = poly_order self.focus = focus - self.cbvs = cbvs self.segments = segments self.resolution = resolution self.bin_method = bin_method - self.vectors = np.vstack( - [self.time ** idx for idx in range(self.poly_order + 1)] + self._vectors = np.vstack( + [ + (self.time - self.time.mean()) ** idx + for idx in range(self.poly_order + 1) + ] ).T - if self.cbvs: - self._get_cbvs() if self.focus: self._get_focus_change() self._validate() if self.segments: - self._cut_segments() + self.vectors = self._cut_segments(self.vectors) self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) @@ -93,10 +93,12 @@ def _validate(self): raise ValueError("Must pass other vectors in the right shape") else: raise ValueError("Must pass a list as other vectors") - self.vectors = np.hstack([self.vectors, self.other_vectors]) + self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) + else: + self.vectors = self._vectors.copy() return - def _cut_segments(self): + def _cut_segments(self, vectors): """ Cuts the data into "segments" wherever there is a break. Breaks are defined as anywhere where there is a gap in data of more than 5 times the median @@ -106,11 +108,8 @@ def _cut_segments(self): masks = np.asarray( [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] ).T - self.vectors = np.hstack( - [ - self.vectors[:, idx][:, None] * masks - for idx in range(self.vectors.shape[1]) - ] + return np.hstack( + [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])] ) def _get_focus_change(self, exptime: float = 100): @@ -135,7 +134,7 @@ def _get_focus_change(self, exptime: float = 100): ] ) focus[focus < 1e-10] = 0 - self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]]) + self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]]) return def _clean_vectors(self): @@ -143,8 +142,6 @@ def _clean_vectors(self): nvec = self.poly_order + 1 if self.focus: nvec += 1 - if self.cbvs: - nvec += self.ncbvs if self.segments: s = nvec * (len(self.breaks) + 1) else: @@ -160,14 +157,6 @@ def plot(self): ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") return fig - def _get_cbvs(self): - self.ncbvs = 0 - # use lightkurve to get CBVs for a given time????????? - # Might need channel information maybe - # self.ncbvs = .... - # self.vectors = np.hstack([self.vectors, cbvs]) - raise NotImplementedError - def _fit_linalg(self, y, ye, k=None): """Hidden method to fit data with linalg""" if k is None: @@ -296,6 +285,32 @@ def func(x, quad=False): return func + def pca(self, y, ncomponents=5): + """In place operation to add the principal components of `y` to `other_vectors` + + Parameters + ---------- + y: np.ndarray + Input flux array to take PCA of. + """ + if not y.ndim == 2: + raise ValueError("Must pass a 2D `y`") + if not y.shape[0] == len(self.time): + raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)") + + # Clean out any time series have significant contribution from one component + k = np.ones(y.shape[1], bool) + for count in range(3): + U, s, V = pca(y[:, k], ncomponents, n_iter=30) + k[k] &= (np.abs(V) < 0.5).all(axis=0) + + self._vectors = np.hstack([self._vectors, U]) + if self.segments: + self._vectors = self._cut_segments(self._vectors) + self._validate() + self._clean_vectors() + self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) + class PerturbationMatrix3D(PerturbationMatrix): """Class to handle 3D perturbation matrices in PSFMachine @@ -336,7 +351,6 @@ def __init__( nknots: int = 10, radius: float = 8, focus: bool = False, - cbvs: bool = False, segments: bool = True, resolution: int = 10, bin_method: str = "downsample", @@ -357,7 +371,6 @@ def __init__( other_vectors=other_vectors, poly_order=poly_order, focus=focus, - cbvs=cbvs, segments=segments, resolution=resolution, bin_method=bin_method, diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index c3f93f5..ed683e5 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -64,6 +64,26 @@ def test_perturbation_matrix(): assert chi < 3 # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1) + flux = np.random.normal(1, 0.1, size=(200, 100)) + assert p.matrix.shape == (20, 8) + p.pca(flux, ncomponents=5) + assert p.matrix.shape == (20, 18) + for bin_method in ["downsample", "bin"]: + s = 200 / res + 1 if bin_method == "downsample" else 200 / res + p = PerturbationMatrix( + time=time, focus=False, resolution=res, bin_method=bin_method + ) + assert len(p.bin_func(y)) == s + assert len(p.bin_func(ye, quad=True)) == s + with pytest.raises(ValueError): + p.bin_func(y[:-4]) + + p.fit(y, ye) + w = p.weights + model = p.model() + assert model.shape[0] == 200 + chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) + assert chi < 3 def test_perturbation_matrix3d(): From 3da1557e5f612e7feb99ca04aa555c4ee02c0e8e Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 16:05:07 -0700 Subject: [PATCH 20/53] :art: added fbpca explicitly as dependency --- poetry.lock | 18 +----------------- pyproject.toml | 1 + src/psfmachine/perturbation.py | 17 +++++++---------- tests/test_perturbation.py | 2 +- 4 files changed, 10 insertions(+), 28 deletions(-) diff --git a/poetry.lock b/poetry.lock index df5b865..05d834a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2028,7 +2028,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "a7b6f13fbee1055129e929d7ca8c9f4ebe00aa78ba2cdf9c960c8656bc69429a" +content-hash = "85a0cd463a7872e91f124ec89b42dda1c3cb1c873a5f0cf1427caf4b2bde55c3" [metadata.files] aesara-theano-fallback = [ @@ -2062,10 +2062,6 @@ argon2-cffi = [ {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"}, {file = "argon2_cffi-20.1.0-cp39-cp39-win32.whl", hash = "sha256:e2db6e85c057c16d0bd3b4d2b04f270a7467c147381e8fd73cbbe5bc719832be"}, {file = "argon2_cffi-20.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a84934bd818e14a17943de8099d41160da4a336bcc699bb4c394bbb9b94bd32"}, - {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b94042e5dcaa5d08cf104a54bfae614be502c6f44c9c89ad1535b2ebdaacbd4c"}, - {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:8282b84ceb46b5b75c3a882b28856b8cd7e647ac71995e71b6705ec06fc232c3"}, - {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"}, - {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"}, ] arviz = [ {file = "arviz-0.11.2-py3-none-any.whl", hash = "sha256:f6a1389a90b53335f248d282c8142b8209150b9625459a85ec6d3d38786797c1"}, @@ -2160,36 +2156,24 @@ cffi = [ {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"}, {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"}, {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24ec4ff2c5c0c8f9c6b87d5bb53555bf267e1e6f70e52e5a9740d32861d36b6f"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c3f39fa737542161d8b0d680df2ec249334cd70a8f420f71c9304bd83c3cbed"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:681d07b0d1e3c462dd15585ef5e33cb021321588bebd910124ef4f4fb71aef55"}, {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"}, {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"}, {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d7cd1abac2ffd92e65c0609661866709b4b2d82dd15f611e602b9b188b0b69"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f861a89e0043afec2a51fd177a567005847973be86f709bbb044d7f42fc4e05"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc5a8e069b9ebfa22e26d0e6b97d6f9781302fe7f4f2b8776c3e1daea35f1adc"}, {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"}, {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"}, {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"}, {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"}, {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"}, {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04c468b622ed31d408fea2346bec5bbffba2cc44226302a0de1ade9f5ea3d373"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06db6321b7a68b2bd6df96d08a5adadc1fa0e8f419226e25b2a5fbf6ccc7350f"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:293e7ea41280cb28c6fcaaa0b1aa1f533b8ce060b9e701d78511e1e6c4a1de76"}, {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"}, {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"}, {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"}, {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"}, {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"}, {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf1ac1984eaa7675ca8d5745a8cb87ef7abecb5592178406e55858d411eadc0"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:df5052c5d867c1ea0b311fb7c3cd28b19df469c056f7fdcfe88c7473aa63e333"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24a570cd11895b60829e941f2613a4f79df1a27344cbbb82164ef2e0116f09c7"}, {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"}, {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"}, {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"}, diff --git a/pyproject.toml b/pyproject.toml index 3926155..acfaf8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ diskcache = "^5.3.0" imageio = "^2.9.0" imageio-ffmpeg = "^0.4.5" kbackground = "0.1.6" +fbpca = "^1.0" [tool.poetry.dev-dependencies] pytest = "^6.1.2" diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 5e81009..21c8e76 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -29,6 +29,8 @@ class PerturbationMatrix(object): How many cadences to bin down via `bin_method` bin_method: str How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin' + focus_exptime: float + Time for the exponent for focus change, if used """ def __init__( @@ -40,6 +42,7 @@ def __init__( segments: bool = True, resolution: int = 10, bin_method: str = "bin", + focus_exptime=50, ): self.time = time @@ -49,6 +52,7 @@ def __init__( self.segments = segments self.resolution = resolution self.bin_method = bin_method + self.focus_exptime = focus_exptime self._vectors = np.vstack( [ (self.time - self.time.mean()) ** idx @@ -112,18 +116,11 @@ def _cut_segments(self, vectors): [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])] ) - def _get_focus_change(self, exptime: float = 100): - """ - Finds a simple model for the focus change - - Parameters - ---------- - exptime: float - The timescale for an exponential decay. - """ + def _get_focus_change(self): + """Finds a simple model for the focus change""" focus = np.asarray( [ - np.exp(-exptime * (self.time - self.time[b])) + np.exp(-self.focus_exptime * (self.time - self.time[b])) for b in np.hstack([0, self.breaks]) ] ) diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index ed683e5..42f6602 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -63,7 +63,7 @@ def test_perturbation_matrix(): chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) assert chi < 3 - # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1) + # Test PCA flux = np.random.normal(1, 0.1, size=(200, 100)) assert p.matrix.shape == (20, 8) p.pca(flux, ncomponents=5) From 15200011772b163339d9619fa4e33322cae1d68e Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 16:09:59 -0700 Subject: [PATCH 21/53] :memo: docs --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 21c8e76..1db444a 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -283,7 +283,7 @@ def func(x, quad=False): return func def pca(self, y, ncomponents=5): - """In place operation to add the principal components of `y` to `other_vectors` + """In place operation to add the principal components of `y` to the design matrix Parameters ---------- From 612d3fdd3d77cfa4250760d508f53b45d09069ad Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 11 Apr 2022 19:53:22 -0700 Subject: [PATCH 22/53] merging from refactor-time --- src/psfmachine/perturbation.py | 159 +++++++++++++-------------------- 1 file changed, 60 insertions(+), 99 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index a4717fc..1db444a 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -6,7 +6,7 @@ from scipy import sparse from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt -from matplotlib import cm +from fbpca import pca class PerturbationMatrix(object): @@ -24,13 +24,13 @@ class PerturbationMatrix(object): focus : bool Whether to correct focus using a simple exponent model segments: bool - Whether to fit portions of data where there is a significant time break as - separate segments + Whether to fit portions of data where there is a significant time break as separate segments resolution: int How many cadences to bin down via `bin_method` bin_method: str - How to bin the data under the hood. Default is by mean binning. Options are - 'downsample' and 'bin' + How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin' + focus_exptime: float + Time for the exponent for focus change, if used """ def __init__( @@ -39,30 +39,31 @@ def __init__( other_vectors: Optional[list] = None, poly_order: int = 3, focus: bool = False, - cbvs: bool = False, segments: bool = True, resolution: int = 10, bin_method: str = "bin", + focus_exptime=50, ): self.time = time self.other_vectors = other_vectors self.poly_order = poly_order self.focus = focus - self.cbvs = cbvs self.segments = segments self.resolution = resolution self.bin_method = bin_method - self.vectors = np.vstack( - [self.time ** idx for idx in range(self.poly_order + 1)] + self.focus_exptime = focus_exptime + self._vectors = np.vstack( + [ + (self.time - self.time.mean()) ** idx + for idx in range(self.poly_order + 1) + ] ).T - if self.cbvs: - self._get_cbvs() if self.focus: self._get_focus_change() self._validate() if self.segments: - self._cut_segments() + self.vectors = self._cut_segments(self.vectors) self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) @@ -96,10 +97,12 @@ def _validate(self): raise ValueError("Must pass other vectors in the right shape") else: raise ValueError("Must pass a list as other vectors") - self.vectors = np.hstack([self.vectors, self.other_vectors]) + self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) + else: + self.vectors = self._vectors.copy() return - def _cut_segments(self): + def _cut_segments(self, vectors): """ Cuts the data into "segments" wherever there is a break. Breaks are defined as anywhere where there is a gap in data of more than 5 times the median @@ -109,25 +112,15 @@ def _cut_segments(self): masks = np.asarray( [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] ).T - self.vectors = np.hstack( - [ - self.vectors[:, idx][:, None] * masks - for idx in range(self.vectors.shape[1]) - ] + return np.hstack( + [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])] ) - def _get_focus_change(self, exptime: float = 100): - """ - Finds a simple model for the focus change - - Parameters - ---------- - exptime: float - The timescale for an exponential decay. - """ + def _get_focus_change(self): + """Finds a simple model for the focus change""" focus = np.asarray( [ - np.exp(-exptime * (self.time - self.time[b])) + np.exp(-self.focus_exptime * (self.time - self.time[b])) for b in np.hstack([0, self.breaks]) ] ) @@ -138,7 +131,7 @@ def _get_focus_change(self, exptime: float = 100): ] ) focus[focus < 1e-10] = 0 - self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]]) + self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]]) return def _clean_vectors(self): @@ -146,8 +139,6 @@ def _clean_vectors(self): nvec = self.poly_order + 1 if self.focus: nvec += 1 - if self.cbvs: - nvec += self.ncbvs if self.segments: s = nvec * (len(self.breaks) + 1) else: @@ -158,47 +149,11 @@ def _clean_vectors(self): return def plot(self): - """ - Creates a diagnostic plot showing the vector components. Will show the full - (line) and bindown (dots) version of the components. - - """ - fig, ax = plt.subplots(1, 2, figsize=(12, 4)) - fig.suptitle("Vectors") - bin_time = self.bin_func(self.time) - bin_vec = self.bin_func(self.vectors) - colors = cm.get_cmap('Set1', self.vectors.shape[1]) - for k in range(self.vectors.shape[1]): - ax[0].plot(self.time, self.vectors[:, k] + k * 0.1, color=colors(k)) - ax[0].plot( - bin_time, - bin_vec[:, k] + k * 0.1, - marker=".", - lw=0, - color=colors(k), - ) - ax[0].set(xlabel="Time", ylabel="Vector", yticks=[]) - - im = ax[1].imshow( - self.vectors.T, - origin="lower", - aspect="auto", - interpolation="nearest", - vmin=-1, - vmax=1, - ) - ax[1].set(xlabel="Time Index", yticks=range(self.vectors.shape[1])) - plt.colorbar(im, ax=ax[1]) + fig, ax = plt.subplots() + ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1) + ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors") return fig - def _get_cbvs(self): - self.ncbvs = 0 - # use lightkurve to get CBVs for a given time????????? - # Might need channel information maybe - # self.ncbvs = .... - # self.vectors = np.hstack([self.vectors, cbvs]) - raise NotImplementedError - def _fit_linalg(self, y, ye, k=None): """Hidden method to fit data with linalg""" if k is None: @@ -327,6 +282,32 @@ def func(x, quad=False): return func + def pca(self, y, ncomponents=5): + """In place operation to add the principal components of `y` to the design matrix + + Parameters + ---------- + y: np.ndarray + Input flux array to take PCA of. + """ + if not y.ndim == 2: + raise ValueError("Must pass a 2D `y`") + if not y.shape[0] == len(self.time): + raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)") + + # Clean out any time series have significant contribution from one component + k = np.ones(y.shape[1], bool) + for count in range(3): + U, s, V = pca(y[:, k], ncomponents, n_iter=30) + k[k] &= (np.abs(V) < 0.5).all(axis=0) + + self._vectors = np.hstack([self._vectors, U]) + if self.segments: + self._vectors = self._cut_segments(self._vectors) + self._validate() + self._clean_vectors() + self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) + class PerturbationMatrix3D(PerturbationMatrix): """Class to handle 3D perturbation matrices in PSFMachine @@ -350,8 +331,7 @@ class PerturbationMatrix3D(PerturbationMatrix): focus : bool Whether to correct focus using a simple exponent model segments: bool - Whether to fit portions of data where there is a significant time break as - separate segments + Whether to fit portions of data where there is a significant time break as separate segments resolution: int How many cadences to bin down via `bin_method` bin_method: str @@ -368,7 +348,6 @@ def __init__( nknots: int = 10, radius: float = 8, focus: bool = False, - cbvs: bool = False, segments: bool = True, resolution: int = 10, bin_method: str = "downsample", @@ -389,7 +368,6 @@ def __init__( other_vectors=other_vectors, poly_order=poly_order, focus=focus, - cbvs=cbvs, segments=segments, resolution=resolution, bin_method=bin_method, @@ -427,8 +405,8 @@ def fit( pixel_mask: Optional[npt.ArrayLike] = None, ): """ - Fits flux to find the best fit model weights. Optionally will include flux - errors. Sets the `self.weights` attribute with best fit weights. + Fits flux to find the best fit model weights. Optionally will include flux errors. + Sets the `self.weights` attribute with best fit weights. Parameters ---------- @@ -437,8 +415,8 @@ def fit( flux_err: npt.ArrayLike Optional flux errors. Should have shape ntimes x npixels. pixel_mask: npt.ArrayLike - Pixel mask to apply. Values that are `True` will be used in the fit. - Values that are `False` will be masked. Should have shape npixels. + Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked. + Should have shape npixels. """ if pixel_mask is not None: if not isinstance(pixel_mask, np.ndarray): @@ -487,28 +465,11 @@ def model(self, time_indices: Optional[list] = None): ] ) - def plot_model(self, time_index: int = 0): - """Creates a diagnostic plot with the perturbation model at a given cadences - - Parameters - ---------- - time_index: int - Time index (cadence) to show in the figure. - """ + def plot_model(self, time_index=0): if not hasattr(self, "weights"): raise ValueError("Run `fit` first.") - fig, ax = plt.subplots(figsize=(6, 5)) - im = ax.scatter( - self.dx, - self.dy, - c=self.model(time_index)[0], - s=2, - cmap="viridis", - vmin=0.8, - vmax=1.2, - ) - cbar = fig.colorbar(im, ax=ax, shrink=0.7) - cbar.set_label("Perturbation Value") + fig, ax = plt.subplots() + ax.scatter(self.dx, self.dy, c=self.model(time_index)[0]) ax.set( xlabel=r"$\delta$x", ylabel=r"$\delta$y", From 78633dc4e0b0152e0d1776a6513b93db54f34025 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 22:48:59 -0700 Subject: [PATCH 23/53] :bug: fixing pca --- src/psfmachine/perturbation.py | 61 +++++++++++++++++----------------- tests/test_perturbation.py | 18 ++-------- 2 files changed, 33 insertions(+), 46 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 1db444a..0d173ab 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -46,7 +46,7 @@ def __init__( ): self.time = time - self.other_vectors = other_vectors + self.other_vectors = np.nan_to_num(other_vectors) self.poly_order = poly_order self.focus = focus self.segments = segments @@ -61,7 +61,19 @@ def __init__( ).T if self.focus: self._get_focus_change() - self._validate() + if self.other_vectors is not None: + if isinstance(self.other_vectors, (list, np.ndarray)): + self.other_vectors = np.atleast_2d(self.other_vectors) + if self.other_vectors.shape[0] != len(self.time): + if self.other_vectors.shape[1] == len(self.time): + self.other_vectors = self.other_vectors.T + else: + raise ValueError("Must pass other vectors in the right shape") + else: + raise ValueError("Must pass a list as other vectors") + self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) + else: + self.vectors = self._vectors.copy() if self.segments: self.vectors = self._cut_segments(self.vectors) self._clean_vectors() @@ -82,26 +94,6 @@ def prior_sigma(self): def breaks(self): return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1 - def _validate(self): - """ - Checks that the `other_vectors` are the right dimensions. - """ - - if self.other_vectors is not None: - if isinstance(self.other_vectors, (list, np.ndarray)): - self.other_vectors = np.atleast_2d(self.other_vectors) - if self.other_vectors.shape[0] != len(self.time): - if self.other_vectors.shape[1] == len(self.time): - self.other_vectors = self.other_vectors.T - else: - raise ValueError("Must pass other vectors in the right shape") - else: - raise ValueError("Must pass a list as other vectors") - self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) - else: - self.vectors = self._vectors.copy() - return - def _cut_segments(self, vectors): """ Cuts the data into "segments" wherever there is a break. Breaks are defined @@ -143,9 +135,14 @@ def _clean_vectors(self): s = nvec * (len(self.breaks) + 1) else: s = nvec - X = self.vectors[:, :s] - w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) - self.vectors[:, s:] -= X.dot(w) + + if s != self.vectors.shape[1]: + X = self.vectors[:, :s] + w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) + self.vectors[:, s:] -= X.dot(w) + self.vectors[:, s:] -= np.asarray( + [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T] + ).T return def plot(self): @@ -283,7 +280,7 @@ def func(x, quad=False): return func def pca(self, y, ncomponents=5): - """In place operation to add the principal components of `y` to the design matrix + """Adds the principal components of `y` to the design matrix Parameters ---------- @@ -298,13 +295,17 @@ def pca(self, y, ncomponents=5): # Clean out any time series have significant contribution from one component k = np.ones(y.shape[1], bool) for count in range(3): - U, s, V = pca(y[:, k], ncomponents, n_iter=30) + self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30) k[k] &= (np.abs(V) < 0.5).all(axis=0) - self._vectors = np.hstack([self._vectors, U]) + if self.other_vectors is not None: + self.vectors = np.hstack( + [self._vectors, self.other_vectors, self._pca_components] + ) + else: + self.vectors = np.hstack([self._vectors, self._pca_components]) if self.segments: - self._vectors = self._cut_segments(self._vectors) - self._validate() + self.vectors = self._cut_segments(self.vectors) self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index 42f6602..d147261 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -68,22 +68,8 @@ def test_perturbation_matrix(): assert p.matrix.shape == (20, 8) p.pca(flux, ncomponents=5) assert p.matrix.shape == (20, 18) - for bin_method in ["downsample", "bin"]: - s = 200 / res + 1 if bin_method == "downsample" else 200 / res - p = PerturbationMatrix( - time=time, focus=False, resolution=res, bin_method=bin_method - ) - assert len(p.bin_func(y)) == s - assert len(p.bin_func(ye, quad=True)) == s - with pytest.raises(ValueError): - p.bin_func(y[:-4]) - - p.fit(y, ye) - w = p.weights - model = p.model() - assert model.shape[0] == 200 - chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1) - assert chi < 3 + assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) + p.fit(y, ye) def test_perturbation_matrix3d(): From 14babe3e4ebcb64e81c956d512e961ccc832d69f Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 23:01:42 -0700 Subject: [PATCH 24/53] :bug: fix @jorgemarpa bug --- src/psfmachine/perturbation.py | 13 +++++++++++++ tests/test_perturbation.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 0d173ab..4e9e98d 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -140,6 +140,7 @@ def _clean_vectors(self): X = self.vectors[:, :s] w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) self.vectors[:, s:] -= X.dot(w) + # Each segment has mean zero self.vectors[:, s:] -= np.asarray( [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T] ).T @@ -279,7 +280,11 @@ def func(x, quad=False): return func + def pca(self, y, ncomponents=5): + return self._pca(y, ncomponents=ncomponents) + + def _pca(self, y, ncomponents=5): """Adds the principal components of `y` to the design matrix Parameters @@ -352,6 +357,7 @@ def __init__( segments: bool = True, resolution: int = 10, bin_method: str = "downsample", + focus_exptime: float = 50, ): self.dx = dx self.dy = dy @@ -372,8 +378,11 @@ def __init__( segments=segments, resolution=resolution, bin_method=bin_method, + focus_exptime=focus_exptime, ) + self._get_cartesian_stacked() + def _get_cartesian_stacked(self): self._cartesian_stacked = sparse.hstack( [self.cartesian_matrix for idx in range(self.vectors.shape[1])], format="csr", @@ -466,6 +475,10 @@ def model(self, time_indices: Optional[list] = None): ] ) + def pca(self, y, ncomponents=5): + self._pca(y, ncomponents=5) + self._get_cartesian_stacked() + def plot_model(self, time_index=0): if not hasattr(self, "weights"): raise ValueError("Run `fit` first.") diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index d147261..7a8e08f 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -129,6 +129,19 @@ def test_perturbation_matrix3d(): ) assert chi < 3 + p3 = PerturbationMatrix3D( + time=time, + dx=dx, + dy=dy, + nknots=4, + radius=5, + poly_order=2, + bin_method=bin_method, + ) + p3.pca(flux, ncomponents=5) + p3.fit(flux, flux_err) + + # Add in one bad pixel flux[:, 100] = 1e5 pixel_mask = np.ones(169, bool) From 9d4a80faa42a803dd34fa42258612c0d2a6447f4 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Mon, 11 Apr 2022 23:07:07 -0700 Subject: [PATCH 25/53] :memo: update docstrings --- src/psfmachine/perturbation.py | 18 +++++++++++++----- tests/test_perturbation.py | 1 - 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 4e9e98d..14b5c38 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -280,11 +280,7 @@ def func(x, quad=False): return func - def pca(self, y, ncomponents=5): - return self._pca(y, ncomponents=ncomponents) - - def _pca(self, y, ncomponents=5): """Adds the principal components of `y` to the design matrix Parameters @@ -292,6 +288,11 @@ def _pca(self, y, ncomponents=5): y: np.ndarray Input flux array to take PCA of. """ + return self._pca(y, ncomponents=ncomponents) + + def _pca(self, y, ncomponents=5): + """This hidden method allows us to update the pca method for other classes + """ if not y.ndim == 2: raise ValueError("Must pass a 2D `y`") if not y.shape[0] == len(self.time): @@ -476,9 +477,16 @@ def model(self, time_indices: Optional[list] = None): ) def pca(self, y, ncomponents=5): + """Adds the principal components of `y` to the design matrix + + Parameters + ---------- + y: np.ndarray + Input flux array to take PCA of. + """ self._pca(y, ncomponents=5) self._get_cartesian_stacked() - + def plot_model(self, time_index=0): if not hasattr(self, "weights"): raise ValueError("Run `fit` first.") diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index 7a8e08f..e0fcde1 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -141,7 +141,6 @@ def test_perturbation_matrix3d(): p3.pca(flux, ncomponents=5) p3.fit(flux, flux_err) - # Add in one bad pixel flux[:, 100] = 1e5 pixel_mask = np.ones(169, bool) From 88c7493e462d19cd110933ba39eeddd843c2a0da Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 10:36:46 -0700 Subject: [PATCH 26/53] merging from refactor-time --- src/psfmachine/perturbation.py | 81 +++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 1db444a..fc3cf39 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -46,7 +46,7 @@ def __init__( ): self.time = time - self.other_vectors = other_vectors + self.other_vectors = np.nan_to_num(other_vectors) self.poly_order = poly_order self.focus = focus self.segments = segments @@ -61,7 +61,19 @@ def __init__( ).T if self.focus: self._get_focus_change() - self._validate() + if self.other_vectors is not None: + if isinstance(self.other_vectors, (list, np.ndarray)): + self.other_vectors = np.atleast_2d(self.other_vectors) + if self.other_vectors.shape[0] != len(self.time): + if self.other_vectors.shape[1] == len(self.time): + self.other_vectors = self.other_vectors.T + else: + raise ValueError("Must pass other vectors in the right shape") + else: + raise ValueError("Must pass a list as other vectors") + self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) + else: + self.vectors = self._vectors.copy() if self.segments: self.vectors = self._cut_segments(self.vectors) self._clean_vectors() @@ -82,26 +94,6 @@ def prior_sigma(self): def breaks(self): return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1 - def _validate(self): - """ - Checks that the `other_vectors` are the right dimensions. - """ - - if self.other_vectors is not None: - if isinstance(self.other_vectors, (list, np.ndarray)): - self.other_vectors = np.atleast_2d(self.other_vectors) - if self.other_vectors.shape[0] != len(self.time): - if self.other_vectors.shape[1] == len(self.time): - self.other_vectors = self.other_vectors.T - else: - raise ValueError("Must pass other vectors in the right shape") - else: - raise ValueError("Must pass a list as other vectors") - self.vectors = np.hstack([self._vectors.copy(), self.other_vectors]) - else: - self.vectors = self._vectors.copy() - return - def _cut_segments(self, vectors): """ Cuts the data into "segments" wherever there is a break. Breaks are defined @@ -143,9 +135,15 @@ def _clean_vectors(self): s = nvec * (len(self.breaks) + 1) else: s = nvec - X = self.vectors[:, :s] - w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) - self.vectors[:, s:] -= X.dot(w) + + if s != self.vectors.shape[1]: + X = self.vectors[:, :s] + w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) + self.vectors[:, s:] -= X.dot(w) + # Each segment has mean zero + self.vectors[:, s:] -= np.asarray( + [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T] + ).T return def plot(self): @@ -283,13 +281,17 @@ def func(x, quad=False): return func def pca(self, y, ncomponents=5): - """In place operation to add the principal components of `y` to the design matrix + """Adds the principal components of `y` to the design matrix Parameters ---------- y: np.ndarray Input flux array to take PCA of. """ + return self._pca(y, ncomponents=ncomponents) + + def _pca(self, y, ncomponents=5): + """This hidden method allows us to update the pca method for other classes""" if not y.ndim == 2: raise ValueError("Must pass a 2D `y`") if not y.shape[0] == len(self.time): @@ -298,13 +300,17 @@ def pca(self, y, ncomponents=5): # Clean out any time series have significant contribution from one component k = np.ones(y.shape[1], bool) for count in range(3): - U, s, V = pca(y[:, k], ncomponents, n_iter=30) + self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30) k[k] &= (np.abs(V) < 0.5).all(axis=0) - self._vectors = np.hstack([self._vectors, U]) + if self.other_vectors is not None: + self.vectors = np.hstack( + [self._vectors, self.other_vectors, self._pca_components] + ) + else: + self.vectors = np.hstack([self._vectors, self._pca_components]) if self.segments: - self._vectors = self._cut_segments(self._vectors) - self._validate() + self.vectors = self._cut_segments(self.vectors) self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) @@ -351,6 +357,7 @@ def __init__( segments: bool = True, resolution: int = 10, bin_method: str = "downsample", + focus_exptime: float = 50, ): self.dx = dx self.dy = dy @@ -371,8 +378,11 @@ def __init__( segments=segments, resolution=resolution, bin_method=bin_method, + focus_exptime=focus_exptime, ) + self._get_cartesian_stacked() + def _get_cartesian_stacked(self): self._cartesian_stacked = sparse.hstack( [self.cartesian_matrix for idx in range(self.vectors.shape[1])], format="csr", @@ -465,6 +475,17 @@ def model(self, time_indices: Optional[list] = None): ] ) + def pca(self, y, ncomponents=5): + """Adds the principal components of `y` to the design matrix + + Parameters + ---------- + y: np.ndarray + Input flux array to take PCA of. + """ + self._pca(y, ncomponents=5) + self._get_cartesian_stacked() + def plot_model(self, time_index=0): if not hasattr(self, "weights"): raise ValueError("Run `fit` first.") From 556317de99416ea380125088ca7f2df02cffe9bd Mon Sep 17 00:00:00 2001 From: Christina Hedges <14965634+christinahedges@users.noreply.github.com> Date: Tue, 12 Apr 2022 11:04:36 -0700 Subject: [PATCH 27/53] Update src/psfmachine/perturbation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jorge Martínez-Palomera --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 14b5c38..4f5acec 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -484,7 +484,7 @@ def pca(self, y, ncomponents=5): y: np.ndarray Input flux array to take PCA of. """ - self._pca(y, ncomponents=5) + self._pca(y, ncomponents=ncomponents) self._get_cartesian_stacked() def plot_model(self, time_index=0): From 648211e6abbdcb8d5661ec6ea4d19409bbfbdcc6 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 12:00:13 -0700 Subject: [PATCH 28/53] adding perturbation-pca into machine --- src/psfmachine/machine.py | 9 ++++++++- src/psfmachine/perturbation.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index cbe7d52..1e99ec8 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -466,7 +466,8 @@ def _get_source_mask( l[idx] = test_r[loc[0]] ok = np.isfinite(l) source_radius_limit = np.polyval( - np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates) + np.polyfit(test_f[ok], l[ok], poly_order), + np.log10(self.source_flux_estimates), ) source_radius_limit[ source_radius_limit > upper_radius_limit @@ -614,6 +615,8 @@ def build_time_model( bin_method="bin", segments=False, focus=False, + focus_exptime=50, + pca_ncomponents=0, ): """ Builds a time model that moves the PRF model to account for the scene movement @@ -688,6 +691,7 @@ def build_time_model( resolution=self.n_time_points, other_vectors=other_vectors, bin_method=bin_method, + focus_exptime=focus_exptime, ) # get uncontaminated pixel norm-flux @@ -728,6 +732,9 @@ def build_time_model( # combine good-behaved pixels with uncontaminated pixels k &= uncontaminated_pixels + if pca_ncomponents > 0: + P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents) + # iterate to remvoe outliers for count in [0, 1, 2]: # need to add pixel_mask=k diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index fc3cf39..589c2e6 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -483,7 +483,7 @@ def pca(self, y, ncomponents=5): y: np.ndarray Input flux array to take PCA of. """ - self._pca(y, ncomponents=5) + self._pca(y, ncomponents=ncomponents) self._get_cartesian_stacked() def plot_model(self, time_index=0): From e97d3f5e51254af1cb282a87d6453f48f23d1828 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 12 Apr 2022 13:12:17 -0700 Subject: [PATCH 29/53] :sparkles: now pca has smoothed components --- src/psfmachine/perturbation.py | 73 ++++++++++++++++++++++++++-------- tests/test_perturbation.py | 11 ++++- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 14b5c38..adb674d 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -7,6 +7,7 @@ from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt from fbpca import pca +from astropy.convolution import convolve, Box1DKernel class PerturbationMatrix(object): @@ -94,18 +95,24 @@ def prior_sigma(self): def breaks(self): return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1 + @property + def segment_masks(self): + x = np.array_split(np.arange(len(self.time)), self.breaks) + return np.asarray( + [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] + ).T + def _cut_segments(self, vectors): """ Cuts the data into "segments" wherever there is a break. Breaks are defined as anywhere where there is a gap in data of more than 5 times the median time between observations. """ - x = np.array_split(np.arange(len(self.time)), self.breaks) - masks = np.asarray( - [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x] - ).T return np.hstack( - [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])] + [ + vectors[:, idx][:, None] * self.segment_masks + for idx in range(vectors.shape[1]) + ] ) def _get_focus_change(self): @@ -176,7 +183,7 @@ def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None): Optional flux errors. Should have shape ntimes. """ if flux_err is None: - flux_err = np.ones(len(flux_err)) + flux_err = np.ones_like(flux) y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() self.weights = self._fit_linalg(y, ye) @@ -280,7 +287,7 @@ def func(x, quad=False): return func - def pca(self, y, ncomponents=5): + def pca(self, y, ncomponents=5, box_width=100): """Adds the principal components of `y` to the design matrix Parameters @@ -288,22 +295,56 @@ def pca(self, y, ncomponents=5): y: np.ndarray Input flux array to take PCA of. """ - return self._pca(y, ncomponents=ncomponents) + return self._pca(y, ncomponents=ncomponents, box_width=box_width) - def _pca(self, y, ncomponents=5): - """This hidden method allows us to update the pca method for other classes - """ + def _pca(self, y, ncomponents=5, box_width=100): + """This hidden method allows us to update the pca method for other classes""" if not y.ndim == 2: raise ValueError("Must pass a 2D `y`") if not y.shape[0] == len(self.time): raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)") # Clean out any time series have significant contribution from one component - k = np.ones(y.shape[1], bool) + k = np.nansum(y, axis=0) != 0 + + long_y = np.vstack( + [ + np.hstack( + [ + convolve(y[m, idx], Box1DKernel(100), boundary="extend") + for m in self.segment_masks.astype(bool).T + ] + ) + for idx in range(y.shape[1]) + ] + ).T + + med_y = np.vstack( + [ + np.hstack( + [ + convolve( + y[m, idx] / long_y[m, idx], + Box1DKernel(15), + boundary="extend", + ) + for m in self.segment_masks.astype(bool).T + ] + ) + for idx in range(y.shape[1]) + ] + ).T + for count in range(3): - self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30) + U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30) k[k] &= (np.abs(V) < 0.5).all(axis=0) + for count in range(3): + U2, s, V = pca(np.nan_to_num(med_y)[:, k], ncomponents, n_iter=30) + k[k] &= (np.abs(V) < 0.5).all(axis=0) + + self._pca_components = np.hstack([U1, U2]) + if self.other_vectors is not None: self.vectors = np.hstack( [self._vectors, self.other_vectors, self._pca_components] @@ -439,7 +480,7 @@ def fit( else: pixel_mask = np.ones(flux.shape[-1], bool) if flux_err is None: - flux_err = np.ones(len(flux_err)) + flux_err = np.ones_like(flux) y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel() k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel() @@ -476,7 +517,7 @@ def model(self, time_indices: Optional[list] = None): ] ) - def pca(self, y, ncomponents=5): + def pca(self, y, ncomponents=5, box_width=100): """Adds the principal components of `y` to the design matrix Parameters @@ -484,7 +525,7 @@ def pca(self, y, ncomponents=5): y: np.ndarray Input flux array to take PCA of. """ - self._pca(y, ncomponents=5) + self._pca(y, ncomponents=5, box_width=box_width) self._get_cartesian_stacked() def plot_model(self, time_index=0): diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index e0fcde1..b1b8b12 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -65,9 +65,16 @@ def test_perturbation_matrix(): # Test PCA flux = np.random.normal(1, 0.1, size=(200, 100)) + p = PerturbationMatrix(time=time, focus=False) + assert p.matrix.shape == (20, 8) + p.pca(flux, ncomponents=2) + assert p.matrix.shape == (20, 16) + assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) + p.fit(y, ye) + p = PerturbationMatrix(time=time, focus=False, segments=False) + assert p.matrix.shape == (20, 4) + p.pca(flux, ncomponents=2) assert p.matrix.shape == (20, 8) - p.pca(flux, ncomponents=5) - assert p.matrix.shape == (20, 18) assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) p.fit(y, ye) From ca3645da2647ca6732e044a5f4a2f78a688e741c Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 14:48:05 -0700 Subject: [PATCH 30/53] improved spline and gaussian smooth --- src/psfmachine/machine.py | 7 ++- src/psfmachine/utils.py | 121 +++++++++++++++++++++++--------------- 2 files changed, 80 insertions(+), 48 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 1e99ec8..b1342bf 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -667,9 +667,10 @@ def build_time_model( mpc2 = self.dec_centroid.to("arcsec").value / 4 # smooth the vectors - mpc1_smooth, mpc2_smooth = spline_smooth( + mpc1_smooth, mpc2_smooth = bspline_smooth( [mpc1, mpc2], - time=self.time, + x=self.time, + do_segments=segments, ) # normalize components mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / ( @@ -733,6 +734,8 @@ def build_time_model( k &= uncontaminated_pixels if pca_ncomponents > 0: + # select only bright pixels + k &= (flux_binned_raw > 300).all(axis=0) P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents) # iterate to remvoe outliers diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index 54eeeb7..9a188dd 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -538,7 +538,7 @@ def threshold_bin(x, y, z, z_err=None, abs_thresh=10, bins=15, statistic=np.nanm ) -def get_breaks(time): +def get_breaks(time, include_ext=False): """ Finds discontinuity in the time array and return the break indexes. @@ -553,10 +553,15 @@ def get_breaks(time): An array of indexes with the break positions """ dts = np.diff(time) - return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time) - 1]) + if include_ext: + return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)]) + else: + return np.where(dts > 5 * np.median(dts))[0] + 1 -def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"): +def gaussian_smooth( + y, x=None, do_segments=False, filter_size=13, mode="mirror", breaks=None +): """ Applies a Gaussian smoothing to a curve. @@ -583,20 +588,24 @@ def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"): else: y = np.atleast_2d(y) - if x is None: - splits = [0, y.shape[1]] - elif isinstance(x, np.ndarray): - splits = get_breaks(x) - # find poscorr discontinuity in each axis - grads = np.gradient(y, x, axis=1) - # the 7-sigma here is hardcoded and found to work ok - splits = np.unique( - np.concatenate( - [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + if do_segments: + if breaks is None and x is None: + raise ValueError("Please provide `x` or `breaks` to have splits.") + elif breaks is None and x is not None: + splits = get_breaks(x, include_ext=True) + else: + splits = np.array(breaks) + # find discontinuity in y according to x if provided + if x is not None: + grads = np.gradient(y, x, axis=1) + # the 7-sigma here is hardcoded and found to work ok + splits = np.unique( + np.concatenate( + [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + ) ) - ) else: - raise ValueError("`x` optional argument invalid, pass None or an array like.") + splits = [0, y.shape[-1]] y_smooth = [] for i in range(1, len(splits)): @@ -611,7 +620,7 @@ def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"): return np.hstack(y_smooth) -def spline_smooth(y, x, weights=None, degree=3, do_splits=True): +def bspline_smooth(y, x=None, degree=3, do_segments=False, breaks=None, n_knots=100): """ Applies a spline smoothing to a curve. @@ -620,13 +629,17 @@ def spline_smooth(y, x, weights=None, degree=3, do_splits=True): y : numpy.ndarray or list of numpy.ndarray Arrays to be smoothen in the last axis x : numpy.ndarray - Time array of same shape of `y` last axis. - weights : numpy.ndarray or list of numpy.ndarray - Optional array of weights the same shape as `y`. + Optional. x array, as `y = f(x)`` used to find discontinuities in `f(x)`. If x + is given then splits will be computed, if not `breaks` argument as to be provided. degree : int Degree of the psline fit, default is 3. - do_splits : boolean - Do the splines per segments with splits defined by data discontinuity. + do_segments : boolean + Do the splines per segments with splits computed from data `x` or given in `breaks`. + breaks : list of ints + List of break indexes in `y`. + nknots : int + Number of knots for the B-Spline. If `do_segments` is True, knots will be + distributed in each segment. Returns ------- @@ -638,33 +651,49 @@ def spline_smooth(y, x, weights=None, degree=3, do_splits=True): else: y = np.atleast_2d(y) - if do_splits: - splits = get_breaks(x) - # find poscorr discontinuity in each axis - grads = np.gradient(y, x, axis=1) - # the 7-sigma here is hardcoded and found to work ok - splits = np.unique( - np.concatenate( - [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + if do_segments: + if breaks is None and x is None: + raise ValueError("Please provide `x` or `breaks` to have splits.") + elif breaks is None and x is not None: + splits = get_breaks(x) + else: + splits = np.array(breaks) + # find discontinuity in y according to x if provided + if x is not None: + grads = np.gradient(y, x, axis=1) + # the 7-sigma here is hardcoded and found to work ok + splits = np.unique( + np.concatenate( + [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])] + ) ) - ) else: - splits = [0, -1] - if weights is None: - s = 5e-4 - weights = np.ones_like(y) - else: - if isinstance(weights, list): - weights = np.asarray(weights) - else: - weights = np.atleast_2d(weights) - # s = weights.shape[-1] - np.sqrt(2 * weights.shape[-1]) - s = 1.9e-10 - if y.shape != weights.shape: - raise ValueError("Inconsistent shape between `v` and `weights`") + splits = [0, y.shape[-1]] + + def spline(v): + knots = np.linspace(v.min(), v.max(), n_knots) + DM = np.asarray( + dmatrix( + "bs(x, knots=knots, degree=2, include_intercept=True)", + {"x": list(v), "knots": knots}, + ) + ) + x = np.array_split(np.arange(len(v)), splits) + masks = np.asarray([np.in1d(np.arange(len(v)), x1).astype(float) for x1 in x]).T + DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])]) + return DM y_smooth = [] - for y_, w in zip(y, weights): - tck = interpolate.splrep(x, y_, w=w, s=s, xb=x[splits], k=degree) - y_smooth.append(interpolate.splev(x, tck, der=0)) + DM = spline(np.arange(y.shape[-1])) + prior_mu = np.zeros(DM.shape[1]) + prior_sigma = np.ones(DM.shape[1]) * 1e5 + # iterate over vectors in y + for v in range(y.shape[0]): + weights = solve_linear_model( + DM, + y[v], + prior_mu=prior_mu, + prior_sigma=prior_sigma, + ) + y_smooth.append(DM.dot(weights)) return np.array(y_smooth) From 8b4da3dbcfde60dbb8350f2443c3556f9bea1acd Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 15:01:43 -0700 Subject: [PATCH 31/53] P3 -> P --- src/psfmachine/machine.py | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index b1342bf..1bb09f0 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -683,7 +683,7 @@ def build_time_model( other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth] # create a 3D perturbation matrix - P3 = PerturbationMatrix3D( + P = PerturbationMatrix3D( time=_whitened_time, dx=dx, dy=dy, @@ -711,9 +711,9 @@ def build_time_model( flux_err_norm = flux_err / np.nanmean(flux, axis=0) # bindown flux arrays - flux_binned_raw = P3.bin_func(flux) - flux_binned = P3.bin_func(flux_norm) - flux_err_binned = P3.bin_func(flux_err_norm) + flux_binned_raw = P.bin_func(flux) + flux_binned = P.bin_func(flux_norm) + flux_err_binned = P.bin_func(flux_err_norm) # No saturated pixels, 1e5 is a hardcoded value for Kepler. k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones( @@ -736,13 +736,13 @@ def build_time_model( if pca_ncomponents > 0: # select only bright pixels k &= (flux_binned_raw > 300).all(axis=0) - P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents) + P.pca(flux_norm[:, k], ncomponents=pca_ncomponents) # iterate to remvoe outliers for count in [0, 1, 2]: # need to add pixel_mask=k - P3.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k) - res = flux_binned - P3.matrix.dot(P3.weights).reshape(flux_binned.shape) + P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k) + res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape) # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape)) res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T) bad_targets = sigma_clip(res, sigma=5).mask @@ -752,7 +752,7 @@ def build_time_model( # bookkeeping self.flux_binned = flux_binned self._time_masked_pix = k - self.P3 = P3 + self.P = P if plot: return self.plot_time_model() return @@ -770,7 +770,7 @@ def _get_perturbed_model(self, time_indices=None): perturbed_model = [] for tdx in time_indices: X = self.mean_model.copy() - X.data *= self.P3.model(time_indices=tdx).ravel() + X.data *= self.P.model(time_indices=tdx).ravel() perturbed_model.append(X) self.perturbed_model = perturbed_model @@ -787,14 +787,12 @@ def plot_time_model(self): fig : matplotlib.Figure Figure. """ - model_binned = self.P3.matrix.dot(self.P3.weights).reshape( - self.flux_binned.shape - ) + model_binned = self.P.matrix.dot(self.P.weights).reshape(self.flux_binned.shape) fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w") # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool) im = ax[0, 0].scatter( - self.P3.dx[self._time_masked_pix], - self.P3.dy[self._time_masked_pix], + self.P.dx[self._time_masked_pix], + self.P.dy[self._time_masked_pix], c=self.flux_binned[0, self._time_masked_pix], s=3, vmin=0.5, @@ -803,8 +801,8 @@ def plot_time_model(self): rasterized=True, ) ax[0, 1].scatter( - self.P3.dx[self._time_masked_pix], - self.P3.dy[self._time_masked_pix], + self.P.dx[self._time_masked_pix], + self.P.dy[self._time_masked_pix], c=self.flux_binned[-1, self._time_masked_pix], s=3, vmin=0.5, @@ -813,8 +811,8 @@ def plot_time_model(self): rasterized=True, ) ax[1, 0].scatter( - self.P3.dx[self._time_masked_pix], - self.P3.dy[self._time_masked_pix], + self.P.dx[self._time_masked_pix], + self.P.dy[self._time_masked_pix], c=model_binned[0, self._time_masked_pix], s=3, vmin=0.5, @@ -823,8 +821,8 @@ def plot_time_model(self): rasterized=True, ) ax[1, 1].scatter( - self.P3.dx[self._time_masked_pix], - self.P3.dy[self._time_masked_pix], + self.P.dx[self._time_masked_pix], + self.P.dy[self._time_masked_pix], c=model_binned[-1, self._time_masked_pix], s=3, vmin=0.5, @@ -1285,13 +1283,13 @@ def fit_model(self, fit_va=False): self.model_flux[tdx] = X.dot(self.ws[tdx]) if fit_va: - if not hasattr(self, "P3"): + if not hasattr(self, "P"): raise ValueError( "Please use `build_time_model` before fitting with velocity " "aberration." ) - X.data *= self.P3.model(time_indices=tdx).ravel() + X.data *= self.P.model(time_indices=tdx).ravel() sigma_w_inv = X.T.dot( X.multiply(1 / self.flux_err[tdx][:, None] ** 2) From 94c3aafabb794ee5ed181581990a531dd0eeb2a1 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 15:16:20 -0700 Subject: [PATCH 32/53] hotfix --- src/psfmachine/machine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 1bb09f0..acb7a40 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -14,7 +14,7 @@ solve_linear_model, sparse_lessthan, threshold_bin, - spline_smooth, + bspline_smooth, ) from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP from .perturbation import PerturbationMatrix3D @@ -466,7 +466,7 @@ def _get_source_mask( l[idx] = test_r[loc[0]] ok = np.isfinite(l) source_radius_limit = np.polyval( - np.polyfit(test_f[ok], l[ok], poly_order), + np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates), ) source_radius_limit[ From 68861a6203e0cd5ae9d63d2410bc5b5dc8cc4437 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 15:16:59 -0700 Subject: [PATCH 33/53] flake8 --- src/psfmachine/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index 9a188dd..a820326 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -7,7 +7,6 @@ from scipy import sparse from patsy import dmatrix from scipy.ndimage import gaussian_filter1d -from scipy import interpolate import pyia # size_limit is 1GB From 0cbde4652d6abc98958d6af45e4409b08972fff1 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 12 Apr 2022 16:24:10 -0700 Subject: [PATCH 34/53] :bug: update pca --- src/psfmachine/perturbation.py | 123 +++++++++++++++++++++++++-------- src/psfmachine/utils.py | 36 ++++++---- 2 files changed, 117 insertions(+), 42 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index adb674d..34b3a26 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -8,6 +8,7 @@ import matplotlib.pyplot as plt from fbpca import pca from astropy.convolution import convolve, Box1DKernel +from .utils import spline1d class PerturbationMatrix(object): @@ -287,7 +288,7 @@ def func(x, quad=False): return func - def pca(self, y, ncomponents=5, box_width=100): + def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): """Adds the principal components of `y` to the design matrix Parameters @@ -295,9 +296,14 @@ def pca(self, y, ncomponents=5, box_width=100): y: np.ndarray Input flux array to take PCA of. """ - return self._pca(y, ncomponents=ncomponents, box_width=box_width) + return self._pca( + y, + ncomponents=ncomponents, + long_time_scale=long_time_scale, + med_time_scale=med_time_scale, + ) - def _pca(self, y, ncomponents=5, box_width=100): + def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): """This hidden method allows us to update the pca method for other classes""" if not y.ndim == 2: raise ValueError("Must pass a 2D `y`") @@ -307,33 +313,91 @@ def _pca(self, y, ncomponents=5, box_width=100): # Clean out any time series have significant contribution from one component k = np.nansum(y, axis=0) != 0 - long_y = np.vstack( + X = sparse.hstack( [ - np.hstack( - [ - convolve(y[m, idx], Box1DKernel(100), boundary="extend") - for m in self.segment_masks.astype(bool).T - ] - ) - for idx in range(y.shape[1]) + spline1d( + self.time, + np.linspace( + self.time[m].min(), + self.time[m].max(), + int( + np.ceil( + (self.time[m].max() - self.time[m].min()) + / long_time_scale + ) + ), + )[1:-1], + degree=3, + )[:, 1:] + for m in self.segment_masks.astype(bool).T ] - ).T + ) + X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr() + X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] + long_y = X.dot( + np.linalg.solve( + X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)), + X.T.dot(y), + ) + ) - med_y = np.vstack( + X = sparse.hstack( [ - np.hstack( - [ - convolve( - y[m, idx] / long_y[m, idx], - Box1DKernel(15), - boundary="extend", - ) - for m in self.segment_masks.astype(bool).T - ] - ) - for idx in range(y.shape[1]) + spline1d( + self.time, + np.linspace( + self.time[m].min(), + self.time[m].max(), + int( + np.ceil( + (self.time[m].max() - self.time[m].min()) + / med_time_scale + ) + ), + )[1:-1], + degree=3, + )[:, 1:] + for m in self.segment_masks.astype(bool).T ] - ).T + ) + X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr() + X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] + + med_y = X.dot( + np.linalg.solve( + X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)), + X.T.dot(y - long_y), + ) + ) + + # + # long_y = np.vstack( + # [ + # np.hstack( + # [ + # convolve(y[m, idx], Box1DKernel(100), boundary="extend") + # for m in self.segment_masks.astype(bool).T + # ] + # ) + # for idx in range(y.shape[1]) + # ] + # ).T + + # med_y = np.vstack( + # [ + # np.hstack( + # [ + # convolve( + # y[m, idx] / long_y[m, idx], + # Box1DKernel(15), + # boundary="extend", + # ) + # for m in self.segment_masks.astype(bool).T + # ] + # ) + # for idx in range(y.shape[1]) + # ] + # ).T for count in range(3): U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30) @@ -517,7 +581,7 @@ def model(self, time_indices: Optional[list] = None): ] ) - def pca(self, y, ncomponents=5, box_width=100): + def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): """Adds the principal components of `y` to the design matrix Parameters @@ -525,7 +589,12 @@ def pca(self, y, ncomponents=5, box_width=100): y: np.ndarray Input flux array to take PCA of. """ - self._pca(y, ncomponents=5, box_width=box_width) + self._pca( + y, + ncomponents=5, + long_time_scale=long_time_scale, + med_time_scale=med_time_scale, + ) self._get_cartesian_stacked() def plot_model(self, time_index=0): diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index 4850e43..9e1e62f 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -196,24 +196,30 @@ def _make_A_polar(phi, r, cut_r=6, rmin=1, rmax=18, n_r_knots=12, n_phi_knots=15 return X1 -def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt"): - def spline(v): - if knot_spacing_type == "sqrt": - knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) - knots = np.sign(knots) * knots ** 2 - else: - knots = np.linspace(-radius, radius, n_knots) - return sparse.csr_matrix( - np.asarray( - dmatrix( - "bs(x, knots=knots, degree=3, include_intercept=True)", - {"x": list(v), "knots": knots}, - ) +def spline1d(x, knots, degree=3): + """Make a spline in a 1D variable `x`""" + X = sparse.csr_matrix( + np.asarray( + dmatrix( + "bs(x, knots=knots, degree=degree, include_intercept=True)", + {"x": list(x), "knots": knots, "degree": degree}, ) ) + ) + if not X.shape[0] == x.shape[0]: + raise ValueError("`patsy` has made the wrong matrix.") + X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] + return X + - x_spline = spline(x) - y_spline = spline(y) +def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt", degree=3): + if knot_spacing_type == "sqrt": + knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) + knots = np.sign(knots) * knots ** 2 + else: + knots = np.linspace(-radius, radius, n_knots) + x_spline = spline1d(x, knots=knots, degree=degree) + y_spline = spline1d(y, knots=knots, degree=degree) X = sparse.hstack( [x_spline.multiply(y_spline[:, idx]) for idx in range(y_spline.shape[1])], From 18fdc554462ae54658ada362720caa35fc7b6824 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 12 Apr 2022 16:25:42 -0700 Subject: [PATCH 35/53] :art: flake8 --- src/psfmachine/perturbation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 499c80f..c58dc3e 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -7,7 +7,6 @@ from psfmachine.utils import _make_A_cartesian import matplotlib.pyplot as plt from fbpca import pca -from astropy.convolution import convolve, Box1DKernel from .utils import spline1d From de15bfc67012e4cca6519249e972c69882515830 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 16:27:58 -0700 Subject: [PATCH 36/53] perturbed model --- src/psfmachine/machine.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index acb7a40..95f4a3e 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -713,7 +713,7 @@ def build_time_model( # bindown flux arrays flux_binned_raw = P.bin_func(flux) flux_binned = P.bin_func(flux_norm) - flux_err_binned = P.bin_func(flux_err_norm) + flux_err_binned = P.bin_func(flux_err_norm, quad=True) # No saturated pixels, 1e5 is a hardcoded value for Kepler. k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones( @@ -757,26 +757,13 @@ def build_time_model( return self.plot_time_model() return - def _get_perturbed_model(self, time_indices=None): + def perturbed_model(self, time_index): """ - Computes the perturbed model at given times + Computes the perturbed model at a given time """ - if time_indices is None: - time_indices = np.arange(len(self.time)) - time_indices = np.atleast_1d(time_indices) - if isinstance(time_indices[0], bool): - time_indices = np.where(time_indices[0])[0] - - perturbed_model = [] - for tdx in time_indices: - X = self.mean_model.copy() - X.data *= self.P.model(time_indices=tdx).ravel() - perturbed_model.append(X) - self.perturbed_model = perturbed_model - - return - - # raise NotImplementedError + X = self.mean_model.copy() + X.data *= self.P.model(time_indices=time_index).ravel() + return X def plot_time_model(self): """ @@ -1289,7 +1276,7 @@ def fit_model(self, fit_va=False): "aberration." ) - X.data *= self.P.model(time_indices=tdx).ravel() + X = self.perturbed_model(tdx) sigma_w_inv = X.T.dot( X.multiply(1 / self.flux_err[tdx][:, None] ** 2) From eb9ce3cead95561726d32ca1faf9afa199270e40 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 12 Apr 2022 16:28:15 -0700 Subject: [PATCH 37/53] :art: no comment block --- src/psfmachine/perturbation.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index c58dc3e..649c5ff 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -369,35 +369,6 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): ) ) - # - # long_y = np.vstack( - # [ - # np.hstack( - # [ - # convolve(y[m, idx], Box1DKernel(100), boundary="extend") - # for m in self.segment_masks.astype(bool).T - # ] - # ) - # for idx in range(y.shape[1]) - # ] - # ).T - - # med_y = np.vstack( - # [ - # np.hstack( - # [ - # convolve( - # y[m, idx] / long_y[m, idx], - # Box1DKernel(15), - # boundary="extend", - # ) - # for m in self.segment_masks.astype(bool).T - # ] - # ) - # for idx in range(y.shape[1]) - # ] - # ).T - for count in range(3): U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30) k[k] &= (np.abs(V) < 0.5).all(axis=0) From 2efb963ea2d698ca8809e0ce86798b649991541b Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Tue, 12 Apr 2022 16:30:56 -0700 Subject: [PATCH 38/53] :memo: docstrings --- src/psfmachine/perturbation.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 649c5ff..1b42747 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -290,10 +290,22 @@ def func(x, quad=False): def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): """Adds the principal components of `y` to the design matrix + Will add two time scales of principal components, definied by `long_time_scale` + and `med_time_scale`. + Parameters ---------- y: np.ndarray Input flux array to take PCA of. + ncomponents: int + Number of principal components to use + long_time_scale: float + The time scale where variability is considered "long" term. Should be + in the same units as `self.time`. + med_time_scale: float + The time scale where variability is considered "medium" term. Should be + in the same units as `self.time`. Variability longer than `long_time_scale`, + or shorter than `med_time_scale`, will be removed before building components """ return self._pca( y, @@ -325,7 +337,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): / long_time_scale ) ), - )[1:-1], + ), degree=3, )[:, 1:] for m in self.segment_masks.astype(bool).T @@ -353,7 +365,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): / med_time_scale ) ), - )[1:-1], + ), degree=3, )[:, 1:] for m in self.segment_masks.astype(bool).T From 70af6aca15c44a38a33493bde6898739e719e2c9 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 17:08:38 -0700 Subject: [PATCH 39/53] time_resolution attr --- src/psfmachine/machine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 95f4a3e..f79d5c8 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -50,7 +50,7 @@ def __init__( n_r_knots=10, n_phi_knots=15, n_time_knots=10, - n_time_points=200, + time_resolution=200, time_radius=8, rmin=1, rmax=16, @@ -86,7 +86,7 @@ def __init__( Number of radial knots in the spline model. n_phi_knots: int Number of azimuthal knots in the spline model. - n_time_points: int + time_resolution: int Number of time points to bin by when fitting for velocity aberration. time_radius: float The radius around sources, out to which the velocity aberration model @@ -157,7 +157,7 @@ def __init__( self.n_r_knots = n_r_knots self.n_phi_knots = n_phi_knots self.n_time_knots = n_time_knots - self.n_time_points = n_time_points + self.time_resolution = time_resolution self.time_radius = time_radius self.rmin = rmin self.rmax = rmax @@ -689,7 +689,7 @@ def build_time_model( dy=dy, segments=segments, focus=focus, - resolution=self.n_time_points, + resolution=self.time_resolution, other_vectors=other_vectors, bin_method=bin_method, focus_exptime=focus_exptime, From 514f72b10bcbc16eff88eb13a48c9dbb4aaefa25 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 12 Apr 2022 17:20:32 -0700 Subject: [PATCH 40/53] matrix transpose hotfix --- src/psfmachine/machine.py | 1 + src/psfmachine/tpf.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index f79d5c8..38af35a 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -1277,6 +1277,7 @@ def fit_model(self, fit_va=False): ) X = self.perturbed_model(tdx) + X = X.T sigma_w_inv = X.T.dot( X.multiply(1 / self.flux_err[tdx][:, None] ** 2) diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index 66ff707..880f662 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -42,7 +42,7 @@ def __init__( n_r_knots=10, n_phi_knots=15, n_time_knots=10, - n_time_points=200, + time_resolution=200, time_radius=8, rmin=1, rmax=16, @@ -68,7 +68,7 @@ def __init__( n_r_knots=n_r_knots, n_phi_knots=n_phi_knots, n_time_knots=n_time_knots, - n_time_points=n_time_points, + time_resolution=time_resolution, time_radius=time_radius, rmin=rmin, rmax=rmax, From c6ebac8a51521f68e4be0b0b08f4e044a89823c0 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 13 Apr 2022 14:35:45 -0700 Subject: [PATCH 41/53] agressive bkg pixel mask --- src/psfmachine/tpf.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index 880f662..eb16130 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -6,6 +6,7 @@ from astropy.coordinates import SkyCoord, match_coordinates_sky from astropy.time import Time from astropy.io import fits +from astropy.stats import sigma_clip import astropy.units as u import matplotlib.pyplot as plt from matplotlib import patches @@ -143,6 +144,21 @@ def remove_background_model(self, plot=False, data_augment=None): # keep track of which pixels comes from TPFs pixels_in_tpf = np.ones_like(self.row, dtype=bool) + # enheance pixel mask + time_corr = np.nanpercentile(bkg_flux, 20, axis=1)[:, None] + med_flux = np.median(bkg_flux - time_corr, axis=0)[None, :] + + f = bkg_flux - med_flux + f -= np.median(f) + # Mask out pixels that are particularly bright. + _mask = (f - time_corr).std(axis=0) < 500 + if not _mask.any(): + raise ValueError("All the input pixels are brighter than 500 counts.") + _mask &= (f - time_corr).std(axis=0) < 30 + _mask &= ~sigma_clip(med_flux[0]).mask + _mask &= ~sigma_clip(np.std(f - time_corr, axis=0)).mask + bkg_pixel_mask &= _mask + if data_augment: # augment background pixels bkg_row = np.hstack([bkg_row, data_augment["row"]]) From eaa807f712ae465719f69b82b4e111d19f8c7719 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Fri, 15 Apr 2022 16:15:40 -0700 Subject: [PATCH 42/53] :art: updating sane defaults, adding smooth PCA components --- src/psfmachine/perturbation.py | 172 +++++++++++++++------------------ tests/test_perturbation.py | 22 +++-- 2 files changed, 92 insertions(+), 102 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 1b42747..16b51e3 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -43,7 +43,7 @@ def __init__( segments: bool = True, resolution: int = 10, bin_method: str = "bin", - focus_exptime=50, + focus_exptime=2, ): self.time = time @@ -77,7 +77,7 @@ def __init__( self.vectors = self._vectors.copy() if self.segments: self.vectors = self._cut_segments(self.vectors) - self._clean_vectors() + # self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) def __repr__(self): @@ -133,25 +133,25 @@ def _get_focus_change(self): self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]]) return - def _clean_vectors(self): - """Remove time polynomial from other vectors""" - nvec = self.poly_order + 1 - if self.focus: - nvec += 1 - if self.segments: - s = nvec * (len(self.breaks) + 1) - else: - s = nvec - - if s != self.vectors.shape[1]: - X = self.vectors[:, :s] - w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) - self.vectors[:, s:] -= X.dot(w) - # Each segment has mean zero - self.vectors[:, s:] -= np.asarray( - [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T] - ).T - return + # def _clean_vectors(self): + # """Remove time polynomial from other vectors""" + # nvec = self.poly_order + 1 + # if self.focus: + # nvec += 1 + # if self.segments: + # s = nvec * (len(self.breaks) + 1) + # else: + # s = nvec + # + # if s != self.vectors.shape[1]: + # X = self.vectors[:, :s] + # w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:])) + # self.vectors[:, s:] -= X.dot(w) + # # Each segment has mean zero + # self.vectors[:, s:] -= np.asarray( + # [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T] + # ).T + # return def plot(self): fig, ax = plt.subplots() @@ -212,6 +212,14 @@ def model(self, time_indices: Optional[list] = None): def shape(self): return self.vectors.shape + @property + def nvec(self): + return self.vectors.shape[1] + + @property + def ntime(self): + return self.time.shape[0] + def bin_func(self, var, **kwargs): """ Bins down an input variable to the same time resolution as `self` @@ -287,7 +295,7 @@ def func(x, quad=False): return func - def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): + def pca(self, y, ncomponents=5, smooth_time_scale=0): """Adds the principal components of `y` to the design matrix Will add two time scales of principal components, definied by `long_time_scale` @@ -308,13 +316,10 @@ def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): or shorter than `med_time_scale`, will be removed before building components """ return self._pca( - y, - ncomponents=ncomponents, - long_time_scale=long_time_scale, - med_time_scale=med_time_scale, + y, ncomponents=ncomponents, smooth_time_scale=smooth_time_scale ) - def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): + def _pca(self, y, ncomponents=3, smooth_time_scale=0): """This hidden method allows us to update the pca method for other classes""" if not y.ndim == 2: raise ValueError("Must pass a 2D `y`") @@ -324,73 +329,43 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): # Clean out any time series have significant contribution from one component k = np.nansum(y, axis=0) != 0 - X = sparse.hstack( - [ - spline1d( - self.time, - np.linspace( - self.time[m].min(), - self.time[m].max(), - int( - np.ceil( - (self.time[m].max() - self.time[m].min()) - / long_time_scale - ) + if smooth_time_scale != 0: + X = sparse.hstack( + [ + spline1d( + self.time, + np.linspace( + self.time[m].min(), + self.time[m].max(), + int( + np.ceil( + (self.time[m].max() - self.time[m].min()) + / smooth_time_scale + ) + ), ), - ), - degree=3, - )[:, 1:] - for m in self.segment_masks.astype(bool).T - ] - ) - X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr() - X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] - long_y = X.dot( - np.linalg.solve( - X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)), - X.T.dot(y), + degree=3, + )[:, 1:] + for m in self.segment_masks.astype(bool).T + ] ) - ) - - X = sparse.hstack( - [ - spline1d( - self.time, - np.linspace( - self.time[m].min(), - self.time[m].max(), - int( - np.ceil( - (self.time[m].max() - self.time[m].min()) - / med_time_scale - ) - ), - ), - degree=3, - )[:, 1:] - for m in self.segment_masks.astype(bool).T - ] - ) - X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr() - X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] - - med_y = X.dot( - np.linalg.solve( - X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)), - X.T.dot(y - long_y), + X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr() + X = X[:, np.asarray(X.sum(axis=0) != 0)[0]] + smoothed_y = X.dot( + np.linalg.solve( + X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)), + X.T.dot(y), + ) ) - ) - - for count in range(3): - U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30) - k[k] &= (np.abs(V) < 0.5).all(axis=0) + else: + smoothed_y = np.copy(y) for count in range(3): - U2, s, V = pca(np.nan_to_num(med_y)[:, k], ncomponents, n_iter=30) + self._pca_components, s, V = pca( + np.nan_to_num(smoothed_y)[:, k], ncomponents, n_iter=30 + ) k[k] &= (np.abs(V) < 0.5).all(axis=0) - self._pca_components = np.hstack([U1, U2]) - if self.other_vectors is not None: self.vectors = np.hstack( [self._vectors, self.other_vectors, self._pca_components] @@ -399,7 +374,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): self.vectors = np.hstack([self._vectors, self._pca_components]) if self.segments: self.vectors = self._cut_segments(self.vectors) - self._clean_vectors() + # self._clean_vectors() self.matrix = sparse.csr_matrix(self.bin_func(self.vectors)) @@ -443,20 +418,25 @@ def __init__( radius: float = 8, focus: bool = False, segments: bool = True, - resolution: int = 10, + resolution: int = 30, bin_method: str = "downsample", - focus_exptime: float = 50, + focus_exptime: float = 2, + degree: int = 2, + knot_spacing_type: str = "linear", ): self.dx = dx self.dy = dy self.nknots = nknots self.radius = radius + self.degree = degree + self.knot_spacing_type = knot_spacing_type self.cartesian_matrix = _make_A_cartesian( self.dx, self.dy, n_knots=self.nknots, radius=self.radius, - knot_spacing_type="linear", + knot_spacing_type=self.knot_spacing_type, + degree=self.degree, ) super().__init__( time=time, @@ -563,19 +543,23 @@ def model(self, time_indices: Optional[list] = None): ] ) - def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5): + def pca(self, y, ncomponents=3, smooth_time_scale=0): """Adds the principal components of `y` to the design matrix Parameters ---------- y: np.ndarray Input flux array to take PCA of. + n_components: int + Number of components to take + smooth_time_scale: float + Amount to smooth the components, using a spline in time. + If 0, the components will not be smoothed. """ self._pca( y, ncomponents=ncomponents, - long_time_scale=long_time_scale, - med_time_scale=med_time_scale, + smooth_time_scale=smooth_time_scale, ) self._get_cartesian_stacked() diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py index b1b8b12..d3d7c35 100644 --- a/tests/test_perturbation.py +++ b/tests/test_perturbation.py @@ -68,14 +68,14 @@ def test_perturbation_matrix(): p = PerturbationMatrix(time=time, focus=False) assert p.matrix.shape == (20, 8) p.pca(flux, ncomponents=2) - assert p.matrix.shape == (20, 16) - assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) + assert p.matrix.shape == (20, 12) + # assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) p.fit(y, ye) p = PerturbationMatrix(time=time, focus=False, segments=False) assert p.matrix.shape == (20, 4) p.pca(flux, ncomponents=2) - assert p.matrix.shape == (20, 8) - assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) + assert p.matrix.shape == (20, 6) + # assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0) p.fit(y, ye) @@ -95,13 +95,19 @@ def test_perturbation_matrix3d(): p3 = PerturbationMatrix3D( time=time, dx=dx, dy=dy, nknots=4, radius=5, resolution=5, poly_order=1 ) - assert p3.cartesian_matrix.shape == (169, 81) + assert p3.cartesian_matrix.shape == (169, 64) assert p3.vectors.shape == (10, 2) - assert p3.shape == (1690, 81 * 2) - assert p3.matrix.shape == (169 * 3, 81 * 2) + assert p3.shape == ( + p3.cartesian_matrix.shape[0] * p3.ntime, + p3.cartesian_matrix.shape[1] * p3.nvec, + ) + assert p3.matrix.shape == ( + p3.cartesian_matrix.shape[0] * p3.nbins, + p3.cartesian_matrix.shape[1] * p3.nvec, + ) p3.fit(flux, flux_err) w = p3.weights - assert w.shape[0] == 81 * 2 + assert w.shape[0] == p3.cartesian_matrix.shape[1] * p3.nvec model = p3.model() assert model.shape == flux.shape From e1bc28e54c080ed82f9c00cbbb3daa5764161a95 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Fri, 15 Apr 2022 16:16:21 -0700 Subject: [PATCH 43/53] :art: sane defaults --- src/psfmachine/perturbation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 16b51e3..0b43d27 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -414,7 +414,7 @@ def __init__( dy: npt.ArrayLike, other_vectors: Optional[list] = None, poly_order: int = 3, - nknots: int = 10, + nknots: int = 7, radius: float = 8, focus: bool = False, segments: bool = True, From 9d336fb01f3ab079caa8a4d70f1b2f3cc70cbbaa Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 18 Apr 2022 10:46:46 -0700 Subject: [PATCH 44/53] pca smooth time scale arg visible --- src/psfmachine/machine.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 38af35a..4ddaa8d 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -617,6 +617,7 @@ def build_time_model( focus=False, focus_exptime=50, pca_ncomponents=0, + pca_smooth_time_scale=0, ): """ Builds a time model that moves the PRF model to account for the scene movement @@ -671,6 +672,7 @@ def build_time_model( [mpc1, mpc2], x=self.time, do_segments=segments, + n_knots=50, ) # normalize components mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / ( @@ -736,7 +738,11 @@ def build_time_model( if pca_ncomponents > 0: # select only bright pixels k &= (flux_binned_raw > 300).all(axis=0) - P.pca(flux_norm[:, k], ncomponents=pca_ncomponents) + P.pca( + flux_norm[:, k], + ncomponents=pca_ncomponents, + smooth_time_scale=pca_smooth_time_scale, + ) # iterate to remvoe outliers for count in [0, 1, 2]: From ea876ccf173927ca618d75faf328aaeaeabde82e Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 18 Apr 2022 18:31:24 -0700 Subject: [PATCH 45/53] time_resolution kwarg --- src/psfmachine/machine.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 4ddaa8d..42eabb3 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -49,7 +49,7 @@ def __init__( time_mask=None, n_r_knots=10, n_phi_knots=15, - n_time_knots=10, + time_nknots=10, time_resolution=200, time_radius=8, rmin=1, @@ -156,7 +156,7 @@ def __init__( self.limit_flux = 1e4 self.n_r_knots = n_r_knots self.n_phi_knots = n_phi_knots - self.n_time_knots = n_time_knots + self.time_nknots = time_nknots self.time_resolution = time_resolution self.time_radius = time_radius self.rmin = rmin @@ -691,10 +691,13 @@ def build_time_model( dy=dy, segments=segments, focus=focus, - resolution=self.time_resolution, other_vectors=other_vectors, bin_method=bin_method, focus_exptime=focus_exptime, + resolution=self.time_resolution, + radius=self.time_radius, + nknots=self.time_nknots, + knot_spacing_type=self.cartesian_knot_spacing, ) # get uncontaminated pixel norm-flux From 12cd2a25bf52688a64f8227af4de0816ca6e30fd Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 19 Apr 2022 19:00:55 -0700 Subject: [PATCH 46/53] using time_knots kwarg --- src/psfmachine/tpf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index eb16130..7c1ca74 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -42,7 +42,7 @@ def __init__( time_mask=None, n_r_knots=10, n_phi_knots=15, - n_time_knots=10, + time_nknots=10, time_resolution=200, time_radius=8, rmin=1, @@ -68,7 +68,7 @@ def __init__( limit_radius=limit_radius, n_r_knots=n_r_knots, n_phi_knots=n_phi_knots, - n_time_knots=n_time_knots, + time_nknots=time_nknots, time_resolution=time_resolution, time_radius=time_radius, rmin=rmin, From dc1a8e6d398fbd79bb563e8ba19b6c4108fd18ee Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 20 Apr 2022 12:41:34 -0700 Subject: [PATCH 47/53] new time model diag plot and components API --- src/psfmachine/machine.py | 81 +++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index 42eabb3..e80fe89 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -132,7 +132,7 @@ def __init__( Mean PSF model values per pixel used for PSF photometry time_corrector: string The type of time corrector that will be used to build the time model, - default is a "polynomial" for a polynomial in time, it can also be "pos_corr" + default is a "polynomial" for a polynomial in time, it can also be "poscorr" cartesian_knot_spacing: string Defines the type of spacing between knots in cartessian space to generate the design matrix, options are "linear" or "sqrt". @@ -163,7 +163,6 @@ def __init__( self.rmax = rmax self.cut_r = cut_r self.sparse_dist_lim = sparse_dist_lim * u.arcsecond - self.time_corrector = "polynomial" self.cartesian_knot_spacing = "sqrt" # disble tqdm prgress bar when running in HPC self.quiet = False @@ -618,12 +617,13 @@ def build_time_model( focus_exptime=50, pca_ncomponents=0, pca_smooth_time_scale=0, + positions=False, ): """ Builds a time model that moves the PRF model to account for the scene movement due to velocity aberration. It has two methods to choose from using the attribute `self.time_corrector`, if `"polynomial"` (default) will use a - polynomial in time, if `"pos_corr"` will use the pos_corr vectos that can be found + polynomial in time, if `"poscorr"` will use the pos_corr vectos that can be found in the TPFs. The time polynomial gives a more flexible model vs the pos_corr option, but can lead to light curves with "weird" long-term trends. Using pos_corr is recomended for Kepler data. @@ -656,16 +656,17 @@ def build_time_model( uncontaminated_pixels = uncontaminated_pixels.data # add other vectors if asked, centroids or poscorrs - if self.time_corrector == "polynomial": - other_vectors = None - else: - if self.time_corrector == "pos_corr" and hasattr(self, "pos_corr1"): + if positions: + if positions == "poscorr" and hasattr(self, "pos_corr1"): mpc1 = np.nanmedian(self.pos_corr1, axis=0) mpc2 = np.nanmedian(self.pos_corr2, axis=0) - # other_vectors = - elif self.time_corrector == "centroid" and hasattr(self, "ra_centroid"): + elif positions == "centroid" and hasattr(self, "ra_centroid"): mpc1 = self.ra_centroid.to("arcsec").value / 4 mpc2 = self.dec_centroid.to("arcsec").value / 4 + else: + raise ValueError( + "`position` not valid, use one of {None, 'poscorr', 'centroid'}" + ) # smooth the vectors mpc1_smooth, mpc2_smooth = bspline_smooth( @@ -683,6 +684,8 @@ def build_time_model( ) # combine them as the first order other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth] + else: + other_vectors = None # create a 3D perturbation matrix P = PerturbationMatrix3D( @@ -784,7 +787,7 @@ def plot_time_model(self): Figure. """ model_binned = self.P.matrix.dot(self.P.weights).reshape(self.flux_binned.shape) - fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w") + fig1, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w") # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool) im = ax[0, 0].scatter( self.P.dx[self._time_masked_pix], @@ -834,9 +837,63 @@ def plot_time_model(self): ax[1, 1].set(title="Model Last Cadence", xlabel=r"$\delta x$") plt.subplots_adjust(hspace=0.3) - cbar = fig.colorbar(im, ax=ax, shrink=0.7) + cbar = fig1.colorbar(im, ax=ax, shrink=0.7) cbar.set_label("Normalized Flux") - return fig + + flux = np.array( + [self.source_mask.multiply(self.flux[idx]).data for idx in range(self.nt)] + ) + flux_sort = np.argsort(np.nanmean(flux[:, self._time_masked_pix], axis=0)) + data_binned_clean = self.flux_binned[:, self._time_masked_pix] + model_binned_clean = model_binned[:, self._time_masked_pix] + + fig2, ax = plt.subplots(1, 3, figsize=(18, 5)) + im = ax[0].imshow( + data_binned_clean.T[flux_sort], + origin="lower", + aspect="auto", + interpolation="nearest", + cmap="viridis", + vmin=0.9, + vmax=1.1, + ) + ax[0].set( + xlabel="Binned Time Index", ylabel="Flux-Sorted Clean Pixels", title="Data" + ) + + im = ax[1].imshow( + model_binned_clean.T[flux_sort], + origin="lower", + aspect="auto", + interpolation="nearest", + cmap="viridis", + vmin=0.9, + vmax=1.1, + ) + ax[1].set(xlabel="Binned Time Index", title="Perturbed Mode") + + cbar = fig2.colorbar( + im, ax=ax[:2], shrink=0.7, orientation="horizontal", location="bottom" + ) + cbar.set_label("Normalized Flux") + + im = ax[2].imshow( + (model_binned_clean / data_binned_clean).T[flux_sort], + origin="lower", + aspect="auto", + interpolation="nearest", + cmap="viridis", + vmin=0.97, + vmax=1.03, + ) + ax[2].set(xlabel="Binned Time Index", title="Perturbed Model / Data") + + cbar = fig2.colorbar( + im, ax=ax[2], shrink=0.7, orientation="horizontal", location="bottom" + ) + cbar.set_label("") + + return fig1, fig2 def build_shape_model( self, plot=False, flux_cut_off=1, frame_index="mean", bin_data=False, **kwargs From 7ac7b71d72d1979a6a77696951382e4861687be0 Mon Sep 17 00:00:00 2001 From: Jorge Date: Thu, 28 Apr 2022 13:54:50 -0700 Subject: [PATCH 48/53] updated lock --- poetry.lock | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 36eab17..a204cb8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2077,7 +2077,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "0b6cb5d37be3084d7c0cc8d2209c66d7b8d7847580dd404a37c3e74a2033a4a2" +content-hash = "74612ba5a75b49f844e825824b7a009a047559bc2aecec7f10930fb1e8e8f3bd" [metadata.files] aesara-theano-fallback = [ @@ -2111,6 +2111,10 @@ argon2-cffi = [ {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"}, {file = "argon2_cffi-20.1.0-cp39-cp39-win32.whl", hash = "sha256:e2db6e85c057c16d0bd3b4d2b04f270a7467c147381e8fd73cbbe5bc719832be"}, {file = "argon2_cffi-20.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a84934bd818e14a17943de8099d41160da4a336bcc699bb4c394bbb9b94bd32"}, + {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b94042e5dcaa5d08cf104a54bfae614be502c6f44c9c89ad1535b2ebdaacbd4c"}, + {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:8282b84ceb46b5b75c3a882b28856b8cd7e647ac71995e71b6705ec06fc232c3"}, + {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"}, + {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"}, ] arviz = [ {file = "arviz-0.11.2-py3-none-any.whl", hash = "sha256:f6a1389a90b53335f248d282c8142b8209150b9625459a85ec6d3d38786797c1"}, @@ -2205,24 +2209,36 @@ cffi = [ {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"}, {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"}, {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24ec4ff2c5c0c8f9c6b87d5bb53555bf267e1e6f70e52e5a9740d32861d36b6f"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c3f39fa737542161d8b0d680df2ec249334cd70a8f420f71c9304bd83c3cbed"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:681d07b0d1e3c462dd15585ef5e33cb021321588bebd910124ef4f4fb71aef55"}, {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"}, {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"}, {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"}, {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d7cd1abac2ffd92e65c0609661866709b4b2d82dd15f611e602b9b188b0b69"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f861a89e0043afec2a51fd177a567005847973be86f709bbb044d7f42fc4e05"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc5a8e069b9ebfa22e26d0e6b97d6f9781302fe7f4f2b8776c3e1daea35f1adc"}, {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"}, {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"}, {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"}, {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"}, {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"}, {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04c468b622ed31d408fea2346bec5bbffba2cc44226302a0de1ade9f5ea3d373"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06db6321b7a68b2bd6df96d08a5adadc1fa0e8f419226e25b2a5fbf6ccc7350f"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:293e7ea41280cb28c6fcaaa0b1aa1f533b8ce060b9e701d78511e1e6c4a1de76"}, {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"}, {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"}, {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"}, {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"}, {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"}, {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf1ac1984eaa7675ca8d5745a8cb87ef7abecb5592178406e55858d411eadc0"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:df5052c5d867c1ea0b311fb7c3cd28b19df469c056f7fdcfe88c7473aa63e333"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24a570cd11895b60829e941f2613a4f79df1a27344cbbb82164ef2e0116f09c7"}, {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"}, {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"}, {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"}, From 1e6b8a4368ea3313c6847759cb4bcfbe2aeb0983 Mon Sep 17 00:00:00 2001 From: Jorge Date: Thu, 28 Apr 2022 14:15:29 -0700 Subject: [PATCH 49/53] pytest fixed --- src/psfmachine/ffi.py | 8 ++++---- tests/test_machine.py | 21 ++++++++------------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/psfmachine/ffi.py b/src/psfmachine/ffi.py index ba0183e..4176e2e 100644 --- a/src/psfmachine/ffi.py +++ b/src/psfmachine/ffi.py @@ -44,8 +44,8 @@ def __init__( limit_radius=32.0, n_r_knots=10, n_phi_knots=15, - n_time_knots=10, - n_time_points=200, + time_nknots=10, + time_resolution=200, time_radius=8, cut_r=6, rmin=1, @@ -130,8 +130,8 @@ def __init__( self.row, n_r_knots=n_r_knots, n_phi_knots=n_phi_knots, - n_time_knots=n_time_knots, - n_time_points=n_time_points, + time_nknots=time_nknots, + time_resolution=time_resolution, time_radius=time_radius, cut_r=cut_r, rmin=rmin, diff --git a/tests/test_machine.py b/tests/test_machine.py index ebfadfb..5ac4f53 100644 --- a/tests/test_machine.py +++ b/tests/test_machine.py @@ -122,10 +122,9 @@ def test_poscorr_smooth(): machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False) machine.build_shape_model(plot=False) # no segments - machine.time_corrector = "pos_corr" machine.poscorr_filter_size = 1 machine.build_time_model( - split_time_segments=False, bin_method="bin", focus_component=False + segments=False, bin_method="bin", focus=False, positions="poscorr" ) median_pc1 = np.nanmedian(machine.pos_corr1, axis=0) @@ -137,8 +136,8 @@ def test_poscorr_smooth(): median_pc2.max() - median_pc2.mean() ) - assert np.isclose(machine.P3.other_vectors[:, 0], median_pc1, atol=0.5).all() - assert np.isclose(machine.P3.other_vectors[:, 1], median_pc2, atol=0.5).all() + assert np.isclose(machine.P.other_vectors[:, 0], median_pc1, atol=0.5).all() + assert np.isclose(machine.P.other_vectors[:, 1], median_pc2, atol=0.5).all() @pytest.mark.remote_data @@ -146,20 +145,16 @@ def test_segment_time_model(): # testing segment with the current test dataset we have that only has 10 cadences # isn't the best, but we can still do some sanity checks. machine = TPFMachine.from_TPFs( - tpfs, apply_focus_mask=False, n_time_points=3, time_corrector="polynomial" + tpfs, apply_focus_mask=False, time_resolution=3, time_corrector="polynomial" ) machine.build_shape_model(plot=False) # no segments - machine.build_time_model( - split_time_segments=False, bin_method="bin", focus_component=False - ) - assert machine.P3.vectors.shape == (10, 4) + machine.build_time_model(segments=False, bin_method="bin", focus=False) + assert machine.P.vectors.shape == (10, 4) # fake 2 time breaks machine.time[4:] += 0.5 machine.time[7:] += 0.41 # user defined segments - machine.build_time_model( - split_time_segments=True, bin_method="bin", focus_component=False - ) - assert machine.P3.vectors.shape == (10, 4 * 3) + machine.build_time_model(segments=True, bin_method="bin", focus=False) + assert machine.P.vectors.shape == (10, 4 * 3) From 90385d9a736a1889e169bf3ea1018adad9e1004b Mon Sep 17 00:00:00 2001 From: Jorge Date: Fri, 29 Apr 2022 11:10:02 -0700 Subject: [PATCH 50/53] better pixel mask in build_time_model --- src/psfmachine/machine.py | 54 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index e80fe89..fa43401 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -130,9 +130,6 @@ def __init__( build the PSF model mean_model: scipy.sparce.csr_matrix Mean PSF model values per pixel used for PSF photometry - time_corrector: string - The type of time corrector that will be used to build the time model, - default is a "polynomial" for a polynomial in time, it can also be "poscorr" cartesian_knot_spacing: string Defines the type of spacing between knots in cartessian space to generate the design matrix, options are "linear" or "sqrt". @@ -640,6 +637,17 @@ def build_time_model( time series as one segment. focus: boolean Add a component that models th focus change at the begining of a segment. + focus_exptime : int + Characteristic decay for focus component modeled as exponential decay when + `focus` is True. In the same units as `PerturbationMatrix3D.time`. + pca_ncomponents : int + Number of PCA components used in `PerturbationMatrix3D`. The components are + derived from pixel light lighrcurves. + pca_smooth_time_scale : float + Smooth time sacel for PCA components. + positions : boolean or string + If one of strings `"poscorr", "centroid"` then the perturbation matrix will + add other vectors accounting for position shift. """ # create the time and space basis _whitened_time = (self.time - self.time.mean()) / ( @@ -718,47 +726,47 @@ def build_time_model( flux_norm = flux / np.nanmean(flux, axis=0) flux_err_norm = flux_err / np.nanmean(flux, axis=0) - # bindown flux arrays - flux_binned_raw = P.bin_func(flux) - flux_binned = P.bin_func(flux_norm) - flux_err_binned = P.bin_func(flux_err_norm, quad=True) - + # create pixel mask for model fitting # No saturated pixels, 1e5 is a hardcoded value for Kepler. - k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones( - flux_binned_raw.shape, bool - ) + k = (flux < 1e5).all(axis=0)[None, :] * np.ones(flux.shape, bool) # No faint pixels, 100 is a hardcoded value for Kepler. - k &= (flux_binned_raw > 100).all(axis=0)[None, :] * np.ones( - flux_binned_raw.shape, bool - ) + k &= (flux > 100).all(axis=0)[None, :] * np.ones(flux.shape, bool) # No huge variability - k &= (np.abs(flux_binned - 1) < 1).all(axis=0)[None, :] * np.ones( - flux_binned.shape, bool - ) + _per_pix = np.percentile(flux_norm, [3, 97], axis=1) + k &= ( + ( + (flux_norm < _per_pix[0][:, None]) | (flux_norm > _per_pix[1][:, None]) + ).sum(axis=0) + < flux_norm.shape[0] * 0.95 + )[None, :] * np.ones(flux_norm.shape, bool) # No nans - k &= np.isfinite(flux_binned) & np.isfinite(flux_err_binned) + k &= np.isfinite(flux_norm) & np.isfinite(flux_err_norm) k = np.all(k, axis=0) # combine good-behaved pixels with uncontaminated pixels k &= uncontaminated_pixels + # adding PCA components to pertrubation matrix if pca_ncomponents > 0: # select only bright pixels - k &= (flux_binned_raw > 300).all(axis=0) + k &= (flux > 300).all(axis=0) P.pca( flux_norm[:, k], ncomponents=pca_ncomponents, smooth_time_scale=pca_smooth_time_scale, ) + # bindown flux arrays + flux_binned = P.bin_func(flux_norm) + flux_err_binned = P.bin_func(flux_err_norm, quad=True) + # iterate to remvoe outliers for count in [0, 1, 2]: # need to add pixel_mask=k P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k) res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape) - # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape)) - res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T) - bad_targets = sigma_clip(res, sigma=5).mask - bad_targets = bad_targets.any(axis=0) + chi2 = np.sum((res) ** 2 / (flux_err_binned ** 2), axis=0) / P.nbins + bad_targets = sigma_clip(chi2, sigma=5).mask + bad_targets = bad_targets.all(axis=0) k &= ~bad_targets # bookkeeping From 5b18a7e4b214538ae0c513aa3de3143944278fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20Mart=C3=ADnez-Palomera?= Date: Fri, 29 Apr 2022 12:37:59 -0700 Subject: [PATCH 51/53] Update pyproject.toml Co-authored-by: Christina Hedges <14965634+christinahedges@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 55273e5..9104d31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ photutils = "^1.1.0" diskcache = "^5.3.0" imageio = "^2.9.0" imageio-ffmpeg = "^0.4.5" -kbackground = "0.1.7" +kbackground = "^0.1.7" fbpca = "^1.0" [tool.poetry.dev-dependencies] From 88ca62c636a734757e0623af18558e73c4e9c52c Mon Sep 17 00:00:00 2001 From: Jorge Date: Fri, 3 Jun 2022 15:34:24 -0700 Subject: [PATCH 52/53] bump up mac version to 1.1.2, missing docstr and removed dup fx in utils --- pyproject.toml | 2 +- src/psfmachine/machine.py | 11 +++++----- src/psfmachine/perturbation.py | 38 +++++++++++++++++++++++----------- src/psfmachine/tpf.py | 3 ++- src/psfmachine/utils.py | 23 ++++++++------------ src/psfmachine/version.py | 2 +- 6 files changed, 45 insertions(+), 34 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9104d31..ed2dfac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "psfmachine" -version = "1.1.1" +version = "1.1.2" description = "Tool to perform fast PSF photometry of primary and background targets from Kepler/K2 Target Pixel Files" authors = ["Christina Hedges ", "Jorge Martinez-Palomera "] diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py index fa43401..cc6610f 100644 --- a/src/psfmachine/machine.py +++ b/src/psfmachine/machine.py @@ -86,6 +86,8 @@ def __init__( Number of radial knots in the spline model. n_phi_knots: int Number of azimuthal knots in the spline model. + time_nknots: int + Number og knots for cartesian DM in time model. time_resolution: int Number of time points to bin by when fitting for velocity aberration. time_radius: float @@ -462,8 +464,7 @@ def _get_source_mask( l[idx] = test_r[loc[0]] ok = np.isfinite(l) source_radius_limit = np.polyval( - np.polyfit(test_f[ok], l[ok], 1), - np.log10(self.source_flux_estimates), + np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates) ) source_radius_limit[ source_radius_limit > upper_radius_limit @@ -634,7 +635,8 @@ def build_time_model( segments : boolean If `True` will split the light curve into segments to fit different time models with a common pixel normalization. If `False` will fit the full - time series as one segment. + time series as one segment. Segments breaks are infered from time + discontinuities. focus: boolean Add a component that models th focus change at the begining of a segment. focus_exptime : int @@ -761,7 +763,6 @@ def build_time_model( # iterate to remvoe outliers for count in [0, 1, 2]: - # need to add pixel_mask=k P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k) res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape) chi2 = np.sum((res) ** 2 / (flux_err_binned ** 2), axis=0) / P.nbins @@ -769,7 +770,7 @@ def build_time_model( bad_targets = bad_targets.all(axis=0) k &= ~bad_targets - # bookkeeping + # book keeping self.flux_binned = flux_binned self._time_masked_pix = k self.P = P diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 0b43d27..57ea06e 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -181,6 +181,11 @@ def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None): Array of flux values. Should have shape ntimes. flux: npt.ArrayLike Optional flux errors. Should have shape ntimes. + + Returns + ------- + weights: npt.ArrayLike + Array with computed weights """ if flux_err is None: flux_err = np.ones_like(flux) @@ -229,6 +234,11 @@ def bin_func(self, var, **kwargs): var: npt.ArrayLike Array of values with at least 1 dimension. The first dimension must be the same shape as `self.time` + + Returns + ------- + func: object + An object function according to `self.bin_method` """ if self.bin_method.lower() == "downsample": func = self._get_downsample_func() @@ -307,13 +317,9 @@ def pca(self, y, ncomponents=5, smooth_time_scale=0): Input flux array to take PCA of. ncomponents: int Number of principal components to use - long_time_scale: float - The time scale where variability is considered "long" term. Should be - in the same units as `self.time`. - med_time_scale: float - The time scale where variability is considered "medium" term. Should be - in the same units as `self.time`. Variability longer than `long_time_scale`, - or shorter than `med_time_scale`, will be removed before building components + smooth_time_scale: float + Amount to smooth the components, using a spline in time. + If 0, the components will not be smoothed. """ return self._pca( y, ncomponents=ncomponents, smooth_time_scale=smooth_time_scale @@ -400,11 +406,19 @@ class PerturbationMatrix3D(PerturbationMatrix): focus : bool Whether to correct focus using a simple exponent model segments: bool - Whether to fit portions of data where there is a significant time break as separate segments + Whether to fit portions of data where there is a significant time break as + separate segments resolution: int How many cadences to bin down via `bin_method` bin_method: str How to bin the data under the hood. Default is by mean binning. + focus_exptime: float + Time for the exponent for focus change, if used + degree: int + Polynomial degree used to build the row/column cartesian design matrix + knot_spacing_type: str + Type of spacing bewtwen knots used for cartesian design matrix, options are + {"linear", "sqrt"} """ def __init__( @@ -483,8 +497,8 @@ def fit( pixel_mask: Optional[npt.ArrayLike] = None, ): """ - Fits flux to find the best fit model weights. Optionally will include flux errors. - Sets the `self.weights` attribute with best fit weights. + Fits flux to find the best fit model weights. Optionally will include flux + errors. Sets the `self.weights` attribute with best fit weights. Parameters ---------- @@ -493,8 +507,8 @@ def fit( flux_err: npt.ArrayLike Optional flux errors. Should have shape ntimes x npixels. pixel_mask: npt.ArrayLike - Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked. - Should have shape npixels. + Pixel mask to apply. Values that are `True` will be used in the fit. + Values that are `False` will be masked. Should have shape npixels. """ if pixel_mask is not None: if not isinstance(pixel_mask, np.ndarray): diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index 4bb15a3..899003c 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -150,10 +150,11 @@ def remove_background_model(self, plot=False, data_augment=None): f = bkg_flux - med_flux f -= np.median(f) - # Mask out pixels that are particularly bright. + # Mask out pixels that are particularly bright, 500 its a good number for Kepler _mask = (f - time_corr).std(axis=0) < 500 if not _mask.any(): raise ValueError("All the input pixels are brighter than 500 counts.") + # non variable pixels _mask &= (f - time_corr).std(axis=0) < 30 _mask &= ~sigma_clip(med_flux[0]).mask _mask &= ~sigma_clip(np.std(f - time_corr, axis=0)).mask diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index a243727..0cc4977 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -675,21 +675,16 @@ def bspline_smooth(y, x=None, degree=3, do_segments=False, breaks=None, n_knots= else: splits = [0, y.shape[-1]] - def spline(v): - knots = np.linspace(v.min(), v.max(), n_knots) - DM = np.asarray( - dmatrix( - "bs(x, knots=knots, degree=2, include_intercept=True)", - {"x": list(v), "knots": knots}, - ) - ) - x = np.array_split(np.arange(len(v)), splits) - masks = np.asarray([np.in1d(np.arange(len(v)), x1).astype(float) for x1 in x]).T - DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])]) - return DM - y_smooth = [] - DM = spline(np.arange(y.shape[-1])) + v = np.arange(y.shape[-1]) + DM = spline1d(v, np.linspace(v.min(), v.max(), n_knots)).toarray() + # do segments + arr_splits = np.array_split(np.arange(len(v)), splits) + masks = np.asarray( + [np.in1d(np.arange(len(v)), x1).astype(float) for x1 in arr_splits] + ).T + DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])]) + prior_mu = np.zeros(DM.shape[1]) prior_sigma = np.ones(DM.shape[1]) * 1e5 # iterate over vectors in y diff --git a/src/psfmachine/version.py b/src/psfmachine/version.py index 3459b59..8c6a373 100644 --- a/src/psfmachine/version.py +++ b/src/psfmachine/version.py @@ -1,3 +1,3 @@ # It is important to store the version number in a separate file # so that we can read it from setup.py without importing the package -__version__ = "1.1.1" +__version__ = "1.1.2" From a6612f6c2307b0f328e683ed41a52a89a8b487df Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 21 Jun 2022 14:01:32 -0700 Subject: [PATCH 53/53] docstrings fixed --- src/psfmachine/perturbation.py | 9 ++++----- src/psfmachine/tpf.py | 4 ++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py index 57ea06e..c645107 100644 --- a/src/psfmachine/perturbation.py +++ b/src/psfmachine/perturbation.py @@ -306,10 +306,8 @@ def func(x, quad=False): return func def pca(self, y, ncomponents=5, smooth_time_scale=0): - """Adds the principal components of `y` to the design matrix - - Will add two time scales of principal components, definied by `long_time_scale` - and `med_time_scale`. + """Adds the first `ncomponents` principal components of `y` to the design + matrix. `y` is smoothen with a spline function and scale `smooth_time_scale`. Parameters ---------- @@ -558,7 +556,8 @@ def model(self, time_indices: Optional[list] = None): ) def pca(self, y, ncomponents=3, smooth_time_scale=0): - """Adds the principal components of `y` to the design matrix + """Adds the first `ncomponents` principal components of `y` to the design + matrix. `y` is smoothen with a spline function and scale `smooth_time_scale`. Parameters ---------- diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index 899003c..f649ccb 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -145,6 +145,10 @@ def remove_background_model(self, plot=False, data_augment=None): pixels_in_tpf = np.ones_like(self.row, dtype=bool) # enheance pixel mask + # creates a quick simple bkg model as fx of time to find variable pixels by + # taking the percentile value of pixel-flux distribution at each time + # the choice of percentile is irrelevant, it's to capture the global trend + # of the background pixels in time time_corr = np.nanpercentile(bkg_flux, 20, axis=1)[:, None] med_flux = np.median(bkg_flux - time_corr, axis=0)[None, :]