From 05f4e266d573bc2da52a9ce6e32ddeb20cbaa770 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 29 Mar 2022 21:21:56 -0700
Subject: [PATCH 01/53] :sparkles: Developing PerturbationMatrix class

---
 src/psfmachine/perturbation.py | 263 +++++++++++++++++++++++++++++++++
 tests/test_perturbation.py     |  71 +++++++++
 2 files changed, 334 insertions(+)
 create mode 100644 src/psfmachine/perturbation.py
 create mode 100644 tests/test_perturbation.py

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
new file mode 100644
index 0000000..4a5b0c8
--- /dev/null
+++ b/src/psfmachine/perturbation.py
@@ -0,0 +1,263 @@
+"""Classes to deal with perturbation matrices"""
+
+from dataclasses import dataclass
+import numpy as np
+from scipy import sparse
+from typing import Optional
+import matplotlib.pyplot as plt
+
+
+@dataclass
+class PerturbationMatrix:
+    """
+    Class to handle perturbation matrices in PSFMachine
+    """
+
+    time: np.ndarray
+    other_vectors: Optional = None
+    poly_order: int = 3
+    bin_or_downsample: str = "bin"
+    focus: bool = False
+    cbvs: bool = False
+    segments: bool = True
+    clean: bool = True
+
+    def __post_init__(self):
+        self.vectors = np.vstack(
+            [self.time ** idx for idx in range(self.poly_order + 1)]
+        ).T
+        if self.cbvs:
+            self._get_cbvs()
+        if self.focus:
+            self._get_focus_change()
+        self._validate()
+        if self.segments:
+            self._cut_segments()
+        if self.clean:
+            self._clean_vectors()
+        self.prior_mu = np.ones(self.shape[1])
+        self.prior_sigma = np.ones(self.shape[1]) * 1e4
+
+    def __repr__(self):
+        return f"PerturbationMatrix {self.shape}"
+
+    @property
+    def breaks(self):
+        return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1
+
+    def _validate(self):
+        # Check that the shape is correct etc
+        # Check the priors are right
+        if self.other_vectors is not None:
+            if isinstance(self.other_vectors, (list, np.ndarray)):
+                self.other_vectors = np.atleast_2d(self.other_vectors)
+                if self.other_vectors.shape[0] != len(self.time):
+                    if self.other_vectors.shape[1] == len(self.time):
+                        self.other_vectors = self.other_vectors.T
+                    else:
+                        raise ValueError("Must pass other vectors in the right shape")
+            else:
+                raise ValueError("Must pass a list as other vectors")
+            self.vectors = np.hstack([self.vectors, self.other_vectors])
+        return
+
+    def _cut_segments(self):
+        x = np.array_split(np.arange(len(self.time)), self.breaks)
+        masks = np.asarray(
+            [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
+        ).T
+        self.vectors = np.hstack(
+            [
+                self.vectors[:, idx][:, None] * masks
+                for idx in range(self.vectors.shape[1])
+            ]
+        )
+
+    def _get_focus_change(self, exptime=100):
+        focus = np.asarray(
+            [
+                np.exp(-exptime * (self.time - self.time[b]))
+                for b in np.hstack([0, self.breaks])
+            ]
+        )
+        focus *= np.asarray(
+            [
+                ((self.time - self.time[b]) >= 0).astype(float)
+                for b in np.hstack([0, self.breaks])
+            ]
+        )
+        self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]])
+        return
+
+    def _clean_vectors(self):
+        """Remove time polynomial from other vectors"""
+        nvec = self.poly_order + 1
+        if self.focus:
+            nvec += 1
+        if self.cbvs:
+            nvec += self.ncbvs
+        s = nvec * (len(self.breaks) + 1)
+        X = self.vectors[:, :s]
+        w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
+        self.vectors[:, s:] -= X.dot(w)
+        return
+
+    def plot(self):
+        fig, ax = plt.subplots()
+        ax.plot(self.time, self.vectors + np.arange(self.shape[1]) * 0.1, c="k")
+        ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
+        return fig
+
+    def _get_cbvs(self):
+        self.ncbvs = 0
+        # use lightkurve to get CBVs for a given time?????????
+        # Might need channel information maybe
+        # self.ncbvs = ....
+        #        self.vectors = np.hstack([self.vectors, cbvs])
+        raise NotImplementedError
+
+    def fit(self, y, ye=None, mask=None):
+        if mask is None:
+            mask = np.ones(len(y), bool)
+        elif not isinstance(mask, np.ndarray):
+            raise ValueError("Pass a mask of booleans")
+        if ye is None:
+            ye = np.ones(len(y))
+        if mask.sum() == 0:
+            raise ValueError("All points in perturbation matrix are masked")
+        X = self.matrix[mask]
+        if (y.ndim == 1) and (ye.ndim == 1):
+            sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag(
+                1 / self.prior_sigma ** 2
+            )
+            B = X.T.dot(y[mask] / ye[mask] ** 2) + self.prior_mu / self.prior_sigma ** 2
+            w = np.linalg.solve(sigma_w_inv, B)
+        elif (y.ndim == 2) and (ye.ndim == 1):
+            sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag(
+                1 / self.prior_sigma ** 2
+            )
+            B = (
+                X.T.dot(y[mask] / ye[mask, None] ** 2)
+                + self.prior_mu[:, None] / self.prior_sigma[:, None] ** 2
+            )
+            w = np.linalg.solve(sigma_w_inv, B)
+        elif (y.ndim == 2) and (ye.ndim == 2):
+            w = []
+            for y1, ye1 in zip(y.T, ye.T):
+                sigma_w_inv = X.T.dot(X.multiply(1 / ye1[mask, None] ** 2)) + np.diag(
+                    1 / self.prior_sigma ** 2
+                )
+                B = (
+                    X.T.dot(y1[mask] / ye1[mask] ** 2)
+                    + self.prior_mu / self.prior_sigma ** 2
+                )
+                w.append(np.linalg.solve(sigma_w_inv, B))
+            w = np.asarray(w).T
+        else:
+            raise ValueError("Can not parse input dimensions")
+        return w
+
+    def dot(self, weights):
+        return np.asarray(self.matrix.dot(weights))
+
+    @property
+    def matrix(self):
+        return sparse.csr_matrix(self.vectors)
+
+    @property
+    def shape(self):
+        return self.vectors.shape
+
+    def bin_vectors(self, var):
+        """
+        Bins down an input variable to the same time resolution as `self`
+        """
+        return var
+
+    def to_low_res(self, resolution=20, method="downsample"):
+        """
+        Convert to a lower resolution matrix
+        Parameters
+        ----------
+        resolution: int
+            Number of points to either bin down to or downsample to
+        """
+
+        if method is "downsample":
+            points = []
+            b = np.hstack([0, self.breaks, self.shape[0] - 1])
+            for b1, b2 in zip(b[:-1], b[1:]):
+                p = np.arange(b1, b2, resolution)
+                if p[-1] != b2:
+                    p = np.hstack([p, b2])
+                points.append(p)
+            points = np.hstack(points)
+
+            def bin_func(x):
+                """
+                Bins down an input variable to the same time resolution as `self`
+                """
+                if x.shape[0] == self.shape[0]:
+                    return x[points]
+                else:
+                    raise ValueError("Wrong size to bin")
+
+        # Make new object, turn off all additional vectors and cleaning
+        # It will inherrit those from the "other_vectors" variable
+        low_res_pm = PerturbationMatrix(
+            time=bin_func(self.time),
+            other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]),
+            segments=False,
+            clean=False,
+            focus=False,
+            cbvs=False,
+        )
+        low_res_pm.bin_vectors = bin_func
+        return low_res_pm
+
+
+# @dataclass
+# class PerturbationMatrix3d(PerturbationMatrix):
+#     """"
+#     Makes a perturbation matrix that accounts for spatial extent
+#     """"
+#     time
+#     other_vectors
+#     dx
+#     dy
+#     ntime_knots
+#     knot_radius
+#     knot_spacing
+#
+#     def __post_init__(self):
+#         self.time_matrix = PerturbationMatrix(self.time, ....)
+#         self._get_cartesian_matrix(dx, dy)
+#
+#     def __repr__(self):
+#         return f'PerturbationMatrix {self.shape}'
+#
+#     @property
+#     def shape(self):
+#
+#     def _get_cartesian_matrix(self, dx, dy):
+#         ...
+#         self.cartesian_matrix =
+#
+#     @property
+#     def matrix(self):
+#         # Merge cartesian and time
+#
+#     def to_low_res(self):
+#         return PerturbationMatrix3d(self.time_matrix.to_low_res....)
+#
+#     def from_time_matrix(self, perturbationmatrix):
+#         return PerturbationMatrix3d(....)
+#
+#
+# A = PerturbationMatrix3d(time=time, dx=dx, dy=dy, other_vectors=[], focus=True, bin_style='bin')
+# w = A.to_low_res.fit(y, ye, mask)
+# model = A.dot(w)
+#
+#
+# mac.mean_model
+# mac.perturbed_model
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
new file mode 100644
index 0000000..452753b
--- /dev/null
+++ b/tests/test_perturbation.py
@@ -0,0 +1,71 @@
+"""test perturbation matrix"""
+import numpy as np
+from scipy import sparse
+import pytest
+from psfmachine.perturbation import PerturbationMatrix
+
+
+def test_perturbation_matrix():
+    time = np.arange(0, 10, 0.1)
+    p = PerturbationMatrix(time=time, focus=False)
+    assert p.vectors.shape == (100, 4)
+    p = PerturbationMatrix(time=time, focus=True)
+    assert p.vectors.shape == (100, 5)
+
+    with pytest.raises(ValueError):
+        p = PerturbationMatrix(
+            time=time, other_vectors=np.random.normal(size=(2, 10)), focus=False
+        )
+        p = PerturbationMatrix(time=time, other_vectors=1, focus=False)
+    p = PerturbationMatrix(
+        time=time, other_vectors=np.random.normal(size=(2, 100)), focus=False
+    )
+    p = PerturbationMatrix(
+        time=time, other_vectors=np.random.normal(size=(100, 2)), focus=False
+    )
+    assert p.vectors.shape == (100, 6)
+    time = np.hstack([np.arange(0, 10, 0.1), np.arange(15, 25, 0.1)])
+    p = PerturbationMatrix(time=time, focus=False)
+    assert p.vectors.shape == (200, 8)
+    p = PerturbationMatrix(time=time, focus=True)
+    assert p.vectors.shape == (200, 10)
+
+    assert p.matrix.shape == p.vectors.shape
+    assert sparse.issparse(p.matrix)
+
+    y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01
+    w = p.fit(y, ye)
+    assert w.shape[0] == p.shape[1]
+    assert np.isfinite(w).all()
+    model = p.dot(w)
+    assert np.sum((y - model) ** 2 / (ye ** 2)) / (200) < 1.5
+
+    # Fit multiple vectors
+    y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones(200) * 0.01
+    w = p.fit(y, ye)
+    assert w.shape == (p.shape[1], 30)
+    assert np.isfinite(w).all()
+    model = p.dot(w)
+    assert np.all(
+        np.sum((y - model) ** 2 / (ye[:, None] ** 2), axis=0) / (200 - 1) < 1.5
+    )
+
+    # Fit multiple vectors and multiple errors
+    y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones((200, 30)) * 0.01
+    w = p.fit(y, ye)
+    assert w.shape == (p.shape[1], 30)
+    assert np.isfinite(w).all()
+    model = p.dot(w)
+    assert np.all(np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (200 - 1) < 1.5)
+
+    y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01
+    assert len(p.bin_vectors(y)) == len(y)
+    p_low = p.to_low_res(resolution=20)
+    assert len(p_low.bin_vectors(y)) == 12
+    assert p_low.bin_vectors(y[:, None]).shape == (12, 1)
+    with pytest.raises(ValueError):
+        p_low.bin_vectors(y[:-4])
+
+    w = p_low.fit(p_low.bin_vectors(y))
+    model = p.dot(w)
+    assert model.shape[0] == 200

From 172fbdadacbf0f2e27f413120fccec512f0c3bd9 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 29 Mar 2022 21:29:40 -0700
Subject: [PATCH 02/53] :white_check_mark: fixing flake8

---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 4a5b0c8..801c4f1 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -183,7 +183,7 @@ def to_low_res(self, resolution=20, method="downsample"):
             Number of points to either bin down to or downsample to
         """
 
-        if method is "downsample":
+        if method == "downsample":
             points = []
             b = np.hstack([0, self.breaks, self.shape[0] - 1])
             for b1, b2 in zip(b[:-1], b[1:]):

From fb31d8b138d52bc5bc59fbb46b671b414c47a9c0 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Thu, 31 Mar 2022 12:22:35 -0700
Subject: [PATCH 03/53] :sparkles: added a 3D matrix

---
 src/psfmachine/machine.py      |   6 +-
 src/psfmachine/perturbation.py | 220 ++++++++++++++++++++++-----------
 src/psfmachine/utils.py        |  42 +++----
 tests/test_perturbation.py     |  48 ++++++-
 4 files changed, 212 insertions(+), 104 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index f878d7b..ea52eb4 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -887,7 +887,7 @@ def build_time_model(self, plot=False, downsample=False, split_time_model=False)
             dy,
             n_knots=self.n_time_knots,
             radius=self.time_radius,
-            spacing=self.cartesian_knot_spacing,
+            knot_spacing_type=self.cartesian_knot_spacing,
         )
         A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr")
         # Cartesian spline with time dependence
@@ -1049,7 +1049,7 @@ def plot_time_model(self, segment=0):
             dy,
             n_knots=self.n_time_knots,
             radius=self.time_radius,
-            spacing=self.cartesian_knot_spacing,
+            knot_spacing_type=self.cartesian_knot_spacing,
         )
         # if self.seg_splits.shape[0] == 2 and segment == 0:
         #     seg_mask = np.ones(time_binned.shape[0], dtype=bool)
@@ -1576,7 +1576,7 @@ def fit_model(self, fit_va=False):
                 dy,
                 n_knots=self.n_time_knots,
                 radius=self.time_radius,
-                spacing=self.cartesian_knot_spacing,
+                knot_spacing_type=self.cartesian_knot_spacing,
             )
             A_cp3 = sparse.hstack([A_cp, A_cp, A_cp, A_cp], format="csr")
 
diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 801c4f1..35938de 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -4,25 +4,33 @@
 import numpy as np
 from scipy import sparse
 from typing import Optional
+from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
 
 
-@dataclass
-class PerturbationMatrix:
+class PerturbationMatrix(object):
     """
     Class to handle perturbation matrices in PSFMachine
     """
 
-    time: np.ndarray
-    other_vectors: Optional = None
-    poly_order: int = 3
-    bin_or_downsample: str = "bin"
-    focus: bool = False
-    cbvs: bool = False
-    segments: bool = True
-    clean: bool = True
+    def __init__(
+        self,
+        time,
+        other_vectors=None,
+        poly_order=3,
+        focus=False,
+        cbvs=False,
+        segments=True,
+        clean=True,
+    ):
 
-    def __post_init__(self):
+        self.time = time
+        self.other_vectors = other_vectors
+        self.poly_order = poly_order
+        self.focus = focus
+        self.cbvs = cbvs
+        self.segments = segments
+        self.clean = clean
         self.vectors = np.vstack(
             [self.time ** idx for idx in range(self.poly_order + 1)]
         ).T
@@ -35,11 +43,17 @@ def __post_init__(self):
             self._cut_segments()
         if self.clean:
             self._clean_vectors()
-        self.prior_mu = np.ones(self.shape[1])
-        self.prior_sigma = np.ones(self.shape[1]) * 1e4
 
     def __repr__(self):
-        return f"PerturbationMatrix {self.shape}"
+        return f"PerturbationMatrix"
+
+    @property
+    def prior_mu(self):
+        return np.ones(self.shape[1])
+
+    @property
+    def prior_sigma(self):
+        return np.ones(self.shape[1]) * 1e4
 
     @property
     def breaks(self):
@@ -174,6 +188,27 @@ def bin_vectors(self, var):
         """
         return var
 
+    def _get_downsample_func(self, resolution):
+        points = []
+        b = np.hstack([0, self.breaks, len(self.time) - 1])
+        for b1, b2 in zip(b[:-1], b[1:]):
+            p = np.arange(b1, b2, resolution)
+            if p[-1] != b2:
+                p = np.hstack([p, b2])
+            points.append(p)
+        points = np.hstack(points)
+
+        def bin_func(x):
+            """
+            Bins down an input variable to the same time resolution as `self`
+            """
+            if x.shape[0] == len(self.time):
+                return x[points]
+            else:
+                raise ValueError("Wrong size to bin")
+
+        return bin_func
+
     def to_low_res(self, resolution=20, method="downsample"):
         """
         Convert to a lower resolution matrix
@@ -184,23 +219,7 @@ def to_low_res(self, resolution=20, method="downsample"):
         """
 
         if method == "downsample":
-            points = []
-            b = np.hstack([0, self.breaks, self.shape[0] - 1])
-            for b1, b2 in zip(b[:-1], b[1:]):
-                p = np.arange(b1, b2, resolution)
-                if p[-1] != b2:
-                    p = np.hstack([p, b2])
-                points.append(p)
-            points = np.hstack(points)
-
-            def bin_func(x):
-                """
-                Bins down an input variable to the same time resolution as `self`
-                """
-                if x.shape[0] == self.shape[0]:
-                    return x[points]
-                else:
-                    raise ValueError("Wrong size to bin")
+            bin_func = self._get_downsample_func(resolution)
 
         # Make new object, turn off all additional vectors and cleaning
         # It will inherrit those from the "other_vectors" variable
@@ -216,48 +235,99 @@ def bin_func(x):
         return low_res_pm
 
 
-# @dataclass
-# class PerturbationMatrix3d(PerturbationMatrix):
-#     """"
-#     Makes a perturbation matrix that accounts for spatial extent
+class PerturbationMatrix3D(PerturbationMatrix):
+    """3D perturbation matrix in time, row, column
+
+    NOTE: Radius seems like a bad way of parameterizing something in -cartesian- space?!
+    """
+
+    def __init__(
+        self,
+        time,
+        dx,
+        dy,
+        other_vectors=None,
+        poly_order=3,
+        nknots=10,
+        radius=8,
+        focus=False,
+        cbvs=False,
+        segments=True,
+        clean=True,
+    ):
+        self.dx = dx
+        self.dy = dy
+        self.nknots = nknots
+        self.radius = radius
+        self.cartesian_matrix = _make_A_cartesian(
+            self.dx,
+            self.dy,
+            n_knots=self.nknots,
+            radius=self.radius,
+            knot_spacing_type="linear",
+        )
+        super().__init__(
+            time=time,
+            other_vectors=other_vectors,
+            poly_order=poly_order,
+            focus=focus,
+            cbvs=cbvs,
+            segments=segments,
+            clean=clean,
+        )
+
+    def __repr__(self):
+        return f"PerturbationMatrix3D"
+
+    @property
+    def shape(self):
+        return (
+            self.cartesian_matrix.shape[0] * self.time.shape[0],
+            self.cartesian_matrix.shape[1] * self.vectors.shape[1],
+        )
+
+    @property
+    def matrix(self):
+        C = sparse.hstack(
+            [
+                sparse.vstack(
+                    [self.cartesian_matrix * vector[idx] for idx in range(len(vector))],
+                    format="csr",
+                )
+                for vector in self.vectors.T
+            ],
+            format="csr",
+        )
+        return C
+
+    def to_low_res(self, resolution=20, method="downsample"):
+        """
+        Convert to a lower resolution matrix
+        Parameters
+        ----------
+        resolution: int
+            Number of points to either bin down to or downsample to
+        """
+
+        if method == "downsample":
+            bin_func = self._get_downsample_func(resolution)
+
+        # Make new object, turn off all additional vectors and cleaning
+        # It will inherrit those from the "other_vectors" variable
+        low_res_pm = PerturbationMatrix3D(
+            time=bin_func(self.time),
+            dx=self.dx,
+            dy=self.dy,
+            nknots=self.nknots,
+            radius=self.radius,
+            other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]),
+            segments=False,
+            clean=False,
+            focus=False,
+            cbvs=False,
+        )
+        low_res_pm.bin_vectors = bin_func
+        return low_res_pm
+
+
 #     """"
-#     time
-#     other_vectors
-#     dx
-#     dy
-#     ntime_knots
-#     knot_radius
-#     knot_spacing
-#
-#     def __post_init__(self):
-#         self.time_matrix = PerturbationMatrix(self.time, ....)
-#         self._get_cartesian_matrix(dx, dy)
-#
-#     def __repr__(self):
-#         return f'PerturbationMatrix {self.shape}'
-#
-#     @property
-#     def shape(self):
-#
-#     def _get_cartesian_matrix(self, dx, dy):
-#         ...
-#         self.cartesian_matrix =
-#
-#     @property
-#     def matrix(self):
-#         # Merge cartesian and time
-#
-#     def to_low_res(self):
-#         return PerturbationMatrix3d(self.time_matrix.to_low_res....)
-#
-#     def from_time_matrix(self, perturbationmatrix):
-#         return PerturbationMatrix3d(....)
-#
-#
-# A = PerturbationMatrix3d(time=time, dx=dx, dy=dy, other_vectors=[], focus=True, bin_style='bin')
-# w = A.to_low_res.fit(y, ye, mask)
-# model = A.dot(w)
-#
-#
-# mac.mean_model
-# mac.perturbed_model
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index 9356680..4850e43 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -196,33 +196,25 @@ def _make_A_polar(phi, r, cut_r=6, rmin=1, rmax=18, n_r_knots=12, n_phi_knots=15
     return X1
 
 
-def _make_A_cartesian(x, y, n_knots=10, radius=3.0, spacing="sqrt"):
-    if spacing == "sqrt":
-        x_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots)
-        x_knots = np.sign(x_knots) * x_knots ** 2
-    else:
-        x_knots = np.linspace(-radius, radius, n_knots)
-    x_spline = sparse.csr_matrix(
-        np.asarray(
-            dmatrix(
-                "bs(x, knots=knots, degree=3, include_intercept=True)",
-                {"x": list(x), "knots": x_knots},
-            )
-        )
-    )
-    if spacing == "sqrt":
-        y_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots)
-        y_knots = np.sign(y_knots) * y_knots ** 2
-    else:
-        y_knots = np.linspace(-radius, radius, n_knots)
-    y_spline = sparse.csr_matrix(
-        np.asarray(
-            dmatrix(
-                "bs(x, knots=knots, degree=3, include_intercept=True)",
-                {"x": list(y), "knots": y_knots},
+def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt"):
+    def spline(v):
+        if knot_spacing_type == "sqrt":
+            knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots)
+            knots = np.sign(knots) * knots ** 2
+        else:
+            knots = np.linspace(-radius, radius, n_knots)
+        return sparse.csr_matrix(
+            np.asarray(
+                dmatrix(
+                    "bs(x, knots=knots, degree=3, include_intercept=True)",
+                    {"x": list(v), "knots": knots},
+                )
             )
         )
-    )
+
+    x_spline = spline(x)
+    y_spline = spline(y)
+
     X = sparse.hstack(
         [x_spline.multiply(y_spline[:, idx]) for idx in range(y_spline.shape[1])],
         format="csr",
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index 452753b..ab9b749 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -2,7 +2,7 @@
 import numpy as np
 from scipy import sparse
 import pytest
-from psfmachine.perturbation import PerturbationMatrix
+from psfmachine.perturbation import PerturbationMatrix, PerturbationMatrix3D
 
 
 def test_perturbation_matrix():
@@ -69,3 +69,49 @@ def test_perturbation_matrix():
     w = p_low.fit(p_low.bin_vectors(y))
     model = p.dot(w)
     assert model.shape[0] == 200
+
+
+def test_perturbation_matrix3d():
+    time = np.arange(0, 10, 1)
+    # 13 x 13 pixels, evenly spaced in x and y
+    dx, dy = np.mgrid[:13, :13] - 6 + 0.01
+    dx, dy = dx.ravel(), dy.ravel()
+
+    # ntime x npixels
+    flux = (
+        np.random.normal(0, 0.01, size=(10, 169))
+        + 2 * dx[None, :] ** 2 * 3 * dy[None, :]
+    )
+    flux_err = np.ones((10, 169)) * 0.01
+
+    p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5)
+    assert p3.cartesian_matrix.shape == (169, 81)
+    assert p3.vectors.shape == (10, 4)
+    assert p3.matrix.shape == (1690, 324)
+    assert p3.shape == (1690, 324)
+    w = p3.fit(flux.ravel(), flux_err.ravel())
+    assert w.shape[0] == 324
+    model = p3.dot(w).reshape(flux.shape)
+    chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
+        p3.shape[0] - p3.shape[1] - 1
+    )
+    assert chi < 1.5
+
+    time = np.arange(0, 100, 1)
+    flux = (
+        np.random.normal(0, 0.01, size=(100, 169))
+        + 2 * dx[None, :] ** 2 * 3 * dy[None, :]
+    )
+    flux_err = np.ones((100, 169)) * 0.01
+
+    p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5)
+    assert p3.to_low_res(resolution=20).shape == (169 * 6, 81 * 4)
+    assert p3.to_low_res(resolution=20).vectors.shape == (6, 4)
+
+    p3l = p3.to_low_res(resolution=10)
+    w = p3l.fit(p3l.bin_vectors(flux).ravel(), p3l.bin_vectors(flux_err).ravel())
+    model = p3.dot(w).reshape(flux.shape)
+    chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
+        p3.shape[0] - p3.shape[1] - 1
+    )
+    assert chi < 1.5

From d37a540f96d4839f86c9ae203142c6a24725d9ea Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Fri, 1 Apr 2022 17:04:52 -0700
Subject: [PATCH 04/53] :art: fairly good sketch

---
 src/psfmachine/perturbation.py | 265 ++++++++++++++++++---------------
 tests/test_perturbation.py     |  99 ++++++------
 2 files changed, 199 insertions(+), 165 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 35938de..dc12a98 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -1,9 +1,7 @@
 """Classes to deal with perturbation matrices"""
 
-from dataclasses import dataclass
 import numpy as np
 from scipy import sparse
-from typing import Optional
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
 
@@ -11,6 +9,23 @@
 class PerturbationMatrix(object):
     """
     Class to handle perturbation matrices in PSFMachine
+
+    Parameters
+    ----------
+    time : np.ndarray
+        Array of time values
+    other_vectors: list or np.ndarray
+        Other detrending vectors (e.g. centroids)
+    poly_order: int
+        Polynomial order to use for detrending, default 3
+    focus : bool
+        Whether to correct focus using a simple exponent model
+    segments: bool
+        Whether to fit portions of data where there is a significant time break as separate segments
+    resolution: int
+        How many cadences to bin down via `bin_method`
+    bin_method: str
+        How to bin the data under the hood. Default is by mean binning.
     """
 
     def __init__(
@@ -21,7 +36,8 @@ def __init__(
         focus=False,
         cbvs=False,
         segments=True,
-        clean=True,
+        resolution=10,
+        bin_method="bin",
     ):
 
         self.time = time
@@ -30,7 +46,8 @@ def __init__(
         self.focus = focus
         self.cbvs = cbvs
         self.segments = segments
-        self.clean = clean
+        self.resolution = resolution
+        self.bin_method = bin_method
         self.vectors = np.vstack(
             [self.time ** idx for idx in range(self.poly_order + 1)]
         ).T
@@ -41,15 +58,14 @@ def __init__(
         self._validate()
         if self.segments:
             self._cut_segments()
-        if self.clean:
-            self._clean_vectors()
+        self._clean_vectors()
 
     def __repr__(self):
-        return f"PerturbationMatrix"
+        return "PerturbationMatrix"
 
     @property
     def prior_mu(self):
-        return np.ones(self.shape[1])
+        return np.zeros(self.shape[1])
 
     @property
     def prior_sigma(self):
@@ -130,75 +146,55 @@ def _get_cbvs(self):
         #        self.vectors = np.hstack([self.vectors, cbvs])
         raise NotImplementedError
 
-    def fit(self, y, ye=None, mask=None):
-        if mask is None:
-            mask = np.ones(len(y), bool)
-        elif not isinstance(mask, np.ndarray):
-            raise ValueError("Pass a mask of booleans")
-        if ye is None:
-            ye = np.ones(len(y))
-        if mask.sum() == 0:
-            raise ValueError("All points in perturbation matrix are masked")
-        X = self.matrix[mask]
-        if (y.ndim == 1) and (ye.ndim == 1):
-            sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag(
-                1 / self.prior_sigma ** 2
-            )
-            B = X.T.dot(y[mask] / ye[mask] ** 2) + self.prior_mu / self.prior_sigma ** 2
-            w = np.linalg.solve(sigma_w_inv, B)
-        elif (y.ndim == 2) and (ye.ndim == 1):
-            sigma_w_inv = X.T.dot(X.multiply(1 / ye[mask, None] ** 2)) + np.diag(
-                1 / self.prior_sigma ** 2
-            )
-            B = (
-                X.T.dot(y[mask] / ye[mask, None] ** 2)
-                + self.prior_mu[:, None] / self.prior_sigma[:, None] ** 2
-            )
-            w = np.linalg.solve(sigma_w_inv, B)
-        elif (y.ndim == 2) and (ye.ndim == 2):
-            w = []
-            for y1, ye1 in zip(y.T, ye.T):
-                sigma_w_inv = X.T.dot(X.multiply(1 / ye1[mask, None] ** 2)) + np.diag(
-                    1 / self.prior_sigma ** 2
-                )
-                B = (
-                    X.T.dot(y1[mask] / ye1[mask] ** 2)
-                    + self.prior_mu / self.prior_sigma ** 2
-                )
-                w.append(np.linalg.solve(sigma_w_inv, B))
-            w = np.asarray(w).T
-        else:
-            raise ValueError("Can not parse input dimensions")
-        return w
+    def fit(self, flux, flux_err=None):
+        if flux_err is None:
+            flux_err = np.ones(len(flux_err))
 
-    def dot(self, weights):
-        return np.asarray(self.matrix.dot(weights))
+        y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
+        X = self.matrix
+        sigma_w_inv = X.T.dot(X.multiply(1 / ye[:, None] ** 2)) + np.diag(
+            1 / self.prior_sigma ** 2
+        )
+        B = X.T.dot(y / ye ** 2) + self.prior_mu / self.prior_sigma ** 2
+        self.weights = np.linalg.solve(sigma_w_inv, B)
+        return self.weights
+
+    def model(self, time_indices=None):
+        if time_indices is None:
+            time_indices = np.ones(len(self.time), bool)
+        return self.vectors[time_indices].dot(self.weights)
 
     @property
     def matrix(self):
-        return sparse.csr_matrix(self.vectors)
+        return sparse.csr_matrix(self.bin_func(self.vectors))
 
     @property
     def shape(self):
         return self.vectors.shape
 
-    def bin_vectors(self, var):
+    def bin_func(self, var, **kwargs):
         """
         Bins down an input variable to the same time resolution as `self`
         """
-        return var
+        if self.bin_method.lower() == "downsample":
+            func = self._get_downsample_func()
+        elif self.bin_method.lower() == "bin":
+            func = self._get_bindown_func()
+        else:
+            raise NotImplementedError
+        return func(var, **kwargs)
 
-    def _get_downsample_func(self, resolution):
+    def _get_downsample_func(self):
         points = []
         b = np.hstack([0, self.breaks, len(self.time) - 1])
         for b1, b2 in zip(b[:-1], b[1:]):
-            p = np.arange(b1, b2, resolution)
+            p = np.arange(b1, b2, self.resolution)
             if p[-1] != b2:
                 p = np.hstack([p, b2])
             points.append(p)
-        points = np.hstack(points)
+        points = np.unique(np.hstack(points))
 
-        def bin_func(x):
+        def func(x, quad=False):
             """
             Bins down an input variable to the same time resolution as `self`
             """
@@ -207,38 +203,68 @@ def bin_func(x):
             else:
                 raise ValueError("Wrong size to bin")
 
-        return bin_func
-
-    def to_low_res(self, resolution=20, method="downsample"):
-        """
-        Convert to a lower resolution matrix
-        Parameters
-        ----------
-        resolution: int
-            Number of points to either bin down to or downsample to
-        """
+        return func
 
-        if method == "downsample":
-            bin_func = self._get_downsample_func(resolution)
-
-        # Make new object, turn off all additional vectors and cleaning
-        # It will inherrit those from the "other_vectors" variable
-        low_res_pm = PerturbationMatrix(
-            time=bin_func(self.time),
-            other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]),
-            segments=False,
-            clean=False,
-            focus=False,
-            cbvs=False,
+    def _get_bindown_func(self):
+        b = np.hstack([0, self.breaks, len(self.time) - 1])
+        points = np.hstack(
+            [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])]
         )
-        low_res_pm.bin_vectors = bin_func
-        return low_res_pm
+        points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))]
+        points = np.sort(np.unique(np.hstack([points, self.breaks])))
+
+        def func(x, quad=False):
+            """
+            Bins down an input variable to the same time resolution as `self`
+            """
+            if x.shape[0] == len(self.time):
+                if not quad:
+                    return np.asarray(
+                        [i.mean(axis=0) for i in np.array_split(x, points)]
+                    )
+                else:
+                    return (
+                        np.asarray(
+                            [
+                                np.sum(i ** 2, axis=0) / (len(i) ** 2)
+                                for i in np.array_split(x, points)
+                            ]
+                        )
+                        ** 0.5
+                    )
+            else:
+                raise ValueError("Wrong size to bin")
 
+        return func
 
-class PerturbationMatrix3D(PerturbationMatrix):
-    """3D perturbation matrix in time, row, column
 
-    NOTE: Radius seems like a bad way of parameterizing something in -cartesian- space?!
+class PerturbationMatrix3D(PerturbationMatrix):
+    """Class to handle 3D perturbation matrices in PSFMachine
+
+    Parameters
+    ----------
+    time : np.ndarray
+        Array of time values
+    dx: np.ndarray
+        Pixel positions in x separation from source center
+    dy : np.ndaray
+        Pixel positions in y separation from source center
+    other_vectors: list or np.ndarray
+        Other detrending vectors (e.g. centroids)
+    poly_order: int
+        Polynomial order to use for detrending, default 3
+    nknots: int
+        Number of knots for the cartesian spline
+    radius: float
+        Radius out to which to calculate the cartesian spline
+    focus : bool
+        Whether to correct focus using a simple exponent model
+    segments: bool
+        Whether to fit portions of data where there is a significant time break as separate segments
+    resolution: int
+        How many cadences to bin down via `bin_method`
+    bin_method: str
+        How to bin the data under the hood. Default is by mean binning.
     """
 
     def __init__(
@@ -253,7 +279,8 @@ def __init__(
         focus=False,
         cbvs=False,
         segments=True,
-        clean=True,
+        resolution=10,
+        bin_method="downsample",
     ):
         self.dx = dx
         self.dy = dy
@@ -273,11 +300,12 @@ def __init__(
             focus=focus,
             cbvs=cbvs,
             segments=segments,
-            clean=clean,
+            resolution=resolution,
+            bin_method=bin_method,
         )
 
     def __repr__(self):
-        return f"PerturbationMatrix3D"
+        return "PerturbationMatrix3D"
 
     @property
     def shape(self):
@@ -294,40 +322,43 @@ def matrix(self):
                     [self.cartesian_matrix * vector[idx] for idx in range(len(vector))],
                     format="csr",
                 )
-                for vector in self.vectors.T
+                for vector in self.bin_func(self.vectors).T
             ],
             format="csr",
         )
         return C
 
-    def to_low_res(self, resolution=20, method="downsample"):
-        """
-        Convert to a lower resolution matrix
-        Parameters
-        ----------
-        resolution: int
-            Number of points to either bin down to or downsample to
-        """
-
-        if method == "downsample":
-            bin_func = self._get_downsample_func(resolution)
-
-        # Make new object, turn off all additional vectors and cleaning
-        # It will inherrit those from the "other_vectors" variable
-        low_res_pm = PerturbationMatrix3D(
-            time=bin_func(self.time),
-            dx=self.dx,
-            dy=self.dy,
-            nknots=self.nknots,
-            radius=self.radius,
-            other_vectors=bin_func(self.vectors[:, self.poly_order + 1 :]),
-            segments=False,
-            clean=False,
-            focus=False,
-            cbvs=False,
+    def model(self, time_indices=None):
+        """We build the matrix for every frame"""
+        if time_indices is None:
+            time_indices = np.arange(len(self.time))
+        time_indices = np.atleast_1d(time_indices)
+        if isinstance(time_indices[0], bool):
+            time_indices = np.where(time_indices[0])[0]
+        return np.asarray(
+            [
+                sparse.hstack(
+                    [
+                        sparse.vstack(
+                            self.cartesian_matrix * vector[time_index],
+                            format="csr",
+                        )
+                        for vector in (self.vectors).T
+                    ],
+                    format="csr",
+                ).dot(self.weights)
+                for time_index in time_indices
+            ]
         )
-        low_res_pm.bin_vectors = bin_func
-        return low_res_pm
 
-
-#     """"
+    def plot_model(self, time_index=0):
+        if not hasattr(self, "weights"):
+            raise ValueError("Run `fit` first.")
+        fig, ax = plt.subplots()
+        ax.scatter(self.dx, self.dy, c=self.model(time_index)[0])
+        ax.set(
+            xlabel=r"$\delta$x",
+            ylabel=r"$\delta$y",
+            title=f"Perturbation Model [Cadence {time_index}]",
+        )
+        return fig
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index ab9b749..de8fd99 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -30,45 +30,38 @@ def test_perturbation_matrix():
     p = PerturbationMatrix(time=time, focus=True)
     assert p.vectors.shape == (200, 10)
 
-    assert p.matrix.shape == p.vectors.shape
+    assert p.matrix.shape == (200 / 10, 10)
     assert sparse.issparse(p.matrix)
 
+    res = 10
+    p = PerturbationMatrix(time=time, focus=False, resolution=res)
     y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01
     w = p.fit(y, ye)
     assert w.shape[0] == p.shape[1]
     assert np.isfinite(w).all()
-    model = p.dot(w)
-    assert np.sum((y - model) ** 2 / (ye ** 2)) / (200) < 1.5
-
-    # Fit multiple vectors
-    y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones(200) * 0.01
-    w = p.fit(y, ye)
-    assert w.shape == (p.shape[1], 30)
-    assert np.isfinite(w).all()
-    model = p.dot(w)
-    assert np.all(
-        np.sum((y - model) ** 2 / (ye[:, None] ** 2), axis=0) / (200 - 1) < 1.5
-    )
-
-    # Fit multiple vectors and multiple errors
-    y, ye = np.random.normal(1, 0.01, size=(200, 30)), np.ones((200, 30)) * 0.01
-    w = p.fit(y, ye)
-    assert w.shape == (p.shape[1], 30)
-    assert np.isfinite(w).all()
-    model = p.dot(w)
-    assert np.all(np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (200 - 1) < 1.5)
+    model = p.model()
+    assert model.shape == y.shape
+    chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
+    assert chi < 3
 
     y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01
-    assert len(p.bin_vectors(y)) == len(y)
-    p_low = p.to_low_res(resolution=20)
-    assert len(p_low.bin_vectors(y)) == 12
-    assert p_low.bin_vectors(y[:, None]).shape == (12, 1)
-    with pytest.raises(ValueError):
-        p_low.bin_vectors(y[:-4])
+    for bin_method in ["downsample", "bin"]:
+        s = 200 / res + 1 if bin_method == "downsample" else 200 / res
+        p = PerturbationMatrix(
+            time=time, focus=False, resolution=res, bin_method=bin_method
+        )
+        assert len(p.bin_func(y)) == s
+        assert len(p.bin_func(ye, quad=True)) == s
+        with pytest.raises(ValueError):
+            p.bin_func(y[:-4])
 
-    w = p_low.fit(p_low.bin_vectors(y))
-    model = p.dot(w)
-    assert model.shape[0] == 200
+        w = p.fit(y, ye)
+        model = p.model()
+        assert model.shape[0] == 200
+        chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
+        assert chi < 3
+
+    # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1)
 
 
 def test_perturbation_matrix3d():
@@ -84,14 +77,18 @@ def test_perturbation_matrix3d():
     )
     flux_err = np.ones((10, 169)) * 0.01
 
-    p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5)
+    p3 = PerturbationMatrix3D(
+        time=time, dx=dx, dy=dy, nknots=4, radius=5, resolution=5, poly_order=1
+    )
     assert p3.cartesian_matrix.shape == (169, 81)
-    assert p3.vectors.shape == (10, 4)
-    assert p3.matrix.shape == (1690, 324)
-    assert p3.shape == (1690, 324)
-    w = p3.fit(flux.ravel(), flux_err.ravel())
-    assert w.shape[0] == 324
-    model = p3.dot(w).reshape(flux.shape)
+    assert p3.vectors.shape == (10, 2)
+    assert p3.shape == (1690, 81 * 2)
+    assert p3.matrix.shape == (169 * 3, 81 * 2)
+    w = p3.fit(flux, flux_err)
+    assert w.shape[0] == 81 * 2
+    model = p3.model()
+    assert model.shape == flux.shape
+
     chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
         p3.shape[0] - p3.shape[1] - 1
     )
@@ -104,14 +101,20 @@ def test_perturbation_matrix3d():
     )
     flux_err = np.ones((100, 169)) * 0.01
 
-    p3 = PerturbationMatrix3D(time=time, dx=dx, dy=dy, nknots=4, radius=5)
-    assert p3.to_low_res(resolution=20).shape == (169 * 6, 81 * 4)
-    assert p3.to_low_res(resolution=20).vectors.shape == (6, 4)
-
-    p3l = p3.to_low_res(resolution=10)
-    w = p3l.fit(p3l.bin_vectors(flux).ravel(), p3l.bin_vectors(flux_err).ravel())
-    model = p3.dot(w).reshape(flux.shape)
-    chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
-        p3.shape[0] - p3.shape[1] - 1
-    )
-    assert chi < 1.5
+    for bin_method in ["downsample", "bin"]:
+        p3 = PerturbationMatrix3D(
+            time=time,
+            dx=dx,
+            dy=dy,
+            nknots=4,
+            radius=5,
+            poly_order=2,
+            bin_method=bin_method,
+        )
+        w = p3.fit(flux, flux_err)
+        model = p3.model()
+        assert model.shape == flux.shape
+        chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
+            p3.shape[0] - p3.shape[1] - 1
+        )
+        assert chi < 3

From 884f24d6922f9b82df347f1cce5e7337c667d880 Mon Sep 17 00:00:00 2001
From: Christina Hedges <14965634+christinahedges@users.noreply.github.com>
Date: Tue, 5 Apr 2022 11:45:08 -0700
Subject: [PATCH 05/53] Update src/psfmachine/perturbation.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jorge Martínez-Palomera <jorgemarpa@users.noreply.github.com>
---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index dc12a98..e5d5856 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -134,7 +134,7 @@ def _clean_vectors(self):
 
     def plot(self):
         fig, ax = plt.subplots()
-        ax.plot(self.time, self.vectors + np.arange(self.shape[1]) * 0.1, c="k")
+ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1)
         ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
         return fig
 

From 18cca59b31742fa6fb96987545cbfd329231581f Mon Sep 17 00:00:00 2001
From: Christina Hedges <14965634+christinahedges@users.noreply.github.com>
Date: Tue, 5 Apr 2022 11:45:22 -0700
Subject: [PATCH 06/53] Update src/psfmachine/perturbation.py

---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index e5d5856..f7186c4 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -211,7 +211,7 @@ def _get_bindown_func(self):
             [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])]
         )
         points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))]
-        points = np.sort(np.unique(np.hstack([points, self.breaks])))
+        points = np.unique(np.hstack([points, self.breaks]))
 
         def func(x, quad=False):
             """

From 3df9bea76bba0dc75a84f38a3bfa05a467194675 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 5 Apr 2022 12:06:35 -0700
Subject: [PATCH 07/53] fixing identation and _clean_vectors when segments=True

---
 src/psfmachine/perturbation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index f7186c4..2f78eb9 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -126,7 +126,10 @@ def _clean_vectors(self):
             nvec += 1
         if self.cbvs:
             nvec += self.ncbvs
-        s = nvec * (len(self.breaks) + 1)
+        if self.segments:
+            s = nvec * (len(self.breaks) + 1)
+        else:
+            s = nvec
         X = self.vectors[:, :s]
         w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
         self.vectors[:, s:] -= X.dot(w)
@@ -134,7 +137,7 @@ def _clean_vectors(self):
 
     def plot(self):
         fig, ax = plt.subplots()
-ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1)
+        ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1)
         ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
         return fig
 

From b64b9cc20dbf927394089fbaef38714833e20800 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 5 Apr 2022 13:56:24 -0700
Subject: [PATCH 08/53] :zap: faster!

---
 src/psfmachine/perturbation.py | 46 ++++++++++++++--------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 2f78eb9..2d2e0a8 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -59,6 +59,7 @@ def __init__(
         if self.segments:
             self._cut_segments()
         self._clean_vectors()
+        self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
     def __repr__(self):
         return "PerturbationMatrix"
@@ -167,10 +168,6 @@ def model(self, time_indices=None):
             time_indices = np.ones(len(self.time), bool)
         return self.vectors[time_indices].dot(self.weights)
 
-    @property
-    def matrix(self):
-        return sparse.csr_matrix(self.bin_func(self.vectors))
-
     @property
     def shape(self):
         return self.vectors.shape
@@ -196,6 +193,7 @@ def _get_downsample_func(self):
                 p = np.hstack([p, b2])
             points.append(p)
         points = np.unique(np.hstack(points))
+        self.nbins = len(points)
 
         def func(x, quad=False):
             """
@@ -215,6 +213,7 @@ def _get_bindown_func(self):
         )
         points = points[~np.in1d(points, np.hstack([0, len(self.time) - 1]))]
         points = np.unique(np.hstack([points, self.breaks]))
+        self.nbins = len(points) + 1
 
         def func(x, quad=False):
             """
@@ -307,6 +306,19 @@ def __init__(
             bin_method=bin_method,
         )
 
+        self._cartesian_stacked = sparse.hstack(
+            [self.cartesian_matrix for idx in range(self.vectors.shape[1])],
+            format="csr",
+        )
+        repeat1d = np.repeat(
+            self.bin_func(self.vectors), self.cartesian_matrix.shape[1], axis=1
+        )
+        repeat2d = np.repeat(repeat1d, self.cartesian_matrix.shape[0], axis=0)
+        self.matrix = sparse.vstack([self._cartesian_stacked] * self.nbins).multiply(
+            repeat2d
+        )
+        self.matrix.eliminate_zeros()
+
     def __repr__(self):
         return "PerturbationMatrix3D"
 
@@ -317,20 +329,6 @@ def shape(self):
             self.cartesian_matrix.shape[1] * self.vectors.shape[1],
         )
 
-    @property
-    def matrix(self):
-        C = sparse.hstack(
-            [
-                sparse.vstack(
-                    [self.cartesian_matrix * vector[idx] for idx in range(len(vector))],
-                    format="csr",
-                )
-                for vector in self.bin_func(self.vectors).T
-            ],
-            format="csr",
-        )
-        return C
-
     def model(self, time_indices=None):
         """We build the matrix for every frame"""
         if time_indices is None:
@@ -338,17 +336,11 @@ def model(self, time_indices=None):
         time_indices = np.atleast_1d(time_indices)
         if isinstance(time_indices[0], bool):
             time_indices = np.where(time_indices[0])[0]
+
         return np.asarray(
             [
-                sparse.hstack(
-                    [
-                        sparse.vstack(
-                            self.cartesian_matrix * vector[time_index],
-                            format="csr",
-                        )
-                        for vector in (self.vectors).T
-                    ],
-                    format="csr",
+                self._cartesian_stacked.multiply(
+                    np.repeat(self.vectors[time_index], self.cartesian_matrix.shape[1])
                 ).dot(self.weights)
                 for time_index in time_indices
             ]

From 2d55aa174dcdab11a5f9bbd49da0f36dc3d09900 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 5 Apr 2022 16:11:48 -0700
Subject: [PATCH 09/53] :art: adding pixel masking

---
 src/psfmachine/perturbation.py | 29 +++++++++++++++++++++++++++--
 tests/test_perturbation.py     | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 2d2e0a8..4d35cc4 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -314,8 +314,10 @@ def __init__(
             self.bin_func(self.vectors), self.cartesian_matrix.shape[1], axis=1
         )
         repeat2d = np.repeat(repeat1d, self.cartesian_matrix.shape[0], axis=0)
-        self.matrix = sparse.vstack([self._cartesian_stacked] * self.nbins).multiply(
-            repeat2d
+        self.matrix = (
+            sparse.vstack([self._cartesian_stacked] * self.nbins)
+            .multiply(repeat2d)
+            .tocsr()
         )
         self.matrix.eliminate_zeros()
 
@@ -329,6 +331,29 @@ def shape(self):
             self.cartesian_matrix.shape[1] * self.vectors.shape[1],
         )
 
+    def fit(self, flux, flux_err=None, pixel_mask=None):
+        if pixel_mask is not None:
+            if not isinstance(pixel_mask, np.ndarray):
+                raise ValueError("`pixel_mask` must be an `np.ndarray`")
+            if not pixel_mask.shape[0] == flux.shape[-1]:
+                raise ValueError(
+                    f"`pixel_mask` must be shape {flux.shape[-1]} (npixels)"
+                )
+        else:
+            pixel_mask = np.ones(flux.shape[-1], bool)
+        if flux_err is None:
+            flux_err = np.ones(len(flux_err))
+
+        y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
+        k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel()
+        X = self.matrix
+        sigma_w_inv = X[k].T.dot(X[k].multiply(1 / ye[k, None] ** 2)) + np.diag(
+            1 / self.prior_sigma ** 2
+        )
+        B = X[k].T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2
+        self.weights = np.linalg.solve(sigma_w_inv, B)
+        return self.weights
+
     def model(self, time_indices=None):
         """We build the matrix for every frame"""
         if time_indices is None:
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index de8fd99..d5f87f5 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -118,3 +118,35 @@ def test_perturbation_matrix3d():
             p3.shape[0] - p3.shape[1] - 1
         )
         assert chi < 3
+
+    # Add in one bad pixel
+    flux[:, 100] = 1e5
+    pixel_mask = np.ones(169, bool)
+    pixel_mask[100] = False
+
+    for bin_method in ["downsample", "bin"]:
+        p3 = PerturbationMatrix3D(
+            time=time,
+            dx=dx,
+            dy=dy,
+            nknots=4,
+            radius=5,
+            poly_order=2,
+            bin_method=bin_method,
+        )
+        w = p3.fit(flux, flux_err)
+        model = p3.model()
+        chi = np.sum(
+            (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2
+            / (flux_err[:, pixel_mask] ** 2)
+        ) / (p3.shape[0] - p3.shape[1] - 1)
+        # Without the pixel masking the model doesn't fit
+        assert chi > 3
+        w = p3.fit(flux, flux_err, pixel_mask=pixel_mask)
+        model = p3.model()
+        chi = np.sum(
+            (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2
+            / (flux_err[:, pixel_mask] ** 2)
+        ) / (p3.shape[0] - p3.shape[1] - 1)
+        # with pixel masking, it should fit
+        assert chi < 3

From 7310ed354f744071f2668b92c5a5cf88c31b3ed7 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 5 Apr 2022 16:37:29 -0700
Subject: [PATCH 10/53] :memo: added docstrings

---
 src/psfmachine/perturbation.py | 168 ++++++++++++++++++++++++---------
 tests/test_perturbation.py     |  18 ++--
 2 files changed, 138 insertions(+), 48 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 4d35cc4..6620eff 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -1,6 +1,8 @@
 """Classes to deal with perturbation matrices"""
 
 import numpy as np
+import numpy.typing as npt
+from typing import Optional
 from scipy import sparse
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
@@ -25,19 +27,19 @@ class PerturbationMatrix(object):
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
-        How to bin the data under the hood. Default is by mean binning.
+        How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin'
     """
 
     def __init__(
         self,
-        time,
-        other_vectors=None,
-        poly_order=3,
-        focus=False,
-        cbvs=False,
-        segments=True,
-        resolution=10,
-        bin_method="bin",
+        time: npt.ArrayLike,
+        other_vectors: Optional[list] = None,
+        poly_order: int = 3,
+        focus: bool = False,
+        cbvs: bool = False,
+        segments: bool = True,
+        resolution: int = 10,
+        bin_method: str = "bin",
     ):
 
         self.time = time
@@ -77,8 +79,10 @@ def breaks(self):
         return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1
 
     def _validate(self):
-        # Check that the shape is correct etc
-        # Check the priors are right
+        """
+        Checks that the `other_vectors` are the right dimensions.
+        """
+
         if self.other_vectors is not None:
             if isinstance(self.other_vectors, (list, np.ndarray)):
                 self.other_vectors = np.atleast_2d(self.other_vectors)
@@ -93,6 +97,11 @@ def _validate(self):
         return
 
     def _cut_segments(self):
+        """
+        Cuts the data into "segments" wherever there is a break. Breaks are defined
+        as anywhere where there is a gap in data of more than 5 times the median
+        time between observations.
+        """
         x = np.array_split(np.arange(len(self.time)), self.breaks)
         masks = np.asarray(
             [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
@@ -104,7 +113,15 @@ def _cut_segments(self):
             ]
         )
 
-    def _get_focus_change(self, exptime=100):
+    def _get_focus_change(self, exptime: float = 100):
+        """
+        Finds a simple model for the focus change
+
+        Parameters
+        ----------
+        exptime: float
+            The timescale for an exponential decay.
+        """
         focus = np.asarray(
             [
                 np.exp(-exptime * (self.time - self.time[b]))
@@ -117,6 +134,7 @@ def _get_focus_change(self, exptime=100):
                 for b in np.hstack([0, self.breaks])
             ]
         )
+        focus[focus < 1e-10] = 0
         self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]])
         return
 
@@ -150,20 +168,51 @@ def _get_cbvs(self):
         #        self.vectors = np.hstack([self.vectors, cbvs])
         raise NotImplementedError
 
-    def fit(self, flux, flux_err=None):
+    def _fit_linalg(self, y, ye, k=None):
+        """Hidden method to fit data with linalg"""
+        if k is None:
+            k = np.ones(y.shape[0], bool)
+        X = self.matrix[k]
+        sigma_w_inv = X.T.dot(X.multiply(1 / ye[k, None] ** 2)) + np.diag(
+            1 / self.prior_sigma ** 2
+        )
+        B = X.T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2
+        return np.linalg.solve(sigma_w_inv, B)
+
+    def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None):
+        """
+        Fits flux to find the best fit model weights. Optionally will include flux errors.
+        Sets the `self.weights` attribute with best fit weights.
+
+        Parameters
+        ----------
+        flux: npt.ArrayLike
+            Array of flux values. Should have shape ntimes.
+        flux: npt.ArrayLike
+            Optional flux errors. Should have shape ntimes.
+        """
         if flux_err is None:
             flux_err = np.ones(len(flux_err))
 
         y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
-        X = self.matrix
-        sigma_w_inv = X.T.dot(X.multiply(1 / ye[:, None] ** 2)) + np.diag(
-            1 / self.prior_sigma ** 2
-        )
-        B = X.T.dot(y / ye ** 2) + self.prior_mu / self.prior_sigma ** 2
-        self.weights = np.linalg.solve(sigma_w_inv, B)
+        self.weights = self._fit_linalg(y, ye)
         return self.weights
 
-    def model(self, time_indices=None):
+    def model(self, time_indices: Optional[list] = None):
+        """Returns the best fit model at given `time_indices`.
+
+        Parameters
+        ----------
+        time_indices: list
+            Optionally pass a list of integers. Model will be evaluated at those indices.
+
+        Returns
+        -------
+        model: npt.ArrayLike
+            Array of values with the same shape as the `flux` used in `self.fit`
+        """
+        if not hasattr(self, "weights"):
+            raise ValueError("Run `fit` first.")
         if time_indices is None:
             time_indices = np.ones(len(self.time), bool)
         return self.vectors[time_indices].dot(self.weights)
@@ -175,6 +224,12 @@ def shape(self):
     def bin_func(self, var, **kwargs):
         """
         Bins down an input variable to the same time resolution as `self`
+
+        Parameters
+        ----------
+        var: npt.ArrayLike
+            Array of values with at least 1 dimension. The first dimension must be
+            the same shape as `self.time`
         """
         if self.bin_method.lower() == "downsample":
             func = self._get_downsample_func()
@@ -185,6 +240,7 @@ def bin_func(self, var, **kwargs):
         return func(var, **kwargs)
 
     def _get_downsample_func(self):
+        """Builds a function to lower the resolution of the data through downsampling"""
         points = []
         b = np.hstack([0, self.breaks, len(self.time) - 1])
         for b1, b2 in zip(b[:-1], b[1:]):
@@ -207,6 +263,7 @@ def func(x, quad=False):
         return func
 
     def _get_bindown_func(self):
+        """Builds a function to lower the resolution of the data through binning"""
         b = np.hstack([0, self.breaks, len(self.time) - 1])
         points = np.hstack(
             [np.arange(b1, b2, self.resolution) for b1, b2 in zip(b[:-1], b[1:])]
@@ -271,18 +328,18 @@ class PerturbationMatrix3D(PerturbationMatrix):
 
     def __init__(
         self,
-        time,
-        dx,
-        dy,
-        other_vectors=None,
-        poly_order=3,
-        nknots=10,
-        radius=8,
-        focus=False,
-        cbvs=False,
-        segments=True,
-        resolution=10,
-        bin_method="downsample",
+        time: npt.ArrayLike,
+        dx: npt.ArrayLike,
+        dy: npt.ArrayLike,
+        other_vectors: Optional[list] = None,
+        poly_order: int = 3,
+        nknots: int = 10,
+        radius: float = 8,
+        focus: bool = False,
+        cbvs: bool = False,
+        segments: bool = True,
+        resolution: int = 10,
+        bin_method: str = "downsample",
     ):
         self.dx = dx
         self.dy = dy
@@ -331,7 +388,26 @@ def shape(self):
             self.cartesian_matrix.shape[1] * self.vectors.shape[1],
         )
 
-    def fit(self, flux, flux_err=None, pixel_mask=None):
+    def fit(
+        self,
+        flux: npt.ArrayLike,
+        flux_err: Optional[npt.ArrayLike] = None,
+        pixel_mask: Optional[npt.ArrayLike] = None,
+    ):
+        """
+        Fits flux to find the best fit model weights. Optionally will include flux errors.
+        Sets the `self.weights` attribute with best fit weights.
+
+        Parameters
+        ----------
+        flux: npt.ArrayLike
+            Array of flux values. Should have shape ntimes x npixels.
+        flux_err: npt.ArrayLike
+            Optional flux errors. Should have shape ntimes x npixels.
+        pixel_mask: npt.ArrayLike
+            Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked.
+            Should have shape npixels.
+        """
         if pixel_mask is not None:
             if not isinstance(pixel_mask, np.ndarray):
                 raise ValueError("`pixel_mask` must be an `np.ndarray`")
@@ -346,16 +422,24 @@ def fit(self, flux, flux_err=None, pixel_mask=None):
 
         y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
         k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel()
-        X = self.matrix
-        sigma_w_inv = X[k].T.dot(X[k].multiply(1 / ye[k, None] ** 2)) + np.diag(
-            1 / self.prior_sigma ** 2
-        )
-        B = X[k].T.dot(y[k] / ye[k] ** 2) + self.prior_mu / self.prior_sigma ** 2
-        self.weights = np.linalg.solve(sigma_w_inv, B)
-        return self.weights
+        self.weights = self._fit_linalg(y, ye, k=k)
+        return
 
-    def model(self, time_indices=None):
-        """We build the matrix for every frame"""
+    def model(self, time_indices: Optional[list] = None):
+        """Returns the best fit model at given `time_indices`.
+
+        Parameters
+        ----------
+        time_indices: list
+            Optionally pass a list of integers. Model will be evaluated at those indices.
+
+        Returns
+        -------
+        model: npt.ArrayLike
+            Array of values with the same shape as the `flux` used in `self.fit`
+        """
+        if not hasattr(self, "weights"):
+            raise ValueError("Run `fit` first")
         if time_indices is None:
             time_indices = np.arange(len(self.time))
         time_indices = np.atleast_1d(time_indices)
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index d5f87f5..c3f93f5 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -36,7 +36,8 @@ def test_perturbation_matrix():
     res = 10
     p = PerturbationMatrix(time=time, focus=False, resolution=res)
     y, ye = np.random.normal(1, 0.01, size=200), np.ones(200) * 0.01
-    w = p.fit(y, ye)
+    p.fit(y, ye)
+    w = p.weights
     assert w.shape[0] == p.shape[1]
     assert np.isfinite(w).all()
     model = p.model()
@@ -55,7 +56,8 @@ def test_perturbation_matrix():
         with pytest.raises(ValueError):
             p.bin_func(y[:-4])
 
-        w = p.fit(y, ye)
+        p.fit(y, ye)
+        w = p.weights
         model = p.model()
         assert model.shape[0] == 200
         chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
@@ -84,7 +86,8 @@ def test_perturbation_matrix3d():
     assert p3.vectors.shape == (10, 2)
     assert p3.shape == (1690, 81 * 2)
     assert p3.matrix.shape == (169 * 3, 81 * 2)
-    w = p3.fit(flux, flux_err)
+    p3.fit(flux, flux_err)
+    w = p3.weights
     assert w.shape[0] == 81 * 2
     model = p3.model()
     assert model.shape == flux.shape
@@ -111,7 +114,8 @@ def test_perturbation_matrix3d():
             poly_order=2,
             bin_method=bin_method,
         )
-        w = p3.fit(flux, flux_err)
+        p3.fit(flux, flux_err)
+        w = p3.weights
         model = p3.model()
         assert model.shape == flux.shape
         chi = np.sum((flux - model) ** 2 / (flux_err ** 2)) / (
@@ -134,7 +138,8 @@ def test_perturbation_matrix3d():
             poly_order=2,
             bin_method=bin_method,
         )
-        w = p3.fit(flux, flux_err)
+        p3.fit(flux, flux_err)
+        w = p3.weights
         model = p3.model()
         chi = np.sum(
             (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2
@@ -142,7 +147,8 @@ def test_perturbation_matrix3d():
         ) / (p3.shape[0] - p3.shape[1] - 1)
         # Without the pixel masking the model doesn't fit
         assert chi > 3
-        w = p3.fit(flux, flux_err, pixel_mask=pixel_mask)
+        p3.fit(flux, flux_err, pixel_mask=pixel_mask)
+        w = p3.weights
         model = p3.model()
         chi = np.sum(
             (flux[:, pixel_mask] - model[:, pixel_mask]) ** 2

From 34d943cf7562883de11f4b16d8acac2fae8bed75 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 5 Apr 2022 19:25:40 -0700
Subject: [PATCH 11/53] smooth vector fx

---
 src/psfmachine/utils.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index 4850e43..cafe3c7 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -6,6 +6,7 @@
 
 from scipy import sparse
 from patsy import dmatrix
+from scipy.ndimage import gaussian_filter1d
 import pyia
 
 # size_limit is 1GB
@@ -552,3 +553,41 @@ def get_breaks(time):
     """
     dts = np.diff(time)
     return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
+
+
+def smooth_vector(v, splits=None, filter_size=13, mode="mirror"):
+    """
+    Smooth out a vector
+
+    Parameters
+    ----------
+    v : numpy.ndarray or list of numpy.ndarray
+        Arrays to be smoothen in the last axis
+    splits : list of index
+        List of index where `v` has discontinuity
+    filter_size : int
+        Filter window size
+    mode : str
+        The `mode` parameter determines how the input array is extended
+        beyond its boundaries. Options are {'reflect', 'constant', 'nearest', 'mirror',
+        'wrap'}. Default is 'mirror'
+    """
+    v_smooth = []
+    if isinstance(v, list):
+        v = np.asarray(v)
+    else:
+        v = np.atleast_2d(v)
+
+    if splits is None:
+        splits = [0, v.shape[1]]
+
+    for i in range(1, len(splits)):
+        v_smooth.append(
+            gaussian_filter1d(
+                v[:, splits[i - 1] : splits[i]],
+                filter_size,
+                mode=mode,
+                axis=1,
+            )
+        )
+    return np.hstack(v_smooth)

From 7648325d2b4f5279b0a7845dda1880f79a9a7068 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 5 Apr 2022 19:25:52 -0700
Subject: [PATCH 12/53] improved plots and docstrings

---
 src/psfmachine/perturbation.py | 75 ++++++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 6620eff..a4717fc 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -6,6 +6,7 @@
 from scipy import sparse
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
+from matplotlib import cm
 
 
 class PerturbationMatrix(object):
@@ -23,11 +24,13 @@ class PerturbationMatrix(object):
     focus : bool
         Whether to correct focus using a simple exponent model
     segments: bool
-        Whether to fit portions of data where there is a significant time break as separate segments
+        Whether to fit portions of data where there is a significant time break as
+        separate segments
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
-        How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin'
+        How to bin the data under the hood. Default is by mean binning. Options are
+        'downsample' and 'bin'
     """
 
     def __init__(
@@ -155,9 +158,37 @@ def _clean_vectors(self):
         return
 
     def plot(self):
-        fig, ax = plt.subplots()
-        ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1)
-        ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
+        """
+        Creates a diagnostic plot showing the vector components. Will show the full
+        (line) and bindown (dots) version of the components.
+
+        """
+        fig, ax = plt.subplots(1, 2, figsize=(12, 4))
+        fig.suptitle("Vectors")
+        bin_time = self.bin_func(self.time)
+        bin_vec = self.bin_func(self.vectors)
+        colors = cm.get_cmap('Set1', self.vectors.shape[1])
+        for k in range(self.vectors.shape[1]):
+            ax[0].plot(self.time, self.vectors[:, k] + k * 0.1, color=colors(k))
+            ax[0].plot(
+                bin_time,
+                bin_vec[:, k] + k * 0.1,
+                marker=".",
+                lw=0,
+                color=colors(k),
+            )
+        ax[0].set(xlabel="Time", ylabel="Vector", yticks=[])
+
+        im = ax[1].imshow(
+            self.vectors.T,
+            origin="lower",
+            aspect="auto",
+            interpolation="nearest",
+            vmin=-1,
+            vmax=1,
+        )
+        ax[1].set(xlabel="Time Index", yticks=range(self.vectors.shape[1]))
+        plt.colorbar(im, ax=ax[1])
         return fig
 
     def _get_cbvs(self):
@@ -319,7 +350,8 @@ class PerturbationMatrix3D(PerturbationMatrix):
     focus : bool
         Whether to correct focus using a simple exponent model
     segments: bool
-        Whether to fit portions of data where there is a significant time break as separate segments
+        Whether to fit portions of data where there is a significant time break as
+        separate segments
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
@@ -395,8 +427,8 @@ def fit(
         pixel_mask: Optional[npt.ArrayLike] = None,
     ):
         """
-        Fits flux to find the best fit model weights. Optionally will include flux errors.
-        Sets the `self.weights` attribute with best fit weights.
+        Fits flux to find the best fit model weights. Optionally will include flux
+        errors. Sets the `self.weights` attribute with best fit weights.
 
         Parameters
         ----------
@@ -405,8 +437,8 @@ def fit(
         flux_err: npt.ArrayLike
             Optional flux errors. Should have shape ntimes x npixels.
         pixel_mask: npt.ArrayLike
-            Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked.
-            Should have shape npixels.
+            Pixel mask to apply. Values that are `True` will be used in the fit.
+            Values that are `False` will be masked. Should have shape npixels.
         """
         if pixel_mask is not None:
             if not isinstance(pixel_mask, np.ndarray):
@@ -455,11 +487,28 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
-    def plot_model(self, time_index=0):
+    def plot_model(self, time_index: int = 0):
+        """Creates a diagnostic plot with the perturbation model at a given cadences
+
+        Parameters
+        ----------
+        time_index: int
+            Time index (cadence) to show in the figure.
+        """
         if not hasattr(self, "weights"):
             raise ValueError("Run `fit` first.")
-        fig, ax = plt.subplots()
-        ax.scatter(self.dx, self.dy, c=self.model(time_index)[0])
+        fig, ax = plt.subplots(figsize=(6, 5))
+        im = ax.scatter(
+            self.dx,
+            self.dy,
+            c=self.model(time_index)[0],
+            s=2,
+            cmap="viridis",
+            vmin=0.8,
+            vmax=1.2,
+        )
+        cbar = fig.colorbar(im, ax=ax, shrink=0.7)
+        cbar.set_label("Perturbation Value")
         ax.set(
             xlabel=r"$\delta$x",
             ylabel=r"$\delta$y",

From b330ee6e754d761554a9b5d97404f7fd73bf8804 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 5 Apr 2022 19:40:04 -0700
Subject: [PATCH 13/53] machine uses PerturbationMatrix3D

---
 src/psfmachine/machine.py | 511 +++++++++++++-------------------------
 1 file changed, 173 insertions(+), 338 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index ea52eb4..cbcf20c 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -18,8 +18,10 @@
     _combine_A,
     threshold_bin,
     get_breaks,
+    smooth_vector,
 )
 from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP
+from .perturbation import PerturbationMatrix3D
 
 __all__ = ["Machine"]
 
@@ -770,9 +772,7 @@ def _time_bin(self, npoints=200, downsample=False):
             # if poscorr_filter_size == 0 then the smoothing will fail. we take care of
             # it by setting every value < .5 to 0.1 which leads to no smoothing
             # (residuals < 1e-4)
-            self.poscorr_filter_size = (
-                0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size
-            )
+
             for i in range(1, len(splits)):
                 pc1_smooth.extend(
                     gaussian_filter1d(
@@ -823,7 +823,13 @@ def _time_bin(self, npoints=200, downsample=False):
 
         return ta, tm, fm_raw, fm, fem
 
-    def build_time_model(self, plot=False, downsample=False, split_time_model=False):
+    def build_time_model(
+        self,
+        plot=False,
+        bin_method="bin",
+        split_time_segments=False,
+        focus_component=False,
+    ):
         """
         Builds a time model that moves the PRF model to account for the scene movement
         due to velocity aberration. It has two methods to choose from using the
@@ -837,259 +843,160 @@ def build_time_model(self, plot=False, downsample=False, split_time_model=False)
         ----------
         plot: boolean
             Plot a diagnostic figure.
-        downsample: boolean
+        bin_method: boolean
             If True the `time` and `pos_corr` arrays will be downsampled instead of
             binned.
-        split_time_model : boolean, list/array of ints
+        split_time_segments : boolean, list/array of ints
             If `True` will split the light curve into segments to fit different time
             models with a commong pixel normalization. If `False` will fit the full
             time series as one segment. If list or numpy array of ints, will use the
             index values as segment breaks.
         """
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            (
-                time_original,
-                time_binned,
-                flux_binned_raw,
-                flux_binned,
-                flux_err_binned,
-                poscorr1_smooth,
-                poscorr2_smooth,
-                poscorr1_binned,
-                poscorr2_binned,
-            ) = self._time_bin(npoints=self.n_time_points, downsample=downsample)
-            self.pos_corr1_smooth = poscorr1_smooth
-            self.pos_corr2_smooth = poscorr2_smooth
-        else:
-            (
-                time_original,
-                time_binned,
-                flux_binned_raw,
-                flux_binned,
-                flux_err_binned,
-            ) = self._time_bin(npoints=self.n_time_points, downsample=downsample)
-
-        self._whitened_time = time_original
-        self.downsample_time_knots = downsample
-        # not necessary to take value from Quantity to do .multiply()
-        dx, dy = (
-            self.uncontaminated_source_mask.multiply(self.dra),
-            self.uncontaminated_source_mask.multiply(self.ddec),
+        # create the time and space basis
+        _whitened_time = (self.time - self.time.mean()) / (
+            self.time.max() - self.time.mean()
+        )
+        dx, dy, uncontaminated_pixels = (
+            self.source_mask.multiply(self.dra),
+            self.source_mask.multiply(self.ddec),
+            self.source_mask.multiply(self.uncontaminated_source_mask.todense()),
         )
         dx = dx.data * u.deg.to(u.arcsecond)
         dy = dy.data * u.deg.to(u.arcsecond)
+        # uncontaminated pixels mask
+        uncontaminated_pixels = uncontaminated_pixels.data
 
-        A_c = _make_A_cartesian(
-            dx,
-            dy,
-            n_knots=self.n_time_knots,
-            radius=self.time_radius,
-            knot_spacing_type=self.cartesian_knot_spacing,
+        # add other vectors if asked, centroids or poscorrs
+        splits = get_breaks(self.time)
+        self.poscorr_filter_size = (
+            0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size
         )
-        A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr")
-        # Cartesian spline with time dependence
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            # Cartesian spline with poscor dependence
-            A3 = _combine_A(A2, poscorr=[poscorr1_binned, poscorr2_binned])
+        if self.time_corrector == "polynomial":
+            other_vectors = None
         else:
-            # Cartesian spline with time dependence
-            A3 = _combine_A(A2, time=time_binned)
-
-        prior_sigma = np.ones(A3.shape[1]) * 10
-        prior_mu = np.zeros(A3.shape[1])
-
-        # fit the time model for each segment
-        # use user input
-        if isinstance(split_time_model, (np.ndarray, list)):
-            # we make sure first and last index are included and sorted
-            splits = np.unique(np.append(split_time_model, [0, len(self.time)]))
-        elif isinstance(split_time_model, bool):
-            # we find the splits in data
-            if split_time_model:
-                splits = get_breaks(self.time)
-            else:
-                splits = np.array([0, len(self.time)])
-
-        seg_time_model_w, pix_mask_k = [], []
-        # we iterate over segements
-        for bk in range(len(splits) - 1):
-            # find the right mask that select the binned times andd flux to fit the
-            # time model
-            seg_mask = (time_binned[:, 0] >= time_original[splits[bk]]) & (
-                time_binned[:, 0] < time_original[splits[bk + 1] - 1]
-            )
-            # need to rebuild the A3 DM to use the rigth times/poscorrs
-            A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr")
-            if hasattr(self, "pos_corr1") and self.time_corrector in [
-                "pos_corr",
-                "centroid",
-            ]:
-                A3 = _combine_A(
-                    A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]]
-                )
-            else:
-                A3 = _combine_A(A2, time=time_binned[seg_mask])
+            if self.time_corrector == "pos_corr" and hasattr(self, "pos_corr1"):
+                mpc1 = np.nanmedian(self.pos_corr1, axis=0)
+                mpc2 = np.nanmedian(self.pos_corr2, axis=0)
+                # other_vectors =
+            elif self.time_corrector == "centroid" and hasattr(self, "ra_centroid"):
+                mpc1 = self.ra_centroid.to("arcsec").value / 4
+                mpc2 = self.dec_centroid.to("arcsec").value / 4
 
-            # No saturated pixels, 1e5 is a hardcoded value for Kepler.
-            k = (
-                (flux_binned_raw < 1e5).all(axis=0)[None, :]
-                * np.ones(flux_binned_raw[seg_mask].shape, bool)
-            ).ravel()
-            # No faint pixels, 100 is a hardcoded value for Kepler.
-            k &= (
-                (flux_binned_raw[seg_mask] > 100).all(axis=0)[None, :]
-                * np.ones(flux_binned_raw[seg_mask].shape, bool)
-            ).ravel()
-            # No huge variability
-            k &= (
-                (np.abs(flux_binned[seg_mask] - 1) < 1).all(axis=0)[None, :]
-                * np.ones(flux_binned[seg_mask].shape, bool)
-            ).ravel()
-            # No nans
-            k &= np.isfinite(flux_binned[seg_mask].ravel()) & np.isfinite(
-                flux_err_binned[seg_mask].ravel()
+            # we smooth the vectors
+            mpc1_smooth, mpc2_smooth = smooth_vector(
+                [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size
             )
+            # we combine them as the first order
+            other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth]
+
+        # create a 3D perturbation matrix
+        P3 = PerturbationMatrix3D(
+            time=_whitened_time,
+            dx=dx,
+            dy=dy,
+            segments=split_time_segments,
+            focus=focus_component,
+            resolution=self.n_time_points,
+            other_vectors=other_vectors,
+            bin_method=bin_method,
+        )
 
-            # we solve the segment by using `seg_mask` on all `*_binned` variables
-            for count in [0, 1, 2]:
-                sigma_w_inv = A3[k].T.dot(
-                    (
-                        A3.multiply(1 / flux_err_binned[seg_mask].ravel()[:, None] ** 2)
-                    ).tocsr()[k]
-                )
-                sigma_w_inv += np.diag(1 / prior_sigma ** 2)
-                # Fit the flux - 1
-                B = A3[k].T.dot(
-                    (
-                        (flux_binned[seg_mask].ravel() - 1)
-                        / flux_err_binned[seg_mask].ravel() ** 2
-                    )[k]
-                )
-                B += prior_mu / (prior_sigma ** 2)
-                time_model_w = np.linalg.solve(sigma_w_inv, B)
-                res = flux_binned[seg_mask] - A3.dot(time_model_w).reshape(
-                    flux_binned[seg_mask].shape
+        # get uncontaminated pixel norm-flux
+        flux, flux_err = np.array(
+            [
+                np.array(
+                    [
+                        self.source_mask.multiply(self.flux[idx]).data,
+                        self.source_mask.multiply(self.flux_err[idx]).data,
+                    ]
                 )
-                res = np.ma.masked_array(res, (~k).reshape(flux_binned[seg_mask].shape))
-                bad_targets = sigma_clip(res, sigma=5).mask
-                bad_targets = (
-                    np.ones(flux_binned[seg_mask].shape, bool) & bad_targets.any(axis=0)
-                ).ravel()
-                # k &= ~sigma_clip(flux_binned.ravel() - A3.dot(time_model_w)).mask
-                k &= ~bad_targets
-            #  we save the weights and pixel mask of each segement for later use
-            seg_time_model_w.append(time_model_w)
-            pix_mask_k.append(k)
-
-        self.seg_splits = splits
-        self.time_model_w = np.asarray(seg_time_model_w)
-        self._time_masked = np.asarray(pix_mask_k, dtype=object)
+                for idx in range(self.nt)
+            ]
+        ).transpose((1, 0, 2))
+        flux_norm = flux / np.nanmean(flux, axis=0)
+        flux_err_norm = flux_err / np.nanmean(flux, axis=0)
+
+        # bindown flux arrays
+        flux_binned_raw = P3.bin_func(flux)
+        flux_binned = P3.bin_func(flux_norm)
+        flux_err_binned = P3.bin_func(flux_err_norm)
+
+        # No saturated pixels, 1e5 is a hardcoded value for Kepler.
+        k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones(
+            flux_binned_raw.shape, bool
+        )
+        # No faint pixels, 100 is a hardcoded value for Kepler.
+        k &= (flux_binned_raw > 100).all(axis=0)[None, :] * np.ones(
+            flux_binned_raw.shape, bool
+        )
+        # No huge variability
+        k &= (np.abs(flux_binned - 1) < 1).all(axis=0)[None, :] * np.ones(
+            flux_binned.shape, bool
+        )
+        # No nans
+        k &= np.isfinite(flux_binned) & np.isfinite(flux_err_binned)
+        k = np.all(k, axis=0)
+        # combine good-behaved pixels with uncontaminated pixels
+        k &= uncontaminated_pixels
+
+        # iterate to remvoe outliers
+        for count in [0, 1, 2]:
+            # need to add pixel_mask=k
+            P3.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k)
+            res = flux_binned - P3.matrix.dot(P3.weights).reshape(flux_binned.shape)
+            # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape))
+            res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T)
+            bad_targets = sigma_clip(res, sigma=5).mask
+            bad_targets = bad_targets.any(axis=0)
+            k &= ~bad_targets
+
+        # bookkeeping
+        self.flux_binned = flux_binned
+        self._time_masked_pix = k
+        self.P3 = P3
         if plot:
             return self.plot_time_model()
         return
 
-    def plot_time_model(self, segment=0):
+    def _get_perturbed_model(self, time_indices=None):
         """
-        Diagnostic plot of time model.
+        Computes the perturbed model at given times
+        """
+        if time_indices is None:
+            time_indices = np.arange(len(self.time))
+        time_indices = np.atleast_1d(time_indices)
+        if isinstance(time_indices[0], bool):
+            time_indices = np.where(time_indices[0])[0]
+
+        perturbed_model = []
+        for tdx in time_indices:
+            X = self.mean_model.copy()
+            X.data *= self.P3.model(time_indices=tdx).ravel()
+            perturbed_model.append(X)
+        self.perturbed_model = perturbed_model
 
-        Parameters
-        ----------
-        segment : int
-            Which light curve segment will be plotted, default is first one.
+        return
+
+        # raise NotImplementedError
+
+    def plot_time_model(self):
+        """
+        Diagnostic plot of time model.
 
         Returns
         -------
         fig : matplotlib.Figure
             Figure.
         """
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            (
-                time_original,
-                time_binned,
-                flux_binned_raw,
-                flux_binned,
-                flux_err_binned,
-                poscorr1_smooth,
-                poscorr2_smooth,
-                poscorr1_binned,
-                poscorr2_binned,
-            ) = self._time_bin(
-                npoints=self.n_time_points, downsample=self.downsample_time_knots
-            )
-        else:
-            (
-                time_original,
-                time_binned,
-                flux_binned_raw,
-                flux_binned,
-                flux_err_binned,
-            ) = self._time_bin(
-                npoints=self.n_time_points, downsample=self.downsample_time_knots
-            )
-
-        # not necessary to take value from Quantity to do .multiply()
-        dx, dy = (
-            self.uncontaminated_source_mask.multiply(self.dra),
-            self.uncontaminated_source_mask.multiply(self.ddec),
-        )
-        dx = dx.data * u.deg.to(u.arcsecond)
-        dy = dy.data * u.deg.to(u.arcsecond)
-
-        A_c = _make_A_cartesian(
-            dx,
-            dy,
-            n_knots=self.n_time_knots,
-            radius=self.time_radius,
-            knot_spacing_type=self.cartesian_knot_spacing,
-        )
-        # if self.seg_splits.shape[0] == 2 and segment == 0:
-        #     seg_mask = np.ones(time_binned.shape[0], dtype=bool)
-        # else:
-        seg_mask = (time_binned[:, 0] >= time_original[self.seg_splits[segment]]) & (
-            time_binned[:, 0] < time_original[self.seg_splits[segment + 1] - 1]
-        )
-        # find the right mask that select the binned times andd flux
-        A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr")
-
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            # Cartesian spline with poscor dependence
-            A3 = _combine_A(
-                A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]]
-            )
-        else:
-            # Cartesian spline with time dependence
-            A3 = _combine_A(A2, time=time_binned[seg_mask])
-
-        model = (
-            A3.dot(self.time_model_w[segment]).reshape(flux_binned[seg_mask].shape) + 1
-        )
-        fig, ax = plt.subplots(2, 2, figsize=(7, 6), facecolor="w")
-        k1 = (
-            self._time_masked[segment]
-            .reshape(flux_binned[seg_mask].shape)[0]
-            .astype(bool)
-        )
-        k2 = (
-            self._time_masked[segment]
-            .reshape(flux_binned[seg_mask].shape)[-1]
-            .astype(bool)
+        model_binned = self.P3.matrix.dot(self.P3.weights).reshape(
+            self.flux_binned.shape
         )
+        fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w")
+        # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool)
         im = ax[0, 0].scatter(
-            dx[k1],
-            dy[k1],
-            c=flux_binned[seg_mask][0][k1],
+            self.P3.dx[self._time_masked_pix],
+            self.P3.dy[self._time_masked_pix],
+            c=self.flux_binned[0, self._time_masked_pix],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1097,9 +1004,9 @@ def plot_time_model(self, segment=0):
             rasterized=True,
         )
         ax[0, 1].scatter(
-            dx[k2],
-            dy[k2],
-            c=flux_binned[seg_mask][-1][k2],
+            self.P3.dx[self._time_masked_pix],
+            self.P3.dy[self._time_masked_pix],
+            c=self.flux_binned[-1, self._time_masked_pix],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1107,9 +1014,9 @@ def plot_time_model(self, segment=0):
             rasterized=True,
         )
         ax[1, 0].scatter(
-            dx[k1],
-            dy[k1],
-            c=model[0][k1],
+            self.P3.dx[self._time_masked_pix],
+            self.P3.dy[self._time_masked_pix],
+            c=model_binned[0, self._time_masked_pix],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1117,9 +1024,9 @@ def plot_time_model(self, segment=0):
             rasterized=True,
         )
         ax[1, 1].scatter(
-            dx[k2],
-            dy[k2],
-            c=model[-1][k2],
+            self.P3.dx[self._time_masked_pix],
+            self.P3.dy[self._time_masked_pix],
+            c=model_binned[-1, self._time_masked_pix],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1553,89 +1460,39 @@ def fit_model(self, fit_va=False):
         )
 
         self.model_flux = np.zeros(self.flux.shape) * np.nan
-
         self.ws = np.zeros((self.nt, self.mean_model.shape[0]))
         self.werrs = np.zeros((self.nt, self.mean_model.shape[0]))
 
         if fit_va:
-            if not hasattr(self, "time_model_w"):
-                raise ValueError(
-                    "Please use `build_time_model` before fitting with velocity aberration."
-                )
-
-            # not necessary to take value from Quantity to do .multiply()
-            dx, dy = (
-                self.source_mask.multiply(self.dra),
-                self.source_mask.multiply(self.ddec),
-            )
-            dx = dx.data * u.deg.to(u.arcsecond)
-            dy = dy.data * u.deg.to(u.arcsecond)
-
-            A_cp = _make_A_cartesian(
-                dx,
-                dy,
-                n_knots=self.n_time_knots,
-                radius=self.time_radius,
-                knot_spacing_type=self.cartesian_knot_spacing,
-            )
-            A_cp3 = sparse.hstack([A_cp, A_cp, A_cp, A_cp], format="csr")
-
             self.ws_va = np.zeros((self.nt, self.mean_model.shape[0]))
             self.werrs_va = np.zeros((self.nt, self.mean_model.shape[0]))
 
-            for tdx in tqdm(
-                range(self.nt),
-                desc=f"Fitting {self.nsources} Sources (w. VA)",
-                disable=self.quiet,
-            ):
-                X = self.mean_model.copy()
-                X = X.T
+        for tdx in tqdm(
+            range(self.nt),
+            desc=f"Fitting {self.nsources} Sources (w. VA)",
+            disable=self.quiet,
+        ):
+            X = self.mean_model.copy()
+            X = X.T
 
-                sigma_w_inv = X.T.dot(
-                    X.multiply(1 / self.flux_err[tdx][:, None] ** 2)
-                ).toarray()
-                sigma_w_inv += np.diag(1 / (prior_sigma ** 2))
-                B = X.T.dot((self.flux[tdx] / self.flux_err[tdx] ** 2))
-                B += prior_mu / (prior_sigma ** 2)
-                self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B))
-                self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5
-
-                # Divide through by expected velocity aberration
-                X = self.mean_model.copy()
-                if hasattr(self, "pos_corr1") and self.time_corrector in [
-                    "pos_corr",
-                    "centroid",
-                ]:
-                    # use median pos_corr
-                    t_mult = np.hstack(
-                        np.array(
-                            [
-                                1,
-                                self.pos_corr1_smooth[tdx],
-                                self.pos_corr2_smooth[tdx],
-                                self.pos_corr1_smooth[tdx] * self.pos_corr2_smooth[tdx],
-                            ]
-                        )[:, None]
-                        * np.ones(A_cp3.shape[1] // 4)
-                    )
-                else:
-                    # use time
-                    t_mult = np.hstack(
-                        (self._whitened_time[tdx] ** np.arange(4))[:, None]
-                        * np.ones(A_cp3.shape[1] // 4)
+            sigma_w_inv = X.T.dot(
+                X.multiply(1 / self.flux_err[tdx][:, None] ** 2)
+            ).toarray()
+            sigma_w_inv += np.diag(1 / (prior_sigma ** 2))
+            B = X.T.dot((self.flux[tdx] / self.flux_err[tdx] ** 2))
+            B += prior_mu / (prior_sigma ** 2)
+            self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B))
+            self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5
+            self.model_flux[tdx] = X.dot(self.ws[tdx])
+
+            if fit_va:
+                if not hasattr(self, "P3"):
+                    raise ValueError(
+                        "Please use `build_time_model` before fitting with velocity "
+                        "aberration."
                     )
-                # we make sure to use the `time_model_w` that correspond to the segment
-                # we are computing
-                seg_num = np.where(
-                    [
-                        (tdx >= self.seg_splits[k]) & (tdx < self.seg_splits[k + 1])
-                        for k in range(len(self.seg_splits) - 1)
-                    ]
-                )[0]
-                X.data *= (
-                    A_cp3.multiply(t_mult).dot(self.time_model_w[seg_num].ravel()) + 1
-                )
-                X = X.T
+
+                X.data *= self.P3.model(time_indices=tdx).ravel()
 
                 sigma_w_inv = X.T.dot(
                     X.multiply(1 / self.flux_err[tdx][:, None] ** 2)
@@ -1647,38 +1504,16 @@ def fit_model(self, fit_va=False):
                 self.werrs_va[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5
                 self.model_flux[tdx] = X.dot(self.ws_va[tdx])
 
-            nodata = np.asarray(self.mean_model.sum(axis=1))[:, 0] == 0
-            self.ws[:, nodata] *= np.nan
-            self.werrs[:, nodata] *= np.nan
+        # check bad estimates
+        nodata = np.asarray(self.mean_model.sum(axis=1))[:, 0] == 0
+        # These sources are poorly estimated
+        # nodata |= (self.mean_model.max(axis=1) > 1).toarray()[:, 0]
+        self.ws[:, nodata] *= np.nan
+        self.werrs[:, nodata] *= np.nan
+        if fit_va:
             self.ws_va[:, nodata] *= np.nan
             self.werrs_va[:, nodata] *= np.nan
 
-        else:
-
-            X = self.mean_model.copy()
-            X = X.T
-            f = self.flux
-            fe = self.flux_err
-
-            for tdx in tqdm(
-                range(self.nt),
-                desc=f"Fitting {self.nsources} Sources (No VA)",
-                disable=self.quiet,
-            ):
-                sigma_w_inv = X.T.dot(X.multiply(1 / fe[tdx][:, None] ** 2)).toarray()
-                sigma_w_inv += np.diag(1 / (prior_sigma ** 2))
-                B = X.T.dot((f[tdx] / fe[tdx] ** 2))
-                B += prior_mu / (prior_sigma ** 2)
-                self.ws[tdx] = np.linalg.solve(sigma_w_inv, np.nan_to_num(B))
-                self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5
-                self.model_flux[tdx] = X.dot(self.ws[tdx])
-
-            nodata = np.asarray(self.source_mask.sum(axis=1))[:, 0] == 0
-            # These sources are poorly estimated
-            nodata |= (self.mean_model.max(axis=1) > 1).toarray()[:, 0]
-            self.ws[:, nodata] *= np.nan
-            self.werrs[:, nodata] *= np.nan
-
         return
 
     # aperture photometry functions

From cc99962fbb574b5ac53f97ac256f5470c1ae7af9 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 5 Apr 2022 20:04:42 -0700
Subject: [PATCH 14/53] demoving _time_bin method

---
 src/psfmachine/machine.py | 205 --------------------------------------
 1 file changed, 205 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index cbcf20c..11a5ff2 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -618,211 +618,6 @@ def _get_centroids(self):
 
         return
 
-    def _time_bin(self, npoints=200, downsample=False):
-        """Bin the flux data down in time. If using `pos_corr`s as corrector, it will
-        return also the binned and smooth versions of the pos_coors vectors.
-
-        Parameters
-        ----------
-        npoints: int
-            How many points should be in each time bin
-        downsample: bool
-            If True, the arrays will be downsampled instead of bin-averaged
-
-        Returns
-        -------
-        time_original: np.ndarray
-            The time array of the data, whitened
-        time_binned: np.ndarray
-            The binned time array
-        flux_binned_raw: np.ndarray
-            The binned flux, raw
-        flux_binned: np.ndarray
-            The binned flux, whitened by the mean of the flux in time
-        flux_err_binned:: np.ndarray
-            The binned flux error, whitened by the mean of the flux
-        pc1_smooth and pc2_smooth: np.ndarray
-            Smooth version of the median poscorr vectors 1 and 2 used for time correction.
-        pc1_bin and pc2_bin: np.ndarray
-            Binned version of poscorrs vectors 1 and 2 used for setting knots.
-        """
-
-        # Where there are break points in the time array
-        splits = get_breaks(self.time)
-        # if using poscorr, find and add discontinuity in poscorr data
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            if self.time_corrector == "pos_corr":
-                # take the scene-median poscorr
-                mpc1 = np.nanmedian(self.pos_corr1, axis=0)
-                mpc2 = np.nanmedian(self.pos_corr2, axis=0)
-            else:
-                # if usig centroids need to convert to pixels
-                mpc1 = self.ra_centroid.to("arcsec").value / 4
-                mpc2 = self.dec_centroid.to("arcsec").value / 4
-
-            # find poscorr discontinuity in each axis
-            grads1 = np.gradient(mpc1, self.time)
-            grads2 = np.gradient(mpc2, self.time)
-            # the 7-sigma here is hardcoded and found to work ok
-            splits1 = np.where(grads1 > 7 * grads1.std())[0]
-            splits2 = np.where(grads2 > 7 * grads2.std())[0]
-            # merging breaks
-            splits = np.unique(np.concatenate([splits, splits1[1::2], splits2[1::2]]))
-            del grads1, grads2, splits1, splits2
-
-        # the adition is to set the first knot after breaks @ 1% of the sequence lenght
-        splits_a = splits[:-1] + int(self.nt * 0.01)
-        splits_b = splits[1:]
-        dsplits = (splits_b - splits_a) // npoints
-        # this isteration is to avoid knots right at poscorr/time discontinuity by
-        # iterating over segments between splits and creating evenly spaced knots
-        # within them
-        breaks = []
-        for spdx in range(len(splits_a)):
-            if not downsample:
-                breaks.append(splits_a[spdx] + np.arange(0, dsplits[spdx]) * npoints)
-            else:
-                # if downsample, the knots are spaced by npoints untill the end of the
-                # segment
-                breaks.append(
-                    np.linspace(
-                        splits_a[spdx],
-                        splits_b[spdx] - int(self.nt * 0.01),
-                        dsplits[spdx],
-                        dtype=int,
-                    )
-                )
-        breaks = np.unique(np.hstack(breaks))
-
-        if not downsample:
-            # time averaged between breaks
-            tm = np.vstack(
-                [t1.mean(axis=0) for t1 in np.array_split(self.time, breaks)]
-            ).ravel()
-            # whiten the time array
-            ta = (self.time - tm.mean()) / (tm.max() - tm.mean())
-            # find the time index for each segments between breaks to then average flux
-            ms = [
-                np.in1d(np.arange(self.nt), i)
-                for i in np.array_split(np.arange(self.nt), breaks)
-            ]
-            # Average Pixel flux values
-            fm = np.asarray(
-                [
-                    (
-                        sparse.csr_matrix(self.uncontaminated_source_mask)
-                        .multiply(self.flux[ms[tdx]].mean(axis=0))
-                        .data
-                    )
-                    for tdx in range(len(ms))
-                ]
-            )
-            # average flux err values at knots
-            fem = np.asarray(
-                [
-                    (
-                        sparse.csr_matrix(self.uncontaminated_source_mask)
-                        .multiply((self.flux_err ** 2)[ms[tdx]].sum(axis=0) ** 0.5)
-                        .data
-                        / ms[tdx].sum()
-                    )
-                    for tdx in range(len(ms))
-                ]
-            )
-        else:
-            # use breaks as downsample points
-            # when working with short light curves, need to make sure the last break
-            # point isn't the lenght of the time.
-            if breaks[-1] == self.nt:
-                breaks[-1] -= 1
-            tm = self.time[breaks]
-            ta = (self.time - tm.mean()) / (tm.max() - tm.mean())
-            fm = np.asarray(
-                [
-                    self.uncontaminated_source_mask.multiply(self.flux[idx]).data
-                    for idx in breaks
-                ]
-            )
-            fem = np.asarray(
-                [
-                    self.uncontaminated_source_mask.multiply(self.flux_err[idx]).data
-                    for idx in breaks
-                ]
-            )
-
-        fm_raw = fm.copy()
-        fem /= np.nanmean(fm, axis=0)
-        fm /= np.nanmean(fm, axis=0)
-
-        tm = ((tm - tm.mean()) / (tm.max() - tm.mean()))[:, None] * np.ones(fm.shape)
-
-        # poscor
-        if hasattr(self, "pos_corr1") and self.time_corrector in [
-            "pos_corr",
-            "centroid",
-        ]:
-            # we smooth the poscorr with a Gaussian kernel and 12 cadence window
-            # (6hr-CDPP) to not introduce too much noise, the smoothing is aware
-            # of focus-change breaks
-            pc1_smooth = []
-            pc2_smooth = []
-            # if poscorr_filter_size == 0 then the smoothing will fail. we take care of
-            # it by setting every value < .5 to 0.1 which leads to no smoothing
-            # (residuals < 1e-4)
-
-            for i in range(1, len(splits)):
-                pc1_smooth.extend(
-                    gaussian_filter1d(
-                        mpc1[splits[i - 1] : splits[i]],
-                        self.poscorr_filter_size,
-                        mode="mirror",
-                    )
-                )
-                pc2_smooth.extend(
-                    gaussian_filter1d(
-                        mpc2[splits[i - 1] : splits[i]],
-                        self.poscorr_filter_size,
-                        mode="mirror",
-                    )
-                )
-            pc1_smooth = np.array(pc1_smooth)
-            pc2_smooth = np.array(pc2_smooth)
-            pc1_smooth = (pc1_smooth - pc1_smooth.mean()) / (
-                pc1_smooth.max() - pc1_smooth.mean()
-            )
-            pc2_smooth = (pc2_smooth - pc2_smooth.mean()) / (
-                pc2_smooth.max() - pc2_smooth.mean()
-            )
-
-            # do poscorr binning
-            if not downsample:
-                pc1_bin = np.vstack(
-                    [np.median(t1, axis=0) for t1 in np.array_split(pc1_smooth, breaks)]
-                ).ravel()[:, None] * np.ones(fm.shape)
-                pc2_bin = np.vstack(
-                    [np.median(t1, axis=0) for t1 in np.array_split(pc2_smooth, breaks)]
-                ).ravel()[:, None] * np.ones(fm.shape)
-            else:
-                pc1_bin = pc1_smooth[breaks][:, None] * np.ones(fm.shape)
-                pc2_bin = pc2_smooth[breaks][:, None] * np.ones(fm.shape)
-
-            return (
-                ta,
-                tm,
-                fm_raw,
-                fm,
-                fem,
-                pc1_smooth,
-                pc2_smooth,
-                pc1_bin,
-                pc2_bin,
-            )
-
-        return ta, tm, fm_raw, fm, fem
-
     def build_time_model(
         self,
         plot=False,

From 5e2de6869aa13d7ac567df29309355d59d68d737 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Wed, 6 Apr 2022 12:54:21 -0700
Subject: [PATCH 15/53] normalize poscorr/centr and fixing pytest to work with
 new perturbation API

---
 src/psfmachine/machine.py | 28 +++++++++++++----------
 tests/test_machine.py     | 47 +++++++++++++++++++--------------------
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 11a5ff2..1fadc5b 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -4,7 +4,6 @@
 import numpy as np
 import pandas as pd
 from scipy import sparse
-from scipy.ndimage import gaussian_filter1d
 import astropy.units as u
 from tqdm import tqdm
 import matplotlib.pyplot as plt
@@ -12,10 +11,8 @@
 
 from .utils import (
     _make_A_polar,
-    _make_A_cartesian,
     solve_linear_model,
     sparse_lessthan,
-    _combine_A,
     threshold_bin,
     get_breaks,
     smooth_vector,
@@ -638,14 +635,14 @@ def build_time_model(
         ----------
         plot: boolean
             Plot a diagnostic figure.
-        bin_method: boolean
-            If True the `time` and `pos_corr` arrays will be downsampled instead of
-            binned.
-        split_time_segments : boolean, list/array of ints
+        bin_method: string
+            Type of bin method, options are "bin" and "downsample".
+        split_time_segments : boolean
             If `True` will split the light curve into segments to fit different time
-            models with a commong pixel normalization. If `False` will fit the full
-            time series as one segment. If list or numpy array of ints, will use the
-            index values as segment breaks.
+            models with a common pixel normalization. If `False` will fit the full
+            time series as one segment.
+        focus_component: boolean
+            Add a component that models th focus change at the begining of a segment.
         """
         # create the time and space basis
         _whitened_time = (self.time - self.time.mean()) / (
@@ -677,11 +674,18 @@ def build_time_model(
                 mpc1 = self.ra_centroid.to("arcsec").value / 4
                 mpc2 = self.dec_centroid.to("arcsec").value / 4
 
-            # we smooth the vectors
+            # smooth the vectors
             mpc1_smooth, mpc2_smooth = smooth_vector(
                 [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size
             )
-            # we combine them as the first order
+            # normalize components
+            mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / (
+                mpc1_smooth.max() - mpc1_smooth.mean()
+            )
+            mpc2_smooth = (mpc2_smooth - mpc2_smooth.mean()) / (
+                mpc2_smooth.max() - mpc2_smooth.mean()
+            )
+            # combine them as the first order
             other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth]
 
         # create a 3D perturbation matrix
diff --git a/tests/test_machine.py b/tests/test_machine.py
index 00d3c02..ebfadfb 100644
--- a/tests/test_machine.py
+++ b/tests/test_machine.py
@@ -120,19 +120,13 @@ def test_compute_aperture_photometry():
 @pytest.mark.remote_data
 def test_poscorr_smooth():
     machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False)
-    machine._get_source_mask()
-    machine.poscorr_filter_size = 0.4
-    (
-        time_original,
-        time_binned,
-        flux_binned_raw,
-        flux_binned,
-        flux_err_binned,
-        poscorr1_smooth,
-        poscorr2_smooth,
-        poscorr1_binned,
-        poscorr2_binned,
-    ) = machine._time_bin(npoints=3)
+    machine.build_shape_model(plot=False)
+    # no segments
+    machine.time_corrector = "pos_corr"
+    machine.poscorr_filter_size = 1
+    machine.build_time_model(
+        split_time_segments=False, bin_method="bin", focus_component=False
+    )
 
     median_pc1 = np.nanmedian(machine.pos_corr1, axis=0)
     median_pc2 = np.nanmedian(machine.pos_corr2, axis=0)
@@ -143,24 +137,29 @@ def test_poscorr_smooth():
         median_pc2.max() - median_pc2.mean()
     )
 
-    assert np.isclose(poscorr1_smooth, median_pc1, atol=1e-3).all()
-    assert np.isclose(poscorr2_smooth, median_pc2, atol=1e-3).all()
+    assert np.isclose(machine.P3.other_vectors[:, 0], median_pc1, atol=0.5).all()
+    assert np.isclose(machine.P3.other_vectors[:, 1], median_pc2, atol=0.5).all()
 
 
 @pytest.mark.remote_data
 def test_segment_time_model():
     # testing segment with the current test dataset we have that only has 10 cadences
     # isn't the best, but we can still do some sanity checks.
-    machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False, n_time_points=3)
+    machine = TPFMachine.from_TPFs(
+        tpfs, apply_focus_mask=False, n_time_points=3, time_corrector="polynomial"
+    )
     machine.build_shape_model(plot=False)
     # no segments
-    machine.build_time_model(split_time_model=False, downsample=True)
-    assert machine.time_model_w.shape[0] == machine.seg_splits.shape[0] - 1
-    assert machine.time_model_w.shape[0] == machine._time_masked.shape[0]
+    machine.build_time_model(
+        split_time_segments=False, bin_method="bin", focus_component=False
+    )
+    assert machine.P3.vectors.shape == (10, 4)
 
-    # need tighter knot spacing
-    machine.n_time_points = 2
+    # fake 2 time breaks
+    machine.time[4:] += 0.5
+    machine.time[7:] += 0.41
     # user defined segments
-    machine.build_time_model(split_time_model=[5], downsample=True)
-    assert machine.time_model_w.shape[0] == machine.seg_splits.shape[0] - 1
-    assert machine.time_model_w.shape[0] == machine._time_masked.shape[0]
+    machine.build_time_model(
+        split_time_segments=True, bin_method="bin", focus_component=False
+    )
+    assert machine.P3.vectors.shape == (10, 4 * 3)

From cceeb2bae5f1cd63a45a325c6aa42158585a2153 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 11 Apr 2022 10:58:23 -0700
Subject: [PATCH 16/53] smooth_vector API improved

---
 src/psfmachine/machine.py |  5 +++--
 src/psfmachine/utils.py   | 22 ++++++++++++++++++----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 1fadc5b..f120976 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -659,7 +659,6 @@ def build_time_model(
         uncontaminated_pixels = uncontaminated_pixels.data
 
         # add other vectors if asked, centroids or poscorrs
-        splits = get_breaks(self.time)
         self.poscorr_filter_size = (
             0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size
         )
@@ -676,7 +675,9 @@ def build_time_model(
 
             # smooth the vectors
             mpc1_smooth, mpc2_smooth = smooth_vector(
-                [mpc1, mpc2], splits=splits, filter_size=self.poscorr_filter_size
+                [mpc1, mpc2],
+                time=self.time,
+                filter_size=self.poscorr_filter_size,
             )
             # normalize components
             mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / (
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index cafe3c7..e2796d0 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -555,7 +555,7 @@ def get_breaks(time):
     return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
 
 
-def smooth_vector(v, splits=None, filter_size=13, mode="mirror"):
+def smooth_vector(v, time=None, filter_size=13, mode="mirror"):
     """
     Smooth out a vector
 
@@ -563,8 +563,8 @@ def smooth_vector(v, splits=None, filter_size=13, mode="mirror"):
     ----------
     v : numpy.ndarray or list of numpy.ndarray
         Arrays to be smoothen in the last axis
-    splits : list of index
-        List of index where `v` has discontinuity
+    time : numpy.ndarray
+        Time array of same shape of `v` last axis used to find data discontinuity.
     filter_size : int
         Filter window size
     mode : str
@@ -578,8 +578,22 @@ def smooth_vector(v, splits=None, filter_size=13, mode="mirror"):
     else:
         v = np.atleast_2d(v)
 
-    if splits is None:
+    if time is None:
         splits = [0, v.shape[1]]
+    elif isinstance(time, np.ndarray):
+        splits = get_breaks(time)
+        # find poscorr discontinuity in each axis
+        grads = np.gradient(v, time, axis=1)
+        # the 7-sigma here is hardcoded and found to work ok
+        splits = np.unique(
+            np.concatenate(
+                [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+            )
+        )
+    else:
+        raise ValueError(
+            "`time` optional argument invalid, pass None or an array like."
+        )
 
     for i in range(1, len(splits)):
         v_smooth.append(

From c3cdfb1c3c507dcfcfd7d625162a9a8cc19df9a0 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 11 Apr 2022 12:38:10 -0700
Subject: [PATCH 17/53] gaussian smooth -> spline smooth

---
 src/psfmachine/machine.py |  15 +-----
 src/psfmachine/utils.py   | 105 ++++++++++++++++++++++++++++++--------
 2 files changed, 86 insertions(+), 34 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index f120976..2196b5a 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -14,8 +14,7 @@
     solve_linear_model,
     sparse_lessthan,
     threshold_bin,
-    get_breaks,
-    smooth_vector,
+    spline_smooth,
 )
 from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP
 from .perturbation import PerturbationMatrix3D
@@ -134,8 +133,6 @@ def __init__(
         time_corrector: string
             The type of time corrector that will be used to build the time model,
             default is a "polynomial" for a polynomial in time, it can also be "pos_corr"
-        poscorr_filter_size: int
-            Standard deviation for Gaussian kernel to be used to smooth the pos_corrs
         cartesian_knot_spacing: string
             Defines the type of spacing between knots in cartessian space to generate
             the design matrix, options are "linear" or "sqrt".
@@ -167,10 +164,6 @@ def __init__(
         self.cut_r = cut_r
         self.sparse_dist_lim = sparse_dist_lim * u.arcsecond
         self.time_corrector = "polynomial"
-        # if pos_corr, then we can smooth the vector with a Gaussian kernel of size
-        # poscorr_filter_size, if this is < 0.5 -> no smoothing, default is 12
-        # beacause of 6hr-CDPP
-        self.poscorr_filter_size = 12
         self.cartesian_knot_spacing = "sqrt"
         # disble tqdm prgress bar when running in HPC
         self.quiet = False
@@ -659,9 +652,6 @@ def build_time_model(
         uncontaminated_pixels = uncontaminated_pixels.data
 
         # add other vectors if asked, centroids or poscorrs
-        self.poscorr_filter_size = (
-            0.1 if self.poscorr_filter_size < 0.5 else self.poscorr_filter_size
-        )
         if self.time_corrector == "polynomial":
             other_vectors = None
         else:
@@ -674,10 +664,9 @@ def build_time_model(
                 mpc2 = self.dec_centroid.to("arcsec").value / 4
 
             # smooth the vectors
-            mpc1_smooth, mpc2_smooth = smooth_vector(
+            mpc1_smooth, mpc2_smooth = spline_smooth(
                 [mpc1, mpc2],
                 time=self.time,
-                filter_size=self.poscorr_filter_size,
             )
             # normalize components
             mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / (
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index e2796d0..54eeeb7 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -7,6 +7,7 @@
 from scipy import sparse
 from patsy import dmatrix
 from scipy.ndimage import gaussian_filter1d
+from scipy import interpolate
 import pyia
 
 # size_limit is 1GB
@@ -552,38 +553,42 @@ def get_breaks(time):
         An array of indexes with the break positions
     """
     dts = np.diff(time)
-    return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
+    return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time) - 1])
 
 
-def smooth_vector(v, time=None, filter_size=13, mode="mirror"):
+def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"):
     """
-    Smooth out a vector
+    Applies a Gaussian smoothing to a curve.
 
     Parameters
     ----------
-    v : numpy.ndarray or list of numpy.ndarray
+    y : numpy.ndarray or list of numpy.ndarray
         Arrays to be smoothen in the last axis
-    time : numpy.ndarray
-        Time array of same shape of `v` last axis used to find data discontinuity.
+    x : numpy.ndarray
+        Time array of same shape of `y` last axis used to find data discontinuity.
     filter_size : int
         Filter window size
     mode : str
         The `mode` parameter determines how the input array is extended
         beyond its boundaries. Options are {'reflect', 'constant', 'nearest', 'mirror',
         'wrap'}. Default is 'mirror'
+
+    Returns
+    -------
+    y_smooth : numpy.ndarray
+        Smooth array.
     """
-    v_smooth = []
-    if isinstance(v, list):
-        v = np.asarray(v)
+    if isinstance(y, list):
+        y = np.asarray(y)
     else:
-        v = np.atleast_2d(v)
+        y = np.atleast_2d(y)
 
-    if time is None:
-        splits = [0, v.shape[1]]
-    elif isinstance(time, np.ndarray):
-        splits = get_breaks(time)
+    if x is None:
+        splits = [0, y.shape[1]]
+    elif isinstance(x, np.ndarray):
+        splits = get_breaks(x)
         # find poscorr discontinuity in each axis
-        grads = np.gradient(v, time, axis=1)
+        grads = np.gradient(y, x, axis=1)
         # the 7-sigma here is hardcoded and found to work ok
         splits = np.unique(
             np.concatenate(
@@ -591,17 +596,75 @@ def smooth_vector(v, time=None, filter_size=13, mode="mirror"):
             )
         )
     else:
-        raise ValueError(
-            "`time` optional argument invalid, pass None or an array like."
-        )
+        raise ValueError("`x` optional argument invalid, pass None or an array like.")
 
+    y_smooth = []
     for i in range(1, len(splits)):
-        v_smooth.append(
+        y_smooth.append(
             gaussian_filter1d(
-                v[:, splits[i - 1] : splits[i]],
+                y[:, splits[i - 1] : splits[i]],
                 filter_size,
                 mode=mode,
                 axis=1,
             )
         )
-    return np.hstack(v_smooth)
+    return np.hstack(y_smooth)
+
+
+def spline_smooth(y, x, weights=None, degree=3, do_splits=True):
+    """
+    Applies a spline smoothing to a curve.
+
+    Parameters
+    ----------
+    y : numpy.ndarray or list of numpy.ndarray
+        Arrays to be smoothen in the last axis
+    x : numpy.ndarray
+        Time array of same shape of `y` last axis.
+    weights : numpy.ndarray or list of numpy.ndarray
+        Optional array of weights the same shape as `y`.
+    degree : int
+        Degree of the psline fit, default is 3.
+    do_splits : boolean
+        Do the splines per segments with splits defined by data discontinuity.
+
+    Returns
+    -------
+    y_smooth : numpy.ndarray
+        Smooth array.
+    """
+    if isinstance(y, list):
+        y = np.asarray(y)
+    else:
+        y = np.atleast_2d(y)
+
+    if do_splits:
+        splits = get_breaks(x)
+        # find poscorr discontinuity in each axis
+        grads = np.gradient(y, x, axis=1)
+        # the 7-sigma here is hardcoded and found to work ok
+        splits = np.unique(
+            np.concatenate(
+                [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+            )
+        )
+    else:
+        splits = [0, -1]
+    if weights is None:
+        s = 5e-4
+        weights = np.ones_like(y)
+    else:
+        if isinstance(weights, list):
+            weights = np.asarray(weights)
+        else:
+            weights = np.atleast_2d(weights)
+        # s = weights.shape[-1] - np.sqrt(2 * weights.shape[-1])
+        s = 1.9e-10
+        if y.shape != weights.shape:
+            raise ValueError("Inconsistent shape between `v` and `weights`")
+
+    y_smooth = []
+    for y_, w in zip(y, weights):
+        tck = interpolate.splrep(x, y_, w=w, s=s, xb=x[splits], k=degree)
+        y_smooth.append(interpolate.splev(x, tck, der=0))
+    return np.array(y_smooth)

From e5f3f8e4d558fc8d690630529aea7efb4295a3fd Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 11 Apr 2022 12:42:45 -0700
Subject: [PATCH 18/53] matching arguments between `perturbation` and `machine`

---
 src/psfmachine/machine.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 2196b5a..cbe7d52 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -612,8 +612,8 @@ def build_time_model(
         self,
         plot=False,
         bin_method="bin",
-        split_time_segments=False,
-        focus_component=False,
+        segments=False,
+        focus=False,
     ):
         """
         Builds a time model that moves the PRF model to account for the scene movement
@@ -630,11 +630,11 @@ def build_time_model(
             Plot a diagnostic figure.
         bin_method: string
             Type of bin method, options are "bin" and "downsample".
-        split_time_segments : boolean
+        segments : boolean
             If `True` will split the light curve into segments to fit different time
             models with a common pixel normalization. If `False` will fit the full
             time series as one segment.
-        focus_component: boolean
+        focus: boolean
             Add a component that models th focus change at the begining of a segment.
         """
         # create the time and space basis
@@ -683,8 +683,8 @@ def build_time_model(
             time=_whitened_time,
             dx=dx,
             dy=dy,
-            segments=split_time_segments,
-            focus=focus_component,
+            segments=segments,
+            focus=focus,
             resolution=self.n_time_points,
             other_vectors=other_vectors,
             bin_method=bin_method,

From 84a064c022a80a6945e7241bf029a8797ac4df53 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 15:43:25 -0700
Subject: [PATCH 19/53] :sparkles: Added PCA method

---
 src/psfmachine/perturbation.py | 67 ++++++++++++++++++++--------------
 tests/test_perturbation.py     | 20 ++++++++++
 2 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 6620eff..5e81009 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -6,6 +6,7 @@
 from scipy import sparse
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
+from fbpca import pca
 
 
 class PerturbationMatrix(object):
@@ -36,7 +37,6 @@ def __init__(
         other_vectors: Optional[list] = None,
         poly_order: int = 3,
         focus: bool = False,
-        cbvs: bool = False,
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "bin",
@@ -46,20 +46,20 @@ def __init__(
         self.other_vectors = other_vectors
         self.poly_order = poly_order
         self.focus = focus
-        self.cbvs = cbvs
         self.segments = segments
         self.resolution = resolution
         self.bin_method = bin_method
-        self.vectors = np.vstack(
-            [self.time ** idx for idx in range(self.poly_order + 1)]
+        self._vectors = np.vstack(
+            [
+                (self.time - self.time.mean()) ** idx
+                for idx in range(self.poly_order + 1)
+            ]
         ).T
-        if self.cbvs:
-            self._get_cbvs()
         if self.focus:
             self._get_focus_change()
         self._validate()
         if self.segments:
-            self._cut_segments()
+            self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
@@ -93,10 +93,12 @@ def _validate(self):
                         raise ValueError("Must pass other vectors in the right shape")
             else:
                 raise ValueError("Must pass a list as other vectors")
-            self.vectors = np.hstack([self.vectors, self.other_vectors])
+            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
+        else:
+            self.vectors = self._vectors.copy()
         return
 
-    def _cut_segments(self):
+    def _cut_segments(self, vectors):
         """
         Cuts the data into "segments" wherever there is a break. Breaks are defined
         as anywhere where there is a gap in data of more than 5 times the median
@@ -106,11 +108,8 @@ def _cut_segments(self):
         masks = np.asarray(
             [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
         ).T
-        self.vectors = np.hstack(
-            [
-                self.vectors[:, idx][:, None] * masks
-                for idx in range(self.vectors.shape[1])
-            ]
+        return np.hstack(
+            [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])]
         )
 
     def _get_focus_change(self, exptime: float = 100):
@@ -135,7 +134,7 @@ def _get_focus_change(self, exptime: float = 100):
             ]
         )
         focus[focus < 1e-10] = 0
-        self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]])
+        self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]])
         return
 
     def _clean_vectors(self):
@@ -143,8 +142,6 @@ def _clean_vectors(self):
         nvec = self.poly_order + 1
         if self.focus:
             nvec += 1
-        if self.cbvs:
-            nvec += self.ncbvs
         if self.segments:
             s = nvec * (len(self.breaks) + 1)
         else:
@@ -160,14 +157,6 @@ def plot(self):
         ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
         return fig
 
-    def _get_cbvs(self):
-        self.ncbvs = 0
-        # use lightkurve to get CBVs for a given time?????????
-        # Might need channel information maybe
-        # self.ncbvs = ....
-        #        self.vectors = np.hstack([self.vectors, cbvs])
-        raise NotImplementedError
-
     def _fit_linalg(self, y, ye, k=None):
         """Hidden method to fit data with linalg"""
         if k is None:
@@ -296,6 +285,32 @@ def func(x, quad=False):
 
         return func
 
+    def pca(self, y, ncomponents=5):
+        """In place operation to add the principal components of `y` to `other_vectors`
+
+        Parameters
+        ----------
+        y: np.ndarray
+            Input flux array to take PCA of.
+        """
+        if not y.ndim == 2:
+            raise ValueError("Must pass a 2D `y`")
+        if not y.shape[0] == len(self.time):
+            raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)")
+
+        # Clean out any time series have significant contribution from one component
+        k = np.ones(y.shape[1], bool)
+        for count in range(3):
+            U, s, V = pca(y[:, k], ncomponents, n_iter=30)
+            k[k] &= (np.abs(V) < 0.5).all(axis=0)
+
+        self._vectors = np.hstack([self._vectors, U])
+        if self.segments:
+            self._vectors = self._cut_segments(self._vectors)
+        self._validate()
+        self._clean_vectors()
+        self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
+
 
 class PerturbationMatrix3D(PerturbationMatrix):
     """Class to handle 3D perturbation matrices in PSFMachine
@@ -336,7 +351,6 @@ def __init__(
         nknots: int = 10,
         radius: float = 8,
         focus: bool = False,
-        cbvs: bool = False,
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "downsample",
@@ -357,7 +371,6 @@ def __init__(
             other_vectors=other_vectors,
             poly_order=poly_order,
             focus=focus,
-            cbvs=cbvs,
             segments=segments,
             resolution=resolution,
             bin_method=bin_method,
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index c3f93f5..ed683e5 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -64,6 +64,26 @@ def test_perturbation_matrix():
         assert chi < 3
 
     # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1)
+    flux = np.random.normal(1, 0.1, size=(200, 100))
+    assert p.matrix.shape == (20, 8)
+    p.pca(flux, ncomponents=5)
+    assert p.matrix.shape == (20, 18)
+    for bin_method in ["downsample", "bin"]:
+        s = 200 / res + 1 if bin_method == "downsample" else 200 / res
+        p = PerturbationMatrix(
+            time=time, focus=False, resolution=res, bin_method=bin_method
+        )
+        assert len(p.bin_func(y)) == s
+        assert len(p.bin_func(ye, quad=True)) == s
+        with pytest.raises(ValueError):
+            p.bin_func(y[:-4])
+
+        p.fit(y, ye)
+        w = p.weights
+        model = p.model()
+        assert model.shape[0] == 200
+        chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
+        assert chi < 3
 
 
 def test_perturbation_matrix3d():

From 3da1557e5f612e7feb99ca04aa555c4ee02c0e8e Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 16:05:07 -0700
Subject: [PATCH 20/53] :art: added fbpca explicitly as dependency

---
 poetry.lock                    | 18 +-----------------
 pyproject.toml                 |  1 +
 src/psfmachine/perturbation.py | 17 +++++++----------
 tests/test_perturbation.py     |  2 +-
 4 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index df5b865..05d834a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2028,7 +2028,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "a7b6f13fbee1055129e929d7ca8c9f4ebe00aa78ba2cdf9c960c8656bc69429a"
+content-hash = "85a0cd463a7872e91f124ec89b42dda1c3cb1c873a5f0cf1427caf4b2bde55c3"
 
 [metadata.files]
 aesara-theano-fallback = [
@@ -2062,10 +2062,6 @@ argon2-cffi = [
     {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"},
     {file = "argon2_cffi-20.1.0-cp39-cp39-win32.whl", hash = "sha256:e2db6e85c057c16d0bd3b4d2b04f270a7467c147381e8fd73cbbe5bc719832be"},
     {file = "argon2_cffi-20.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a84934bd818e14a17943de8099d41160da4a336bcc699bb4c394bbb9b94bd32"},
-    {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b94042e5dcaa5d08cf104a54bfae614be502c6f44c9c89ad1535b2ebdaacbd4c"},
-    {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:8282b84ceb46b5b75c3a882b28856b8cd7e647ac71995e71b6705ec06fc232c3"},
-    {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"},
-    {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"},
 ]
 arviz = [
     {file = "arviz-0.11.2-py3-none-any.whl", hash = "sha256:f6a1389a90b53335f248d282c8142b8209150b9625459a85ec6d3d38786797c1"},
@@ -2160,36 +2156,24 @@ cffi = [
     {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"},
     {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"},
     {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"},
-    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24ec4ff2c5c0c8f9c6b87d5bb53555bf267e1e6f70e52e5a9740d32861d36b6f"},
-    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c3f39fa737542161d8b0d680df2ec249334cd70a8f420f71c9304bd83c3cbed"},
-    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:681d07b0d1e3c462dd15585ef5e33cb021321588bebd910124ef4f4fb71aef55"},
     {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"},
     {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"},
     {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"},
-    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d7cd1abac2ffd92e65c0609661866709b4b2d82dd15f611e602b9b188b0b69"},
-    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f861a89e0043afec2a51fd177a567005847973be86f709bbb044d7f42fc4e05"},
-    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc5a8e069b9ebfa22e26d0e6b97d6f9781302fe7f4f2b8776c3e1daea35f1adc"},
     {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"},
     {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"},
     {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"},
-    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04c468b622ed31d408fea2346bec5bbffba2cc44226302a0de1ade9f5ea3d373"},
-    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06db6321b7a68b2bd6df96d08a5adadc1fa0e8f419226e25b2a5fbf6ccc7350f"},
-    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:293e7ea41280cb28c6fcaaa0b1aa1f533b8ce060b9e701d78511e1e6c4a1de76"},
     {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"},
     {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"},
     {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"},
-    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf1ac1984eaa7675ca8d5745a8cb87ef7abecb5592178406e55858d411eadc0"},
-    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:df5052c5d867c1ea0b311fb7c3cd28b19df469c056f7fdcfe88c7473aa63e333"},
-    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24a570cd11895b60829e941f2613a4f79df1a27344cbbb82164ef2e0116f09c7"},
     {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"},
     {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"},
     {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"},
diff --git a/pyproject.toml b/pyproject.toml
index 3926155..acfaf8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ diskcache = "^5.3.0"
 imageio = "^2.9.0"
 imageio-ffmpeg = "^0.4.5"
 kbackground = "0.1.6"
+fbpca = "^1.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "^6.1.2"
diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 5e81009..21c8e76 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -29,6 +29,8 @@ class PerturbationMatrix(object):
         How many cadences to bin down via `bin_method`
     bin_method: str
         How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin'
+    focus_exptime: float
+        Time for the exponent for focus change, if used
     """
 
     def __init__(
@@ -40,6 +42,7 @@ def __init__(
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "bin",
+        focus_exptime=50,
     ):
 
         self.time = time
@@ -49,6 +52,7 @@ def __init__(
         self.segments = segments
         self.resolution = resolution
         self.bin_method = bin_method
+        self.focus_exptime = focus_exptime
         self._vectors = np.vstack(
             [
                 (self.time - self.time.mean()) ** idx
@@ -112,18 +116,11 @@ def _cut_segments(self, vectors):
             [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])]
         )
 
-    def _get_focus_change(self, exptime: float = 100):
-        """
-        Finds a simple model for the focus change
-
-        Parameters
-        ----------
-        exptime: float
-            The timescale for an exponential decay.
-        """
+    def _get_focus_change(self):
+        """Finds a simple model for the focus change"""
         focus = np.asarray(
             [
-                np.exp(-exptime * (self.time - self.time[b]))
+                np.exp(-self.focus_exptime * (self.time - self.time[b]))
                 for b in np.hstack([0, self.breaks])
             ]
         )
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index ed683e5..42f6602 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -63,7 +63,7 @@ def test_perturbation_matrix():
         chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
         assert chi < 3
 
-    # chi = np.sum((y - model) ** 2 / (ye ** 2), axis=0) / (p.shape[0] - p.shape[1] - 1)
+    # Test PCA
     flux = np.random.normal(1, 0.1, size=(200, 100))
     assert p.matrix.shape == (20, 8)
     p.pca(flux, ncomponents=5)

From 15200011772b163339d9619fa4e33322cae1d68e Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 16:09:59 -0700
Subject: [PATCH 21/53] :memo: docs

---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 21c8e76..1db444a 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -283,7 +283,7 @@ def func(x, quad=False):
         return func
 
     def pca(self, y, ncomponents=5):
-        """In place operation to add the principal components of `y` to `other_vectors`
+        """In place operation to add the principal components of `y` to the design matrix
 
         Parameters
         ----------

From 612d3fdd3d77cfa4250760d508f53b45d09069ad Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 11 Apr 2022 19:53:22 -0700
Subject: [PATCH 22/53] merging from refactor-time

---
 src/psfmachine/perturbation.py | 159 +++++++++++++--------------------
 1 file changed, 60 insertions(+), 99 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index a4717fc..1db444a 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -6,7 +6,7 @@
 from scipy import sparse
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
-from matplotlib import cm
+from fbpca import pca
 
 
 class PerturbationMatrix(object):
@@ -24,13 +24,13 @@ class PerturbationMatrix(object):
     focus : bool
         Whether to correct focus using a simple exponent model
     segments: bool
-        Whether to fit portions of data where there is a significant time break as
-        separate segments
+        Whether to fit portions of data where there is a significant time break as separate segments
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
-        How to bin the data under the hood. Default is by mean binning. Options are
-        'downsample' and 'bin'
+        How to bin the data under the hood. Default is by mean binning. Options are 'downsample' and 'bin'
+    focus_exptime: float
+        Time for the exponent for focus change, if used
     """
 
     def __init__(
@@ -39,30 +39,31 @@ def __init__(
         other_vectors: Optional[list] = None,
         poly_order: int = 3,
         focus: bool = False,
-        cbvs: bool = False,
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "bin",
+        focus_exptime=50,
     ):
 
         self.time = time
         self.other_vectors = other_vectors
         self.poly_order = poly_order
         self.focus = focus
-        self.cbvs = cbvs
         self.segments = segments
         self.resolution = resolution
         self.bin_method = bin_method
-        self.vectors = np.vstack(
-            [self.time ** idx for idx in range(self.poly_order + 1)]
+        self.focus_exptime = focus_exptime
+        self._vectors = np.vstack(
+            [
+                (self.time - self.time.mean()) ** idx
+                for idx in range(self.poly_order + 1)
+            ]
         ).T
-        if self.cbvs:
-            self._get_cbvs()
         if self.focus:
             self._get_focus_change()
         self._validate()
         if self.segments:
-            self._cut_segments()
+            self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
@@ -96,10 +97,12 @@ def _validate(self):
                         raise ValueError("Must pass other vectors in the right shape")
             else:
                 raise ValueError("Must pass a list as other vectors")
-            self.vectors = np.hstack([self.vectors, self.other_vectors])
+            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
+        else:
+            self.vectors = self._vectors.copy()
         return
 
-    def _cut_segments(self):
+    def _cut_segments(self, vectors):
         """
         Cuts the data into "segments" wherever there is a break. Breaks are defined
         as anywhere where there is a gap in data of more than 5 times the median
@@ -109,25 +112,15 @@ def _cut_segments(self):
         masks = np.asarray(
             [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
         ).T
-        self.vectors = np.hstack(
-            [
-                self.vectors[:, idx][:, None] * masks
-                for idx in range(self.vectors.shape[1])
-            ]
+        return np.hstack(
+            [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])]
         )
 
-    def _get_focus_change(self, exptime: float = 100):
-        """
-        Finds a simple model for the focus change
-
-        Parameters
-        ----------
-        exptime: float
-            The timescale for an exponential decay.
-        """
+    def _get_focus_change(self):
+        """Finds a simple model for the focus change"""
         focus = np.asarray(
             [
-                np.exp(-exptime * (self.time - self.time[b]))
+                np.exp(-self.focus_exptime * (self.time - self.time[b]))
                 for b in np.hstack([0, self.breaks])
             ]
         )
@@ -138,7 +131,7 @@ def _get_focus_change(self, exptime: float = 100):
             ]
         )
         focus[focus < 1e-10] = 0
-        self.vectors = np.hstack([self.vectors, np.nansum(focus, axis=0)[:, None]])
+        self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]])
         return
 
     def _clean_vectors(self):
@@ -146,8 +139,6 @@ def _clean_vectors(self):
         nvec = self.poly_order + 1
         if self.focus:
             nvec += 1
-        if self.cbvs:
-            nvec += self.ncbvs
         if self.segments:
             s = nvec * (len(self.breaks) + 1)
         else:
@@ -158,47 +149,11 @@ def _clean_vectors(self):
         return
 
     def plot(self):
-        """
-        Creates a diagnostic plot showing the vector components. Will show the full
-        (line) and bindown (dots) version of the components.
-
-        """
-        fig, ax = plt.subplots(1, 2, figsize=(12, 4))
-        fig.suptitle("Vectors")
-        bin_time = self.bin_func(self.time)
-        bin_vec = self.bin_func(self.vectors)
-        colors = cm.get_cmap('Set1', self.vectors.shape[1])
-        for k in range(self.vectors.shape[1]):
-            ax[0].plot(self.time, self.vectors[:, k] + k * 0.1, color=colors(k))
-            ax[0].plot(
-                bin_time,
-                bin_vec[:, k] + k * 0.1,
-                marker=".",
-                lw=0,
-                color=colors(k),
-            )
-        ax[0].set(xlabel="Time", ylabel="Vector", yticks=[])
-
-        im = ax[1].imshow(
-            self.vectors.T,
-            origin="lower",
-            aspect="auto",
-            interpolation="nearest",
-            vmin=-1,
-            vmax=1,
-        )
-        ax[1].set(xlabel="Time Index", yticks=range(self.vectors.shape[1]))
-        plt.colorbar(im, ax=ax[1])
+        fig, ax = plt.subplots()
+        ax.plot(self.time, self.vectors + np.arange(self.vectors.shape[1]) * 0.1)
+        ax.set(xlabel="Time", ylabel="Vector", yticks=[], title="Vectors")
         return fig
 
-    def _get_cbvs(self):
-        self.ncbvs = 0
-        # use lightkurve to get CBVs for a given time?????????
-        # Might need channel information maybe
-        # self.ncbvs = ....
-        #        self.vectors = np.hstack([self.vectors, cbvs])
-        raise NotImplementedError
-
     def _fit_linalg(self, y, ye, k=None):
         """Hidden method to fit data with linalg"""
         if k is None:
@@ -327,6 +282,32 @@ def func(x, quad=False):
 
         return func
 
+    def pca(self, y, ncomponents=5):
+        """In place operation to add the principal components of `y` to the design matrix
+
+        Parameters
+        ----------
+        y: np.ndarray
+            Input flux array to take PCA of.
+        """
+        if not y.ndim == 2:
+            raise ValueError("Must pass a 2D `y`")
+        if not y.shape[0] == len(self.time):
+            raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)")
+
+        # Clean out any time series have significant contribution from one component
+        k = np.ones(y.shape[1], bool)
+        for count in range(3):
+            U, s, V = pca(y[:, k], ncomponents, n_iter=30)
+            k[k] &= (np.abs(V) < 0.5).all(axis=0)
+
+        self._vectors = np.hstack([self._vectors, U])
+        if self.segments:
+            self._vectors = self._cut_segments(self._vectors)
+        self._validate()
+        self._clean_vectors()
+        self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
+
 
 class PerturbationMatrix3D(PerturbationMatrix):
     """Class to handle 3D perturbation matrices in PSFMachine
@@ -350,8 +331,7 @@ class PerturbationMatrix3D(PerturbationMatrix):
     focus : bool
         Whether to correct focus using a simple exponent model
     segments: bool
-        Whether to fit portions of data where there is a significant time break as
-        separate segments
+        Whether to fit portions of data where there is a significant time break as separate segments
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
@@ -368,7 +348,6 @@ def __init__(
         nknots: int = 10,
         radius: float = 8,
         focus: bool = False,
-        cbvs: bool = False,
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "downsample",
@@ -389,7 +368,6 @@ def __init__(
             other_vectors=other_vectors,
             poly_order=poly_order,
             focus=focus,
-            cbvs=cbvs,
             segments=segments,
             resolution=resolution,
             bin_method=bin_method,
@@ -427,8 +405,8 @@ def fit(
         pixel_mask: Optional[npt.ArrayLike] = None,
     ):
         """
-        Fits flux to find the best fit model weights. Optionally will include flux
-        errors. Sets the `self.weights` attribute with best fit weights.
+        Fits flux to find the best fit model weights. Optionally will include flux errors.
+        Sets the `self.weights` attribute with best fit weights.
 
         Parameters
         ----------
@@ -437,8 +415,8 @@ def fit(
         flux_err: npt.ArrayLike
             Optional flux errors. Should have shape ntimes x npixels.
         pixel_mask: npt.ArrayLike
-            Pixel mask to apply. Values that are `True` will be used in the fit.
-            Values that are `False` will be masked. Should have shape npixels.
+            Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked.
+            Should have shape npixels.
         """
         if pixel_mask is not None:
             if not isinstance(pixel_mask, np.ndarray):
@@ -487,28 +465,11 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
-    def plot_model(self, time_index: int = 0):
-        """Creates a diagnostic plot with the perturbation model at a given cadences
-
-        Parameters
-        ----------
-        time_index: int
-            Time index (cadence) to show in the figure.
-        """
+    def plot_model(self, time_index=0):
         if not hasattr(self, "weights"):
             raise ValueError("Run `fit` first.")
-        fig, ax = plt.subplots(figsize=(6, 5))
-        im = ax.scatter(
-            self.dx,
-            self.dy,
-            c=self.model(time_index)[0],
-            s=2,
-            cmap="viridis",
-            vmin=0.8,
-            vmax=1.2,
-        )
-        cbar = fig.colorbar(im, ax=ax, shrink=0.7)
-        cbar.set_label("Perturbation Value")
+        fig, ax = plt.subplots()
+        ax.scatter(self.dx, self.dy, c=self.model(time_index)[0])
         ax.set(
             xlabel=r"$\delta$x",
             ylabel=r"$\delta$y",

From 78633dc4e0b0152e0d1776a6513b93db54f34025 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 22:48:59 -0700
Subject: [PATCH 23/53] :bug: fixing pca

---
 src/psfmachine/perturbation.py | 61 +++++++++++++++++-----------------
 tests/test_perturbation.py     | 18 ++--------
 2 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 1db444a..0d173ab 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -46,7 +46,7 @@ def __init__(
     ):
 
         self.time = time
-        self.other_vectors = other_vectors
+        self.other_vectors = np.nan_to_num(other_vectors)
         self.poly_order = poly_order
         self.focus = focus
         self.segments = segments
@@ -61,7 +61,19 @@ def __init__(
         ).T
         if self.focus:
             self._get_focus_change()
-        self._validate()
+        if self.other_vectors is not None:
+            if isinstance(self.other_vectors, (list, np.ndarray)):
+                self.other_vectors = np.atleast_2d(self.other_vectors)
+                if self.other_vectors.shape[0] != len(self.time):
+                    if self.other_vectors.shape[1] == len(self.time):
+                        self.other_vectors = self.other_vectors.T
+                    else:
+                        raise ValueError("Must pass other vectors in the right shape")
+            else:
+                raise ValueError("Must pass a list as other vectors")
+            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
+        else:
+            self.vectors = self._vectors.copy()
         if self.segments:
             self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
@@ -82,26 +94,6 @@ def prior_sigma(self):
     def breaks(self):
         return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1
 
-    def _validate(self):
-        """
-        Checks that the `other_vectors` are the right dimensions.
-        """
-
-        if self.other_vectors is not None:
-            if isinstance(self.other_vectors, (list, np.ndarray)):
-                self.other_vectors = np.atleast_2d(self.other_vectors)
-                if self.other_vectors.shape[0] != len(self.time):
-                    if self.other_vectors.shape[1] == len(self.time):
-                        self.other_vectors = self.other_vectors.T
-                    else:
-                        raise ValueError("Must pass other vectors in the right shape")
-            else:
-                raise ValueError("Must pass a list as other vectors")
-            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
-        else:
-            self.vectors = self._vectors.copy()
-        return
-
     def _cut_segments(self, vectors):
         """
         Cuts the data into "segments" wherever there is a break. Breaks are defined
@@ -143,9 +135,14 @@ def _clean_vectors(self):
             s = nvec * (len(self.breaks) + 1)
         else:
             s = nvec
-        X = self.vectors[:, :s]
-        w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
-        self.vectors[:, s:] -= X.dot(w)
+
+        if s != self.vectors.shape[1]:
+            X = self.vectors[:, :s]
+            w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
+            self.vectors[:, s:] -= X.dot(w)
+            self.vectors[:, s:] -= np.asarray(
+                [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T]
+            ).T
         return
 
     def plot(self):
@@ -283,7 +280,7 @@ def func(x, quad=False):
         return func
 
     def pca(self, y, ncomponents=5):
-        """In place operation to add the principal components of `y` to the design matrix
+        """Adds the principal components of `y` to the design matrix
 
         Parameters
         ----------
@@ -298,13 +295,17 @@ def pca(self, y, ncomponents=5):
         # Clean out any time series have significant contribution from one component
         k = np.ones(y.shape[1], bool)
         for count in range(3):
-            U, s, V = pca(y[:, k], ncomponents, n_iter=30)
+            self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30)
             k[k] &= (np.abs(V) < 0.5).all(axis=0)
 
-        self._vectors = np.hstack([self._vectors, U])
+        if self.other_vectors is not None:
+            self.vectors = np.hstack(
+                [self._vectors, self.other_vectors, self._pca_components]
+            )
+        else:
+            self.vectors = np.hstack([self._vectors, self._pca_components])
         if self.segments:
-            self._vectors = self._cut_segments(self._vectors)
-        self._validate()
+            self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index 42f6602..d147261 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -68,22 +68,8 @@ def test_perturbation_matrix():
     assert p.matrix.shape == (20, 8)
     p.pca(flux, ncomponents=5)
     assert p.matrix.shape == (20, 18)
-    for bin_method in ["downsample", "bin"]:
-        s = 200 / res + 1 if bin_method == "downsample" else 200 / res
-        p = PerturbationMatrix(
-            time=time, focus=False, resolution=res, bin_method=bin_method
-        )
-        assert len(p.bin_func(y)) == s
-        assert len(p.bin_func(ye, quad=True)) == s
-        with pytest.raises(ValueError):
-            p.bin_func(y[:-4])
-
-        p.fit(y, ye)
-        w = p.weights
-        model = p.model()
-        assert model.shape[0] == 200
-        chi = np.sum((y - model) ** 2 / (ye ** 2)) / (p.shape[0] - p.shape[1] - 1)
-        assert chi < 3
+    assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
+    p.fit(y, ye)
 
 
 def test_perturbation_matrix3d():

From 14babe3e4ebcb64e81c956d512e961ccc832d69f Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 23:01:42 -0700
Subject: [PATCH 24/53] :bug: fix @jorgemarpa bug

---
 src/psfmachine/perturbation.py | 13 +++++++++++++
 tests/test_perturbation.py     | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 0d173ab..4e9e98d 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -140,6 +140,7 @@ def _clean_vectors(self):
             X = self.vectors[:, :s]
             w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
             self.vectors[:, s:] -= X.dot(w)
+            # Each segment has mean zero
             self.vectors[:, s:] -= np.asarray(
                 [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T]
             ).T
@@ -279,7 +280,11 @@ def func(x, quad=False):
 
         return func
 
+
     def pca(self, y, ncomponents=5):
+        return self._pca(y, ncomponents=ncomponents)
+
+    def _pca(self, y, ncomponents=5):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -352,6 +357,7 @@ def __init__(
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "downsample",
+        focus_exptime: float = 50,
     ):
         self.dx = dx
         self.dy = dy
@@ -372,8 +378,11 @@ def __init__(
             segments=segments,
             resolution=resolution,
             bin_method=bin_method,
+            focus_exptime=focus_exptime,
         )
+        self._get_cartesian_stacked()
 
+    def _get_cartesian_stacked(self):
         self._cartesian_stacked = sparse.hstack(
             [self.cartesian_matrix for idx in range(self.vectors.shape[1])],
             format="csr",
@@ -466,6 +475,10 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
+    def pca(self, y, ncomponents=5):
+        self._pca(y, ncomponents=5)
+        self._get_cartesian_stacked()
+        
     def plot_model(self, time_index=0):
         if not hasattr(self, "weights"):
             raise ValueError("Run `fit` first.")
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index d147261..7a8e08f 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -129,6 +129,19 @@ def test_perturbation_matrix3d():
         )
         assert chi < 3
 
+    p3 = PerturbationMatrix3D(
+        time=time,
+        dx=dx,
+        dy=dy,
+        nknots=4,
+        radius=5,
+        poly_order=2,
+        bin_method=bin_method,
+    )
+    p3.pca(flux, ncomponents=5)
+    p3.fit(flux, flux_err)
+
+
     # Add in one bad pixel
     flux[:, 100] = 1e5
     pixel_mask = np.ones(169, bool)

From 9d4a80faa42a803dd34fa42258612c0d2a6447f4 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Mon, 11 Apr 2022 23:07:07 -0700
Subject: [PATCH 25/53] :memo: update docstrings

---
 src/psfmachine/perturbation.py | 18 +++++++++++++-----
 tests/test_perturbation.py     |  1 -
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 4e9e98d..14b5c38 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -280,11 +280,7 @@ def func(x, quad=False):
 
         return func
 
-
     def pca(self, y, ncomponents=5):
-        return self._pca(y, ncomponents=ncomponents)
-
-    def _pca(self, y, ncomponents=5):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -292,6 +288,11 @@ def _pca(self, y, ncomponents=5):
         y: np.ndarray
             Input flux array to take PCA of.
         """
+        return self._pca(y, ncomponents=ncomponents)
+
+    def _pca(self, y, ncomponents=5):
+        """This hidden method allows us to update the pca method for other classes
+        """
         if not y.ndim == 2:
             raise ValueError("Must pass a 2D `y`")
         if not y.shape[0] == len(self.time):
@@ -476,9 +477,16 @@ def model(self, time_indices: Optional[list] = None):
         )
 
     def pca(self, y, ncomponents=5):
+        """Adds the principal components of `y` to the design matrix
+
+        Parameters
+        ----------
+        y: np.ndarray
+            Input flux array to take PCA of.
+        """
         self._pca(y, ncomponents=5)
         self._get_cartesian_stacked()
-        
+
     def plot_model(self, time_index=0):
         if not hasattr(self, "weights"):
             raise ValueError("Run `fit` first.")
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index 7a8e08f..e0fcde1 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -141,7 +141,6 @@ def test_perturbation_matrix3d():
     p3.pca(flux, ncomponents=5)
     p3.fit(flux, flux_err)
 
-
     # Add in one bad pixel
     flux[:, 100] = 1e5
     pixel_mask = np.ones(169, bool)

From 88c7493e462d19cd110933ba39eeddd843c2a0da Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 10:36:46 -0700
Subject: [PATCH 26/53] merging from refactor-time

---
 src/psfmachine/perturbation.py | 81 +++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 1db444a..fc3cf39 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -46,7 +46,7 @@ def __init__(
     ):
 
         self.time = time
-        self.other_vectors = other_vectors
+        self.other_vectors = np.nan_to_num(other_vectors)
         self.poly_order = poly_order
         self.focus = focus
         self.segments = segments
@@ -61,7 +61,19 @@ def __init__(
         ).T
         if self.focus:
             self._get_focus_change()
-        self._validate()
+        if self.other_vectors is not None:
+            if isinstance(self.other_vectors, (list, np.ndarray)):
+                self.other_vectors = np.atleast_2d(self.other_vectors)
+                if self.other_vectors.shape[0] != len(self.time):
+                    if self.other_vectors.shape[1] == len(self.time):
+                        self.other_vectors = self.other_vectors.T
+                    else:
+                        raise ValueError("Must pass other vectors in the right shape")
+            else:
+                raise ValueError("Must pass a list as other vectors")
+            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
+        else:
+            self.vectors = self._vectors.copy()
         if self.segments:
             self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
@@ -82,26 +94,6 @@ def prior_sigma(self):
     def breaks(self):
         return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1
 
-    def _validate(self):
-        """
-        Checks that the `other_vectors` are the right dimensions.
-        """
-
-        if self.other_vectors is not None:
-            if isinstance(self.other_vectors, (list, np.ndarray)):
-                self.other_vectors = np.atleast_2d(self.other_vectors)
-                if self.other_vectors.shape[0] != len(self.time):
-                    if self.other_vectors.shape[1] == len(self.time):
-                        self.other_vectors = self.other_vectors.T
-                    else:
-                        raise ValueError("Must pass other vectors in the right shape")
-            else:
-                raise ValueError("Must pass a list as other vectors")
-            self.vectors = np.hstack([self._vectors.copy(), self.other_vectors])
-        else:
-            self.vectors = self._vectors.copy()
-        return
-
     def _cut_segments(self, vectors):
         """
         Cuts the data into "segments" wherever there is a break. Breaks are defined
@@ -143,9 +135,15 @@ def _clean_vectors(self):
             s = nvec * (len(self.breaks) + 1)
         else:
             s = nvec
-        X = self.vectors[:, :s]
-        w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
-        self.vectors[:, s:] -= X.dot(w)
+
+        if s != self.vectors.shape[1]:
+            X = self.vectors[:, :s]
+            w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
+            self.vectors[:, s:] -= X.dot(w)
+            # Each segment has mean zero
+            self.vectors[:, s:] -= np.asarray(
+                [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T]
+            ).T
         return
 
     def plot(self):
@@ -283,13 +281,17 @@ def func(x, quad=False):
         return func
 
     def pca(self, y, ncomponents=5):
-        """In place operation to add the principal components of `y` to the design matrix
+        """Adds the principal components of `y` to the design matrix
 
         Parameters
         ----------
         y: np.ndarray
             Input flux array to take PCA of.
         """
+        return self._pca(y, ncomponents=ncomponents)
+
+    def _pca(self, y, ncomponents=5):
+        """This hidden method allows us to update the pca method for other classes"""
         if not y.ndim == 2:
             raise ValueError("Must pass a 2D `y`")
         if not y.shape[0] == len(self.time):
@@ -298,13 +300,17 @@ def pca(self, y, ncomponents=5):
         # Clean out any time series have significant contribution from one component
         k = np.ones(y.shape[1], bool)
         for count in range(3):
-            U, s, V = pca(y[:, k], ncomponents, n_iter=30)
+            self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30)
             k[k] &= (np.abs(V) < 0.5).all(axis=0)
 
-        self._vectors = np.hstack([self._vectors, U])
+        if self.other_vectors is not None:
+            self.vectors = np.hstack(
+                [self._vectors, self.other_vectors, self._pca_components]
+            )
+        else:
+            self.vectors = np.hstack([self._vectors, self._pca_components])
         if self.segments:
-            self._vectors = self._cut_segments(self._vectors)
-        self._validate()
+            self.vectors = self._cut_segments(self.vectors)
         self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
@@ -351,6 +357,7 @@ def __init__(
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "downsample",
+        focus_exptime: float = 50,
     ):
         self.dx = dx
         self.dy = dy
@@ -371,8 +378,11 @@ def __init__(
             segments=segments,
             resolution=resolution,
             bin_method=bin_method,
+            focus_exptime=focus_exptime,
         )
+        self._get_cartesian_stacked()
 
+    def _get_cartesian_stacked(self):
         self._cartesian_stacked = sparse.hstack(
             [self.cartesian_matrix for idx in range(self.vectors.shape[1])],
             format="csr",
@@ -465,6 +475,17 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
+    def pca(self, y, ncomponents=5):
+        """Adds the principal components of `y` to the design matrix
+
+        Parameters
+        ----------
+        y: np.ndarray
+            Input flux array to take PCA of.
+        """
+        self._pca(y, ncomponents=5)
+        self._get_cartesian_stacked()
+
     def plot_model(self, time_index=0):
         if not hasattr(self, "weights"):
             raise ValueError("Run `fit` first.")

From 556317de99416ea380125088ca7f2df02cffe9bd Mon Sep 17 00:00:00 2001
From: Christina Hedges <14965634+christinahedges@users.noreply.github.com>
Date: Tue, 12 Apr 2022 11:04:36 -0700
Subject: [PATCH 27/53] Update src/psfmachine/perturbation.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jorge Martínez-Palomera <jorgemarpa@users.noreply.github.com>
---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 14b5c38..4f5acec 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -484,7 +484,7 @@ def pca(self, y, ncomponents=5):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        self._pca(y, ncomponents=5)
+        self._pca(y, ncomponents=ncomponents)
         self._get_cartesian_stacked()
 
     def plot_model(self, time_index=0):

From 648211e6abbdcb8d5661ec6ea4d19409bbfbdcc6 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 12:00:13 -0700
Subject: [PATCH 28/53] adding perturbation-pca into machine

---
 src/psfmachine/machine.py      | 9 ++++++++-
 src/psfmachine/perturbation.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index cbe7d52..1e99ec8 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -466,7 +466,8 @@ def _get_source_mask(
                 l[idx] = test_r[loc[0]]
         ok = np.isfinite(l)
         source_radius_limit = np.polyval(
-            np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates)
+            np.polyfit(test_f[ok], l[ok], poly_order),
+            np.log10(self.source_flux_estimates),
         )
         source_radius_limit[
             source_radius_limit > upper_radius_limit
@@ -614,6 +615,8 @@ def build_time_model(
         bin_method="bin",
         segments=False,
         focus=False,
+        focus_exptime=50,
+        pca_ncomponents=0,
     ):
         """
         Builds a time model that moves the PRF model to account for the scene movement
@@ -688,6 +691,7 @@ def build_time_model(
             resolution=self.n_time_points,
             other_vectors=other_vectors,
             bin_method=bin_method,
+            focus_exptime=focus_exptime,
         )
 
         # get uncontaminated pixel norm-flux
@@ -728,6 +732,9 @@ def build_time_model(
         # combine good-behaved pixels with uncontaminated pixels
         k &= uncontaminated_pixels
 
+        if pca_ncomponents > 0:
+            P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents)
+
         # iterate to remvoe outliers
         for count in [0, 1, 2]:
             # need to add pixel_mask=k
diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index fc3cf39..589c2e6 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -483,7 +483,7 @@ def pca(self, y, ncomponents=5):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        self._pca(y, ncomponents=5)
+        self._pca(y, ncomponents=ncomponents)
         self._get_cartesian_stacked()
 
     def plot_model(self, time_index=0):

From e97d3f5e51254af1cb282a87d6453f48f23d1828 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 12 Apr 2022 13:12:17 -0700
Subject: [PATCH 29/53] :sparkles: now pca has smoothed components

---
 src/psfmachine/perturbation.py | 73 ++++++++++++++++++++++++++--------
 tests/test_perturbation.py     | 11 ++++-
 2 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 14b5c38..adb674d 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -7,6 +7,7 @@
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
 from fbpca import pca
+from astropy.convolution import convolve, Box1DKernel
 
 
 class PerturbationMatrix(object):
@@ -94,18 +95,24 @@ def prior_sigma(self):
     def breaks(self):
         return np.where(np.diff(self.time) / np.median(np.diff(self.time)) > 5)[0] + 1
 
+    @property
+    def segment_masks(self):
+        x = np.array_split(np.arange(len(self.time)), self.breaks)
+        return np.asarray(
+            [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
+        ).T
+
     def _cut_segments(self, vectors):
         """
         Cuts the data into "segments" wherever there is a break. Breaks are defined
         as anywhere where there is a gap in data of more than 5 times the median
         time between observations.
         """
-        x = np.array_split(np.arange(len(self.time)), self.breaks)
-        masks = np.asarray(
-            [np.in1d(np.arange(len(self.time)), x1).astype(float) for x1 in x]
-        ).T
         return np.hstack(
-            [vectors[:, idx][:, None] * masks for idx in range(vectors.shape[1])]
+            [
+                vectors[:, idx][:, None] * self.segment_masks
+                for idx in range(vectors.shape[1])
+            ]
         )
 
     def _get_focus_change(self):
@@ -176,7 +183,7 @@ def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None):
             Optional flux errors. Should have shape ntimes.
         """
         if flux_err is None:
-            flux_err = np.ones(len(flux_err))
+            flux_err = np.ones_like(flux)
 
         y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
         self.weights = self._fit_linalg(y, ye)
@@ -280,7 +287,7 @@ def func(x, quad=False):
 
         return func
 
-    def pca(self, y, ncomponents=5):
+    def pca(self, y, ncomponents=5, box_width=100):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -288,22 +295,56 @@ def pca(self, y, ncomponents=5):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        return self._pca(y, ncomponents=ncomponents)
+        return self._pca(y, ncomponents=ncomponents, box_width=box_width)
 
-    def _pca(self, y, ncomponents=5):
-        """This hidden method allows us to update the pca method for other classes
-        """
+    def _pca(self, y, ncomponents=5, box_width=100):
+        """This hidden method allows us to update the pca method for other classes"""
         if not y.ndim == 2:
             raise ValueError("Must pass a 2D `y`")
         if not y.shape[0] == len(self.time):
             raise ValueError(f"Must pass a `y` with shape ({len(self.time)}, X)")
 
         # Clean out any time series have significant contribution from one component
-        k = np.ones(y.shape[1], bool)
+        k = np.nansum(y, axis=0) != 0
+
+        long_y = np.vstack(
+            [
+                np.hstack(
+                    [
+                        convolve(y[m, idx], Box1DKernel(100), boundary="extend")
+                        for m in self.segment_masks.astype(bool).T
+                    ]
+                )
+                for idx in range(y.shape[1])
+            ]
+        ).T
+
+        med_y = np.vstack(
+            [
+                np.hstack(
+                    [
+                        convolve(
+                            y[m, idx] / long_y[m, idx],
+                            Box1DKernel(15),
+                            boundary="extend",
+                        )
+                        for m in self.segment_masks.astype(bool).T
+                    ]
+                )
+                for idx in range(y.shape[1])
+            ]
+        ).T
+
         for count in range(3):
-            self._pca_components, s, V = pca(y[:, k], ncomponents, n_iter=30)
+            U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30)
             k[k] &= (np.abs(V) < 0.5).all(axis=0)
 
+        for count in range(3):
+            U2, s, V = pca(np.nan_to_num(med_y)[:, k], ncomponents, n_iter=30)
+            k[k] &= (np.abs(V) < 0.5).all(axis=0)
+
+        self._pca_components = np.hstack([U1, U2])
+
         if self.other_vectors is not None:
             self.vectors = np.hstack(
                 [self._vectors, self.other_vectors, self._pca_components]
@@ -439,7 +480,7 @@ def fit(
         else:
             pixel_mask = np.ones(flux.shape[-1], bool)
         if flux_err is None:
-            flux_err = np.ones(len(flux_err))
+            flux_err = np.ones_like(flux)
 
         y, ye = self.bin_func(flux).ravel(), self.bin_func(flux_err, quad=True).ravel()
         k = (np.ones(self.nbins, bool)[:, None] * pixel_mask).ravel()
@@ -476,7 +517,7 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
-    def pca(self, y, ncomponents=5):
+    def pca(self, y, ncomponents=5, box_width=100):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -484,7 +525,7 @@ def pca(self, y, ncomponents=5):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        self._pca(y, ncomponents=5)
+        self._pca(y, ncomponents=5, box_width=box_width)
         self._get_cartesian_stacked()
 
     def plot_model(self, time_index=0):
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index e0fcde1..b1b8b12 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -65,9 +65,16 @@ def test_perturbation_matrix():
 
     # Test PCA
     flux = np.random.normal(1, 0.1, size=(200, 100))
+    p = PerturbationMatrix(time=time, focus=False)
+    assert p.matrix.shape == (20, 8)
+    p.pca(flux, ncomponents=2)
+    assert p.matrix.shape == (20, 16)
+    assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
+    p.fit(y, ye)
+    p = PerturbationMatrix(time=time, focus=False, segments=False)
+    assert p.matrix.shape == (20, 4)
+    p.pca(flux, ncomponents=2)
     assert p.matrix.shape == (20, 8)
-    p.pca(flux, ncomponents=5)
-    assert p.matrix.shape == (20, 18)
     assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
     p.fit(y, ye)
 

From ca3645da2647ca6732e044a5f4a2f78a688e741c Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 14:48:05 -0700
Subject: [PATCH 30/53] improved spline and gaussian smooth

---
 src/psfmachine/machine.py |   7 ++-
 src/psfmachine/utils.py   | 121 +++++++++++++++++++++++---------------
 2 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 1e99ec8..b1342bf 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -667,9 +667,10 @@ def build_time_model(
                 mpc2 = self.dec_centroid.to("arcsec").value / 4
 
             # smooth the vectors
-            mpc1_smooth, mpc2_smooth = spline_smooth(
+            mpc1_smooth, mpc2_smooth = bspline_smooth(
                 [mpc1, mpc2],
-                time=self.time,
+                x=self.time,
+                do_segments=segments,
             )
             # normalize components
             mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / (
@@ -733,6 +734,8 @@ def build_time_model(
         k &= uncontaminated_pixels
 
         if pca_ncomponents > 0:
+            # select only bright pixels
+            k &= (flux_binned_raw > 300).all(axis=0)
             P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents)
 
         # iterate to remvoe outliers
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index 54eeeb7..9a188dd 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -538,7 +538,7 @@ def threshold_bin(x, y, z, z_err=None, abs_thresh=10, bins=15, statistic=np.nanm
     )
 
 
-def get_breaks(time):
+def get_breaks(time, include_ext=False):
     """
     Finds discontinuity in the time array and return the break indexes.
 
@@ -553,10 +553,15 @@ def get_breaks(time):
         An array of indexes with the break positions
     """
     dts = np.diff(time)
-    return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time) - 1])
+    if include_ext:
+        return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
+    else:
+        return np.where(dts > 5 * np.median(dts))[0] + 1
 
 
-def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"):
+def gaussian_smooth(
+    y, x=None, do_segments=False, filter_size=13, mode="mirror", breaks=None
+):
     """
     Applies a Gaussian smoothing to a curve.
 
@@ -583,20 +588,24 @@ def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"):
     else:
         y = np.atleast_2d(y)
 
-    if x is None:
-        splits = [0, y.shape[1]]
-    elif isinstance(x, np.ndarray):
-        splits = get_breaks(x)
-        # find poscorr discontinuity in each axis
-        grads = np.gradient(y, x, axis=1)
-        # the 7-sigma here is hardcoded and found to work ok
-        splits = np.unique(
-            np.concatenate(
-                [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+    if do_segments:
+        if breaks is None and x is None:
+            raise ValueError("Please provide `x` or `breaks` to have splits.")
+        elif breaks is None and x is not None:
+            splits = get_breaks(x, include_ext=True)
+        else:
+            splits = np.array(breaks)
+        # find discontinuity in y according to x if provided
+        if x is not None:
+            grads = np.gradient(y, x, axis=1)
+            # the 7-sigma here is hardcoded and found to work ok
+            splits = np.unique(
+                np.concatenate(
+                    [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+                )
             )
-        )
     else:
-        raise ValueError("`x` optional argument invalid, pass None or an array like.")
+        splits = [0, y.shape[-1]]
 
     y_smooth = []
     for i in range(1, len(splits)):
@@ -611,7 +620,7 @@ def gaussian_smooth(y, x=None, filter_size=13, mode="mirror"):
     return np.hstack(y_smooth)
 
 
-def spline_smooth(y, x, weights=None, degree=3, do_splits=True):
+def bspline_smooth(y, x=None, degree=3, do_segments=False, breaks=None, n_knots=100):
     """
     Applies a spline smoothing to a curve.
 
@@ -620,13 +629,17 @@ def spline_smooth(y, x, weights=None, degree=3, do_splits=True):
     y : numpy.ndarray or list of numpy.ndarray
         Arrays to be smoothen in the last axis
     x : numpy.ndarray
-        Time array of same shape of `y` last axis.
-    weights : numpy.ndarray or list of numpy.ndarray
-        Optional array of weights the same shape as `y`.
+        Optional. x array, as `y = f(x)`` used to find discontinuities in `f(x)`. If x
+        is given then splits will be computed, if not `breaks` argument as to be provided.
     degree : int
         Degree of the psline fit, default is 3.
-    do_splits : boolean
-        Do the splines per segments with splits defined by data discontinuity.
+    do_segments : boolean
+        Do the splines per segments with splits computed from data `x` or given in `breaks`.
+    breaks : list of ints
+        List of break indexes in `y`.
+    nknots : int
+        Number of knots for the B-Spline. If `do_segments` is True, knots will be
+        distributed in each segment.
 
     Returns
     -------
@@ -638,33 +651,49 @@ def spline_smooth(y, x, weights=None, degree=3, do_splits=True):
     else:
         y = np.atleast_2d(y)
 
-    if do_splits:
-        splits = get_breaks(x)
-        # find poscorr discontinuity in each axis
-        grads = np.gradient(y, x, axis=1)
-        # the 7-sigma here is hardcoded and found to work ok
-        splits = np.unique(
-            np.concatenate(
-                [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+    if do_segments:
+        if breaks is None and x is None:
+            raise ValueError("Please provide `x` or `breaks` to have splits.")
+        elif breaks is None and x is not None:
+            splits = get_breaks(x)
+        else:
+            splits = np.array(breaks)
+        # find discontinuity in y according to x if provided
+        if x is not None:
+            grads = np.gradient(y, x, axis=1)
+            # the 7-sigma here is hardcoded and found to work ok
+            splits = np.unique(
+                np.concatenate(
+                    [splits, np.hstack([np.where(g > 7 * g.std())[0] for g in grads])]
+                )
             )
-        )
     else:
-        splits = [0, -1]
-    if weights is None:
-        s = 5e-4
-        weights = np.ones_like(y)
-    else:
-        if isinstance(weights, list):
-            weights = np.asarray(weights)
-        else:
-            weights = np.atleast_2d(weights)
-        # s = weights.shape[-1] - np.sqrt(2 * weights.shape[-1])
-        s = 1.9e-10
-        if y.shape != weights.shape:
-            raise ValueError("Inconsistent shape between `v` and `weights`")
+        splits = [0, y.shape[-1]]
+
+    def spline(v):
+        knots = np.linspace(v.min(), v.max(), n_knots)
+        DM = np.asarray(
+            dmatrix(
+                "bs(x, knots=knots, degree=2, include_intercept=True)",
+                {"x": list(v), "knots": knots},
+            )
+        )
+        x = np.array_split(np.arange(len(v)), splits)
+        masks = np.asarray([np.in1d(np.arange(len(v)), x1).astype(float) for x1 in x]).T
+        DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])])
+        return DM
 
     y_smooth = []
-    for y_, w in zip(y, weights):
-        tck = interpolate.splrep(x, y_, w=w, s=s, xb=x[splits], k=degree)
-        y_smooth.append(interpolate.splev(x, tck, der=0))
+    DM = spline(np.arange(y.shape[-1]))
+    prior_mu = np.zeros(DM.shape[1])
+    prior_sigma = np.ones(DM.shape[1]) * 1e5
+    # iterate over vectors in y
+    for v in range(y.shape[0]):
+        weights = solve_linear_model(
+            DM,
+            y[v],
+            prior_mu=prior_mu,
+            prior_sigma=prior_sigma,
+        )
+        y_smooth.append(DM.dot(weights))
     return np.array(y_smooth)

From 8b4da3dbcfde60dbb8350f2443c3556f9bea1acd Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 15:01:43 -0700
Subject: [PATCH 31/53] P3 -> P

---
 src/psfmachine/machine.py | 42 +++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index b1342bf..1bb09f0 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -683,7 +683,7 @@ def build_time_model(
             other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth]
 
         # create a 3D perturbation matrix
-        P3 = PerturbationMatrix3D(
+        P = PerturbationMatrix3D(
             time=_whitened_time,
             dx=dx,
             dy=dy,
@@ -711,9 +711,9 @@ def build_time_model(
         flux_err_norm = flux_err / np.nanmean(flux, axis=0)
 
         # bindown flux arrays
-        flux_binned_raw = P3.bin_func(flux)
-        flux_binned = P3.bin_func(flux_norm)
-        flux_err_binned = P3.bin_func(flux_err_norm)
+        flux_binned_raw = P.bin_func(flux)
+        flux_binned = P.bin_func(flux_norm)
+        flux_err_binned = P.bin_func(flux_err_norm)
 
         # No saturated pixels, 1e5 is a hardcoded value for Kepler.
         k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones(
@@ -736,13 +736,13 @@ def build_time_model(
         if pca_ncomponents > 0:
             # select only bright pixels
             k &= (flux_binned_raw > 300).all(axis=0)
-            P3.pca(flux_norm[:, k], ncomponents=pca_ncomponents)
+            P.pca(flux_norm[:, k], ncomponents=pca_ncomponents)
 
         # iterate to remvoe outliers
         for count in [0, 1, 2]:
             # need to add pixel_mask=k
-            P3.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k)
-            res = flux_binned - P3.matrix.dot(P3.weights).reshape(flux_binned.shape)
+            P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k)
+            res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape)
             # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape))
             res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T)
             bad_targets = sigma_clip(res, sigma=5).mask
@@ -752,7 +752,7 @@ def build_time_model(
         # bookkeeping
         self.flux_binned = flux_binned
         self._time_masked_pix = k
-        self.P3 = P3
+        self.P = P
         if plot:
             return self.plot_time_model()
         return
@@ -770,7 +770,7 @@ def _get_perturbed_model(self, time_indices=None):
         perturbed_model = []
         for tdx in time_indices:
             X = self.mean_model.copy()
-            X.data *= self.P3.model(time_indices=tdx).ravel()
+            X.data *= self.P.model(time_indices=tdx).ravel()
             perturbed_model.append(X)
         self.perturbed_model = perturbed_model
 
@@ -787,14 +787,12 @@ def plot_time_model(self):
         fig : matplotlib.Figure
             Figure.
         """
-        model_binned = self.P3.matrix.dot(self.P3.weights).reshape(
-            self.flux_binned.shape
-        )
+        model_binned = self.P.matrix.dot(self.P.weights).reshape(self.flux_binned.shape)
         fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w")
         # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool)
         im = ax[0, 0].scatter(
-            self.P3.dx[self._time_masked_pix],
-            self.P3.dy[self._time_masked_pix],
+            self.P.dx[self._time_masked_pix],
+            self.P.dy[self._time_masked_pix],
             c=self.flux_binned[0, self._time_masked_pix],
             s=3,
             vmin=0.5,
@@ -803,8 +801,8 @@ def plot_time_model(self):
             rasterized=True,
         )
         ax[0, 1].scatter(
-            self.P3.dx[self._time_masked_pix],
-            self.P3.dy[self._time_masked_pix],
+            self.P.dx[self._time_masked_pix],
+            self.P.dy[self._time_masked_pix],
             c=self.flux_binned[-1, self._time_masked_pix],
             s=3,
             vmin=0.5,
@@ -813,8 +811,8 @@ def plot_time_model(self):
             rasterized=True,
         )
         ax[1, 0].scatter(
-            self.P3.dx[self._time_masked_pix],
-            self.P3.dy[self._time_masked_pix],
+            self.P.dx[self._time_masked_pix],
+            self.P.dy[self._time_masked_pix],
             c=model_binned[0, self._time_masked_pix],
             s=3,
             vmin=0.5,
@@ -823,8 +821,8 @@ def plot_time_model(self):
             rasterized=True,
         )
         ax[1, 1].scatter(
-            self.P3.dx[self._time_masked_pix],
-            self.P3.dy[self._time_masked_pix],
+            self.P.dx[self._time_masked_pix],
+            self.P.dy[self._time_masked_pix],
             c=model_binned[-1, self._time_masked_pix],
             s=3,
             vmin=0.5,
@@ -1285,13 +1283,13 @@ def fit_model(self, fit_va=False):
             self.model_flux[tdx] = X.dot(self.ws[tdx])
 
             if fit_va:
-                if not hasattr(self, "P3"):
+                if not hasattr(self, "P"):
                     raise ValueError(
                         "Please use `build_time_model` before fitting with velocity "
                         "aberration."
                     )
 
-                X.data *= self.P3.model(time_indices=tdx).ravel()
+                X.data *= self.P.model(time_indices=tdx).ravel()
 
                 sigma_w_inv = X.T.dot(
                     X.multiply(1 / self.flux_err[tdx][:, None] ** 2)

From 94c3aafabb794ee5ed181581990a531dd0eeb2a1 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 15:16:20 -0700
Subject: [PATCH 32/53] hotfix

---
 src/psfmachine/machine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 1bb09f0..acb7a40 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -14,7 +14,7 @@
     solve_linear_model,
     sparse_lessthan,
     threshold_bin,
-    spline_smooth,
+    bspline_smooth,
 )
 from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP
 from .perturbation import PerturbationMatrix3D
@@ -466,7 +466,7 @@ def _get_source_mask(
                 l[idx] = test_r[loc[0]]
         ok = np.isfinite(l)
         source_radius_limit = np.polyval(
-            np.polyfit(test_f[ok], l[ok], poly_order),
+            np.polyfit(test_f[ok], l[ok], 1),
             np.log10(self.source_flux_estimates),
         )
         source_radius_limit[

From 68861a6203e0cd5ae9d63d2410bc5b5dc8cc4437 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 15:16:59 -0700
Subject: [PATCH 33/53] flake8

---
 src/psfmachine/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index 9a188dd..a820326 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -7,7 +7,6 @@
 from scipy import sparse
 from patsy import dmatrix
 from scipy.ndimage import gaussian_filter1d
-from scipy import interpolate
 import pyia
 
 # size_limit is 1GB

From 0cbde4652d6abc98958d6af45e4409b08972fff1 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 12 Apr 2022 16:24:10 -0700
Subject: [PATCH 34/53] :bug: update pca

---
 src/psfmachine/perturbation.py | 123 +++++++++++++++++++++++++--------
 src/psfmachine/utils.py        |  36 ++++++----
 2 files changed, 117 insertions(+), 42 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index adb674d..34b3a26 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -8,6 +8,7 @@
 import matplotlib.pyplot as plt
 from fbpca import pca
 from astropy.convolution import convolve, Box1DKernel
+from .utils import spline1d
 
 
 class PerturbationMatrix(object):
@@ -287,7 +288,7 @@ def func(x, quad=False):
 
         return func
 
-    def pca(self, y, ncomponents=5, box_width=100):
+    def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -295,9 +296,14 @@ def pca(self, y, ncomponents=5, box_width=100):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        return self._pca(y, ncomponents=ncomponents, box_width=box_width)
+        return self._pca(
+            y,
+            ncomponents=ncomponents,
+            long_time_scale=long_time_scale,
+            med_time_scale=med_time_scale,
+        )
 
-    def _pca(self, y, ncomponents=5, box_width=100):
+    def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
         """This hidden method allows us to update the pca method for other classes"""
         if not y.ndim == 2:
             raise ValueError("Must pass a 2D `y`")
@@ -307,33 +313,91 @@ def _pca(self, y, ncomponents=5, box_width=100):
         # Clean out any time series have significant contribution from one component
         k = np.nansum(y, axis=0) != 0
 
-        long_y = np.vstack(
+        X = sparse.hstack(
             [
-                np.hstack(
-                    [
-                        convolve(y[m, idx], Box1DKernel(100), boundary="extend")
-                        for m in self.segment_masks.astype(bool).T
-                    ]
-                )
-                for idx in range(y.shape[1])
+                spline1d(
+                    self.time,
+                    np.linspace(
+                        self.time[m].min(),
+                        self.time[m].max(),
+                        int(
+                            np.ceil(
+                                (self.time[m].max() - self.time[m].min())
+                                / long_time_scale
+                            )
+                        ),
+                    )[1:-1],
+                    degree=3,
+                )[:, 1:]
+                for m in self.segment_masks.astype(bool).T
             ]
-        ).T
+        )
+        X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr()
+        X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
+        long_y = X.dot(
+            np.linalg.solve(
+                X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)),
+                X.T.dot(y),
+            )
+        )
 
-        med_y = np.vstack(
+        X = sparse.hstack(
             [
-                np.hstack(
-                    [
-                        convolve(
-                            y[m, idx] / long_y[m, idx],
-                            Box1DKernel(15),
-                            boundary="extend",
-                        )
-                        for m in self.segment_masks.astype(bool).T
-                    ]
-                )
-                for idx in range(y.shape[1])
+                spline1d(
+                    self.time,
+                    np.linspace(
+                        self.time[m].min(),
+                        self.time[m].max(),
+                        int(
+                            np.ceil(
+                                (self.time[m].max() - self.time[m].min())
+                                / med_time_scale
+                            )
+                        ),
+                    )[1:-1],
+                    degree=3,
+                )[:, 1:]
+                for m in self.segment_masks.astype(bool).T
             ]
-        ).T
+        )
+        X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr()
+        X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
+
+        med_y = X.dot(
+            np.linalg.solve(
+                X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)),
+                X.T.dot(y - long_y),
+            )
+        )
+
+        #
+        # long_y = np.vstack(
+        #     [
+        #         np.hstack(
+        #             [
+        #                 convolve(y[m, idx], Box1DKernel(100), boundary="extend")
+        #                 for m in self.segment_masks.astype(bool).T
+        #             ]
+        #         )
+        #         for idx in range(y.shape[1])
+        #     ]
+        # ).T
+
+        # med_y = np.vstack(
+        #     [
+        #         np.hstack(
+        #             [
+        #                 convolve(
+        #                     y[m, idx] / long_y[m, idx],
+        #                     Box1DKernel(15),
+        #                     boundary="extend",
+        #                 )
+        #                 for m in self.segment_masks.astype(bool).T
+        #             ]
+        #         )
+        #         for idx in range(y.shape[1])
+        #     ]
+        # ).T
 
         for count in range(3):
             U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30)
@@ -517,7 +581,7 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
-    def pca(self, y, ncomponents=5, box_width=100):
+    def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
@@ -525,7 +589,12 @@ def pca(self, y, ncomponents=5, box_width=100):
         y: np.ndarray
             Input flux array to take PCA of.
         """
-        self._pca(y, ncomponents=5, box_width=box_width)
+        self._pca(
+            y,
+            ncomponents=5,
+            long_time_scale=long_time_scale,
+            med_time_scale=med_time_scale,
+        )
         self._get_cartesian_stacked()
 
     def plot_model(self, time_index=0):
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index 4850e43..9e1e62f 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -196,24 +196,30 @@ def _make_A_polar(phi, r, cut_r=6, rmin=1, rmax=18, n_r_knots=12, n_phi_knots=15
     return X1
 
 
-def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt"):
-    def spline(v):
-        if knot_spacing_type == "sqrt":
-            knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots)
-            knots = np.sign(knots) * knots ** 2
-        else:
-            knots = np.linspace(-radius, radius, n_knots)
-        return sparse.csr_matrix(
-            np.asarray(
-                dmatrix(
-                    "bs(x, knots=knots, degree=3, include_intercept=True)",
-                    {"x": list(v), "knots": knots},
-                )
+def spline1d(x, knots, degree=3):
+    """Make a spline in a 1D variable `x`"""
+    X = sparse.csr_matrix(
+        np.asarray(
+            dmatrix(
+                "bs(x, knots=knots, degree=degree, include_intercept=True)",
+                {"x": list(x), "knots": knots, "degree": degree},
             )
         )
+    )
+    if not X.shape[0] == x.shape[0]:
+        raise ValueError("`patsy` has made the wrong matrix.")
+    X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
+    return X
+
 
-    x_spline = spline(x)
-    y_spline = spline(y)
+def _make_A_cartesian(x, y, n_knots=10, radius=3.0, knot_spacing_type="sqrt", degree=3):
+    if knot_spacing_type == "sqrt":
+        knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots)
+        knots = np.sign(knots) * knots ** 2
+    else:
+        knots = np.linspace(-radius, radius, n_knots)
+    x_spline = spline1d(x, knots=knots, degree=degree)
+    y_spline = spline1d(y, knots=knots, degree=degree)
 
     X = sparse.hstack(
         [x_spline.multiply(y_spline[:, idx]) for idx in range(y_spline.shape[1])],

From 18fdc554462ae54658ada362720caa35fc7b6824 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 12 Apr 2022 16:25:42 -0700
Subject: [PATCH 35/53] :art: flake8

---
 src/psfmachine/perturbation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 499c80f..c58dc3e 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -7,7 +7,6 @@
 from psfmachine.utils import _make_A_cartesian
 import matplotlib.pyplot as plt
 from fbpca import pca
-from astropy.convolution import convolve, Box1DKernel
 from .utils import spline1d
 
 

From de15bfc67012e4cca6519249e972c69882515830 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 16:27:58 -0700
Subject: [PATCH 36/53] perturbed model

---
 src/psfmachine/machine.py | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index acb7a40..95f4a3e 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -713,7 +713,7 @@ def build_time_model(
         # bindown flux arrays
         flux_binned_raw = P.bin_func(flux)
         flux_binned = P.bin_func(flux_norm)
-        flux_err_binned = P.bin_func(flux_err_norm)
+        flux_err_binned = P.bin_func(flux_err_norm, quad=True)
 
         # No saturated pixels, 1e5 is a hardcoded value for Kepler.
         k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones(
@@ -757,26 +757,13 @@ def build_time_model(
             return self.plot_time_model()
         return
 
-    def _get_perturbed_model(self, time_indices=None):
+    def perturbed_model(self, time_index):
         """
-        Computes the perturbed model at given times
+        Computes the perturbed model at a given time
         """
-        if time_indices is None:
-            time_indices = np.arange(len(self.time))
-        time_indices = np.atleast_1d(time_indices)
-        if isinstance(time_indices[0], bool):
-            time_indices = np.where(time_indices[0])[0]
-
-        perturbed_model = []
-        for tdx in time_indices:
-            X = self.mean_model.copy()
-            X.data *= self.P.model(time_indices=tdx).ravel()
-            perturbed_model.append(X)
-        self.perturbed_model = perturbed_model
-
-        return
-
-        # raise NotImplementedError
+        X = self.mean_model.copy()
+        X.data *= self.P.model(time_indices=time_index).ravel()
+        return X
 
     def plot_time_model(self):
         """
@@ -1289,7 +1276,7 @@ def fit_model(self, fit_va=False):
                         "aberration."
                     )
 
-                X.data *= self.P.model(time_indices=tdx).ravel()
+                X = self.perturbed_model(tdx)
 
                 sigma_w_inv = X.T.dot(
                     X.multiply(1 / self.flux_err[tdx][:, None] ** 2)

From eb9ce3cead95561726d32ca1faf9afa199270e40 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 12 Apr 2022 16:28:15 -0700
Subject: [PATCH 37/53] :art: no comment block

---
 src/psfmachine/perturbation.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index c58dc3e..649c5ff 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -369,35 +369,6 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
             )
         )
 
-        #
-        # long_y = np.vstack(
-        #     [
-        #         np.hstack(
-        #             [
-        #                 convolve(y[m, idx], Box1DKernel(100), boundary="extend")
-        #                 for m in self.segment_masks.astype(bool).T
-        #             ]
-        #         )
-        #         for idx in range(y.shape[1])
-        #     ]
-        # ).T
-
-        # med_y = np.vstack(
-        #     [
-        #         np.hstack(
-        #             [
-        #                 convolve(
-        #                     y[m, idx] / long_y[m, idx],
-        #                     Box1DKernel(15),
-        #                     boundary="extend",
-        #                 )
-        #                 for m in self.segment_masks.astype(bool).T
-        #             ]
-        #         )
-        #         for idx in range(y.shape[1])
-        #     ]
-        # ).T
-
         for count in range(3):
             U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30)
             k[k] &= (np.abs(V) < 0.5).all(axis=0)

From 2efb963ea2d698ca8809e0ce86798b649991541b Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Tue, 12 Apr 2022 16:30:56 -0700
Subject: [PATCH 38/53] :memo: docstrings

---
 src/psfmachine/perturbation.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 649c5ff..1b42747 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -290,10 +290,22 @@ def func(x, quad=False):
     def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
         """Adds the principal components of `y` to the design matrix
 
+        Will add two time scales of principal components, definied by `long_time_scale`
+        and `med_time_scale`.
+
         Parameters
         ----------
         y: np.ndarray
             Input flux array to take PCA of.
+        ncomponents: int
+            Number of principal components to use
+        long_time_scale: float
+            The time scale where variability is considered "long" term. Should be
+            in the same units as `self.time`.
+        med_time_scale: float
+            The time scale where variability is considered "medium" term. Should be
+            in the same units as `self.time`. Variability longer than `long_time_scale`,
+            or shorter than `med_time_scale`, will be removed before building components
         """
         return self._pca(
             y,
@@ -325,7 +337,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
                                 / long_time_scale
                             )
                         ),
-                    )[1:-1],
+                    ),
                     degree=3,
                 )[:, 1:]
                 for m in self.segment_masks.astype(bool).T
@@ -353,7 +365,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
                                 / med_time_scale
                             )
                         ),
-                    )[1:-1],
+                    ),
                     degree=3,
                 )[:, 1:]
                 for m in self.segment_masks.astype(bool).T

From 70af6aca15c44a38a33493bde6898739e719e2c9 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 17:08:38 -0700
Subject: [PATCH 39/53] time_resolution attr

---
 src/psfmachine/machine.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 95f4a3e..f79d5c8 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -50,7 +50,7 @@ def __init__(
         n_r_knots=10,
         n_phi_knots=15,
         n_time_knots=10,
-        n_time_points=200,
+        time_resolution=200,
         time_radius=8,
         rmin=1,
         rmax=16,
@@ -86,7 +86,7 @@ def __init__(
             Number of radial knots in the spline model.
         n_phi_knots: int
             Number of azimuthal knots in the spline model.
-        n_time_points: int
+        time_resolution: int
             Number of time points to bin by when fitting for velocity aberration.
         time_radius: float
             The radius around sources, out to which the velocity aberration model
@@ -157,7 +157,7 @@ def __init__(
         self.n_r_knots = n_r_knots
         self.n_phi_knots = n_phi_knots
         self.n_time_knots = n_time_knots
-        self.n_time_points = n_time_points
+        self.time_resolution = time_resolution
         self.time_radius = time_radius
         self.rmin = rmin
         self.rmax = rmax
@@ -689,7 +689,7 @@ def build_time_model(
             dy=dy,
             segments=segments,
             focus=focus,
-            resolution=self.n_time_points,
+            resolution=self.time_resolution,
             other_vectors=other_vectors,
             bin_method=bin_method,
             focus_exptime=focus_exptime,

From 514f72b10bcbc16eff88eb13a48c9dbb4aaefa25 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 12 Apr 2022 17:20:32 -0700
Subject: [PATCH 40/53] matrix transpose hotfix

---
 src/psfmachine/machine.py | 1 +
 src/psfmachine/tpf.py     | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index f79d5c8..38af35a 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -1277,6 +1277,7 @@ def fit_model(self, fit_va=False):
                     )
 
                 X = self.perturbed_model(tdx)
+                X = X.T
 
                 sigma_w_inv = X.T.dot(
                     X.multiply(1 / self.flux_err[tdx][:, None] ** 2)
diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index 66ff707..880f662 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -42,7 +42,7 @@ def __init__(
         n_r_knots=10,
         n_phi_knots=15,
         n_time_knots=10,
-        n_time_points=200,
+        time_resolution=200,
         time_radius=8,
         rmin=1,
         rmax=16,
@@ -68,7 +68,7 @@ def __init__(
             n_r_knots=n_r_knots,
             n_phi_knots=n_phi_knots,
             n_time_knots=n_time_knots,
-            n_time_points=n_time_points,
+            time_resolution=time_resolution,
             time_radius=time_radius,
             rmin=rmin,
             rmax=rmax,

From c6ebac8a51521f68e4be0b0b08f4e044a89823c0 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Wed, 13 Apr 2022 14:35:45 -0700
Subject: [PATCH 41/53] agressive bkg pixel mask

---
 src/psfmachine/tpf.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index 880f662..eb16130 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -6,6 +6,7 @@
 from astropy.coordinates import SkyCoord, match_coordinates_sky
 from astropy.time import Time
 from astropy.io import fits
+from astropy.stats import sigma_clip
 import astropy.units as u
 import matplotlib.pyplot as plt
 from matplotlib import patches
@@ -143,6 +144,21 @@ def remove_background_model(self, plot=False, data_augment=None):
         # keep track of which pixels comes from TPFs
         pixels_in_tpf = np.ones_like(self.row, dtype=bool)
 
+        # enheance pixel mask
+        time_corr = np.nanpercentile(bkg_flux, 20, axis=1)[:, None]
+        med_flux = np.median(bkg_flux - time_corr, axis=0)[None, :]
+
+        f = bkg_flux - med_flux
+        f -= np.median(f)
+        # Mask out pixels that are particularly bright.
+        _mask = (f - time_corr).std(axis=0) < 500
+        if not _mask.any():
+            raise ValueError("All the input pixels are brighter than 500 counts.")
+        _mask &= (f - time_corr).std(axis=0) < 30
+        _mask &= ~sigma_clip(med_flux[0]).mask
+        _mask &= ~sigma_clip(np.std(f - time_corr, axis=0)).mask
+        bkg_pixel_mask &= _mask
+
         if data_augment:
             # augment background pixels
             bkg_row = np.hstack([bkg_row, data_augment["row"]])

From eaa807f712ae465719f69b82b4e111d19f8c7719 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Fri, 15 Apr 2022 16:15:40 -0700
Subject: [PATCH 42/53] :art: updating sane defaults, adding smooth PCA
 components

---
 src/psfmachine/perturbation.py | 172 +++++++++++++++------------------
 tests/test_perturbation.py     |  22 +++--
 2 files changed, 92 insertions(+), 102 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 1b42747..16b51e3 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -43,7 +43,7 @@ def __init__(
         segments: bool = True,
         resolution: int = 10,
         bin_method: str = "bin",
-        focus_exptime=50,
+        focus_exptime=2,
     ):
 
         self.time = time
@@ -77,7 +77,7 @@ def __init__(
             self.vectors = self._vectors.copy()
         if self.segments:
             self.vectors = self._cut_segments(self.vectors)
-        self._clean_vectors()
+        # self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
     def __repr__(self):
@@ -133,25 +133,25 @@ def _get_focus_change(self):
         self._vectors = np.hstack([self._vectors, np.nansum(focus, axis=0)[:, None]])
         return
 
-    def _clean_vectors(self):
-        """Remove time polynomial from other vectors"""
-        nvec = self.poly_order + 1
-        if self.focus:
-            nvec += 1
-        if self.segments:
-            s = nvec * (len(self.breaks) + 1)
-        else:
-            s = nvec
-
-        if s != self.vectors.shape[1]:
-            X = self.vectors[:, :s]
-            w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
-            self.vectors[:, s:] -= X.dot(w)
-            # Each segment has mean zero
-            self.vectors[:, s:] -= np.asarray(
-                [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T]
-            ).T
-        return
+    # def _clean_vectors(self):
+    #     """Remove time polynomial from other vectors"""
+    #     nvec = self.poly_order + 1
+    #     if self.focus:
+    #         nvec += 1
+    #     if self.segments:
+    #         s = nvec * (len(self.breaks) + 1)
+    #     else:
+    #         s = nvec
+    #
+    #     if s != self.vectors.shape[1]:
+    #         X = self.vectors[:, :s]
+    #         w = np.linalg.solve(X.T.dot(X), X.T.dot(self.vectors[:, s:]))
+    #         self.vectors[:, s:] -= X.dot(w)
+    #         # Each segment has mean zero
+    #         self.vectors[:, s:] -= np.asarray(
+    #             [v[v != 0].mean() * (v != 0) for v in self.vectors[:, s:].T]
+    #         ).T
+    #     return
 
     def plot(self):
         fig, ax = plt.subplots()
@@ -212,6 +212,14 @@ def model(self, time_indices: Optional[list] = None):
     def shape(self):
         return self.vectors.shape
 
+    @property
+    def nvec(self):
+        return self.vectors.shape[1]
+
+    @property
+    def ntime(self):
+        return self.time.shape[0]
+
     def bin_func(self, var, **kwargs):
         """
         Bins down an input variable to the same time resolution as `self`
@@ -287,7 +295,7 @@ def func(x, quad=False):
 
         return func
 
-    def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
+    def pca(self, y, ncomponents=5, smooth_time_scale=0):
         """Adds the principal components of `y` to the design matrix
 
         Will add two time scales of principal components, definied by `long_time_scale`
@@ -308,13 +316,10 @@ def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
             or shorter than `med_time_scale`, will be removed before building components
         """
         return self._pca(
-            y,
-            ncomponents=ncomponents,
-            long_time_scale=long_time_scale,
-            med_time_scale=med_time_scale,
+            y, ncomponents=ncomponents, smooth_time_scale=smooth_time_scale
         )
 
-    def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
+    def _pca(self, y, ncomponents=3, smooth_time_scale=0):
         """This hidden method allows us to update the pca method for other classes"""
         if not y.ndim == 2:
             raise ValueError("Must pass a 2D `y`")
@@ -324,73 +329,43 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
         # Clean out any time series have significant contribution from one component
         k = np.nansum(y, axis=0) != 0
 
-        X = sparse.hstack(
-            [
-                spline1d(
-                    self.time,
-                    np.linspace(
-                        self.time[m].min(),
-                        self.time[m].max(),
-                        int(
-                            np.ceil(
-                                (self.time[m].max() - self.time[m].min())
-                                / long_time_scale
-                            )
+        if smooth_time_scale != 0:
+            X = sparse.hstack(
+                [
+                    spline1d(
+                        self.time,
+                        np.linspace(
+                            self.time[m].min(),
+                            self.time[m].max(),
+                            int(
+                                np.ceil(
+                                    (self.time[m].max() - self.time[m].min())
+                                    / smooth_time_scale
+                                )
+                            ),
                         ),
-                    ),
-                    degree=3,
-                )[:, 1:]
-                for m in self.segment_masks.astype(bool).T
-            ]
-        )
-        X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr()
-        X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
-        long_y = X.dot(
-            np.linalg.solve(
-                X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)),
-                X.T.dot(y),
+                        degree=3,
+                    )[:, 1:]
+                    for m in self.segment_masks.astype(bool).T
+                ]
             )
-        )
-
-        X = sparse.hstack(
-            [
-                spline1d(
-                    self.time,
-                    np.linspace(
-                        self.time[m].min(),
-                        self.time[m].max(),
-                        int(
-                            np.ceil(
-                                (self.time[m].max() - self.time[m].min())
-                                / med_time_scale
-                            )
-                        ),
-                    ),
-                    degree=3,
-                )[:, 1:]
-                for m in self.segment_masks.astype(bool).T
-            ]
-        )
-        X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr()
-        X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
-
-        med_y = X.dot(
-            np.linalg.solve(
-                X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)),
-                X.T.dot(y - long_y),
+            X = sparse.hstack([X, sparse.csr_matrix(np.ones(X.shape[0])).T]).tocsr()
+            X = X[:, np.asarray(X.sum(axis=0) != 0)[0]]
+            smoothed_y = X.dot(
+                np.linalg.solve(
+                    X.T.dot(X).toarray() + np.diag(1 / (np.ones(X.shape[1]) * 1e10)),
+                    X.T.dot(y),
+                )
             )
-        )
-
-        for count in range(3):
-            U1, s, V = pca(np.nan_to_num(long_y)[:, k], ncomponents, n_iter=30)
-            k[k] &= (np.abs(V) < 0.5).all(axis=0)
+        else:
+            smoothed_y = np.copy(y)
 
         for count in range(3):
-            U2, s, V = pca(np.nan_to_num(med_y)[:, k], ncomponents, n_iter=30)
+            self._pca_components, s, V = pca(
+                np.nan_to_num(smoothed_y)[:, k], ncomponents, n_iter=30
+            )
             k[k] &= (np.abs(V) < 0.5).all(axis=0)
 
-        self._pca_components = np.hstack([U1, U2])
-
         if self.other_vectors is not None:
             self.vectors = np.hstack(
                 [self._vectors, self.other_vectors, self._pca_components]
@@ -399,7 +374,7 @@ def _pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
             self.vectors = np.hstack([self._vectors, self._pca_components])
         if self.segments:
             self.vectors = self._cut_segments(self.vectors)
-        self._clean_vectors()
+        # self._clean_vectors()
         self.matrix = sparse.csr_matrix(self.bin_func(self.vectors))
 
 
@@ -443,20 +418,25 @@ def __init__(
         radius: float = 8,
         focus: bool = False,
         segments: bool = True,
-        resolution: int = 10,
+        resolution: int = 30,
         bin_method: str = "downsample",
-        focus_exptime: float = 50,
+        focus_exptime: float = 2,
+        degree: int = 2,
+        knot_spacing_type: str = "linear",
     ):
         self.dx = dx
         self.dy = dy
         self.nknots = nknots
         self.radius = radius
+        self.degree = degree
+        self.knot_spacing_type = knot_spacing_type
         self.cartesian_matrix = _make_A_cartesian(
             self.dx,
             self.dy,
             n_knots=self.nknots,
             radius=self.radius,
-            knot_spacing_type="linear",
+            knot_spacing_type=self.knot_spacing_type,
+            degree=self.degree,
         )
         super().__init__(
             time=time,
@@ -563,19 +543,23 @@ def model(self, time_indices: Optional[list] = None):
             ]
         )
 
-    def pca(self, y, ncomponents=5, long_time_scale=3, med_time_scale=0.5):
+    def pca(self, y, ncomponents=3, smooth_time_scale=0):
         """Adds the principal components of `y` to the design matrix
 
         Parameters
         ----------
         y: np.ndarray
             Input flux array to take PCA of.
+        n_components: int
+            Number of components to take
+        smooth_time_scale: float
+            Amount to smooth the components, using a spline in time.
+             If 0, the components will not be smoothed.
         """
         self._pca(
             y,
             ncomponents=ncomponents,
-            long_time_scale=long_time_scale,
-            med_time_scale=med_time_scale,
+            smooth_time_scale=smooth_time_scale,
         )
         self._get_cartesian_stacked()
 
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index b1b8b12..d3d7c35 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -68,14 +68,14 @@ def test_perturbation_matrix():
     p = PerturbationMatrix(time=time, focus=False)
     assert p.matrix.shape == (20, 8)
     p.pca(flux, ncomponents=2)
-    assert p.matrix.shape == (20, 16)
-    assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
+    assert p.matrix.shape == (20, 12)
+    # assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
     p.fit(y, ye)
     p = PerturbationMatrix(time=time, focus=False, segments=False)
     assert p.matrix.shape == (20, 4)
     p.pca(flux, ncomponents=2)
-    assert p.matrix.shape == (20, 8)
-    assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
+    assert p.matrix.shape == (20, 6)
+    # assert np.allclose((p.vectors.sum(axis=0) / (p.vectors != 0).sum(axis=0))[8:], 0)
     p.fit(y, ye)
 
 
@@ -95,13 +95,19 @@ def test_perturbation_matrix3d():
     p3 = PerturbationMatrix3D(
         time=time, dx=dx, dy=dy, nknots=4, radius=5, resolution=5, poly_order=1
     )
-    assert p3.cartesian_matrix.shape == (169, 81)
+    assert p3.cartesian_matrix.shape == (169, 64)
     assert p3.vectors.shape == (10, 2)
-    assert p3.shape == (1690, 81 * 2)
-    assert p3.matrix.shape == (169 * 3, 81 * 2)
+    assert p3.shape == (
+        p3.cartesian_matrix.shape[0] * p3.ntime,
+        p3.cartesian_matrix.shape[1] * p3.nvec,
+    )
+    assert p3.matrix.shape == (
+        p3.cartesian_matrix.shape[0] * p3.nbins,
+        p3.cartesian_matrix.shape[1] * p3.nvec,
+    )
     p3.fit(flux, flux_err)
     w = p3.weights
-    assert w.shape[0] == 81 * 2
+    assert w.shape[0] == p3.cartesian_matrix.shape[1] * p3.nvec
     model = p3.model()
     assert model.shape == flux.shape
 

From e1bc28e54c080ed82f9c00cbbb3daa5764161a95 Mon Sep 17 00:00:00 2001
From: Christina Hedges <christina.l.hedges@nasa.gov>
Date: Fri, 15 Apr 2022 16:16:21 -0700
Subject: [PATCH 43/53] :art: sane defaults

---
 src/psfmachine/perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 16b51e3..0b43d27 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -414,7 +414,7 @@ def __init__(
         dy: npt.ArrayLike,
         other_vectors: Optional[list] = None,
         poly_order: int = 3,
-        nknots: int = 10,
+        nknots: int = 7,
         radius: float = 8,
         focus: bool = False,
         segments: bool = True,

From 9d336fb01f3ab079caa8a4d70f1b2f3cc70cbbaa Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 18 Apr 2022 10:46:46 -0700
Subject: [PATCH 44/53] pca smooth time scale arg visible

---
 src/psfmachine/machine.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 38af35a..4ddaa8d 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -617,6 +617,7 @@ def build_time_model(
         focus=False,
         focus_exptime=50,
         pca_ncomponents=0,
+        pca_smooth_time_scale=0,
     ):
         """
         Builds a time model that moves the PRF model to account for the scene movement
@@ -671,6 +672,7 @@ def build_time_model(
                 [mpc1, mpc2],
                 x=self.time,
                 do_segments=segments,
+                n_knots=50,
             )
             # normalize components
             mpc1_smooth = (mpc1_smooth - mpc1_smooth.mean()) / (
@@ -736,7 +738,11 @@ def build_time_model(
         if pca_ncomponents > 0:
             # select only bright pixels
             k &= (flux_binned_raw > 300).all(axis=0)
-            P.pca(flux_norm[:, k], ncomponents=pca_ncomponents)
+            P.pca(
+                flux_norm[:, k],
+                ncomponents=pca_ncomponents,
+                smooth_time_scale=pca_smooth_time_scale,
+            )
 
         # iterate to remvoe outliers
         for count in [0, 1, 2]:

From ea876ccf173927ca618d75faf328aaeaeabde82e Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Mon, 18 Apr 2022 18:31:24 -0700
Subject: [PATCH 45/53] time_resolution kwarg

---
 src/psfmachine/machine.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 4ddaa8d..42eabb3 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -49,7 +49,7 @@ def __init__(
         time_mask=None,
         n_r_knots=10,
         n_phi_knots=15,
-        n_time_knots=10,
+        time_nknots=10,
         time_resolution=200,
         time_radius=8,
         rmin=1,
@@ -156,7 +156,7 @@ def __init__(
         self.limit_flux = 1e4
         self.n_r_knots = n_r_knots
         self.n_phi_knots = n_phi_knots
-        self.n_time_knots = n_time_knots
+        self.time_nknots = time_nknots
         self.time_resolution = time_resolution
         self.time_radius = time_radius
         self.rmin = rmin
@@ -691,10 +691,13 @@ def build_time_model(
             dy=dy,
             segments=segments,
             focus=focus,
-            resolution=self.time_resolution,
             other_vectors=other_vectors,
             bin_method=bin_method,
             focus_exptime=focus_exptime,
+            resolution=self.time_resolution,
+            radius=self.time_radius,
+            nknots=self.time_nknots,
+            knot_spacing_type=self.cartesian_knot_spacing,
         )
 
         # get uncontaminated pixel norm-flux

From 12cd2a25bf52688a64f8227af4de0816ca6e30fd Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 19 Apr 2022 19:00:55 -0700
Subject: [PATCH 46/53] using time_knots kwarg

---
 src/psfmachine/tpf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index eb16130..7c1ca74 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -42,7 +42,7 @@ def __init__(
         time_mask=None,
         n_r_knots=10,
         n_phi_knots=15,
-        n_time_knots=10,
+        time_nknots=10,
         time_resolution=200,
         time_radius=8,
         rmin=1,
@@ -68,7 +68,7 @@ def __init__(
             limit_radius=limit_radius,
             n_r_knots=n_r_knots,
             n_phi_knots=n_phi_knots,
-            n_time_knots=n_time_knots,
+            time_nknots=time_nknots,
             time_resolution=time_resolution,
             time_radius=time_radius,
             rmin=rmin,

From dc1a8e6d398fbd79bb563e8ba19b6c4108fd18ee Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Wed, 20 Apr 2022 12:41:34 -0700
Subject: [PATCH 47/53] new time model diag plot and components API

---
 src/psfmachine/machine.py | 81 +++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 12 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index 42eabb3..e80fe89 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -132,7 +132,7 @@ def __init__(
             Mean PSF model values per pixel used for PSF photometry
         time_corrector: string
             The type of time corrector that will be used to build the time model,
-            default is a "polynomial" for a polynomial in time, it can also be "pos_corr"
+            default is a "polynomial" for a polynomial in time, it can also be "poscorr"
         cartesian_knot_spacing: string
             Defines the type of spacing between knots in cartessian space to generate
             the design matrix, options are "linear" or "sqrt".
@@ -163,7 +163,6 @@ def __init__(
         self.rmax = rmax
         self.cut_r = cut_r
         self.sparse_dist_lim = sparse_dist_lim * u.arcsecond
-        self.time_corrector = "polynomial"
         self.cartesian_knot_spacing = "sqrt"
         # disble tqdm prgress bar when running in HPC
         self.quiet = False
@@ -618,12 +617,13 @@ def build_time_model(
         focus_exptime=50,
         pca_ncomponents=0,
         pca_smooth_time_scale=0,
+        positions=False,
     ):
         """
         Builds a time model that moves the PRF model to account for the scene movement
         due to velocity aberration. It has two methods to choose from using the
         attribute `self.time_corrector`, if `"polynomial"` (default) will use a
-        polynomial in time, if `"pos_corr"` will use the pos_corr vectos that can be found
+        polynomial in time, if `"poscorr"` will use the pos_corr vectos that can be found
         in the TPFs. The time polynomial gives a more flexible model vs the pos_corr
         option, but can lead to light curves with "weird" long-term trends. Using
         pos_corr is recomended for Kepler data.
@@ -656,16 +656,17 @@ def build_time_model(
         uncontaminated_pixels = uncontaminated_pixels.data
 
         # add other vectors if asked, centroids or poscorrs
-        if self.time_corrector == "polynomial":
-            other_vectors = None
-        else:
-            if self.time_corrector == "pos_corr" and hasattr(self, "pos_corr1"):
+        if positions:
+            if positions == "poscorr" and hasattr(self, "pos_corr1"):
                 mpc1 = np.nanmedian(self.pos_corr1, axis=0)
                 mpc2 = np.nanmedian(self.pos_corr2, axis=0)
-                # other_vectors =
-            elif self.time_corrector == "centroid" and hasattr(self, "ra_centroid"):
+            elif positions == "centroid" and hasattr(self, "ra_centroid"):
                 mpc1 = self.ra_centroid.to("arcsec").value / 4
                 mpc2 = self.dec_centroid.to("arcsec").value / 4
+            else:
+                raise ValueError(
+                    "`position` not valid, use one of {None, 'poscorr', 'centroid'}"
+                )
 
             # smooth the vectors
             mpc1_smooth, mpc2_smooth = bspline_smooth(
@@ -683,6 +684,8 @@ def build_time_model(
             )
             # combine them as the first order
             other_vectors = [mpc1_smooth, mpc2_smooth, mpc1_smooth * mpc2_smooth]
+        else:
+            other_vectors = None
 
         # create a 3D perturbation matrix
         P = PerturbationMatrix3D(
@@ -784,7 +787,7 @@ def plot_time_model(self):
             Figure.
         """
         model_binned = self.P.matrix.dot(self.P.weights).reshape(self.flux_binned.shape)
-        fig, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w")
+        fig1, ax = plt.subplots(2, 2, figsize=(9, 7), facecolor="w")
         # k1 = self._time_masked_pix.reshape(self.flux_binned.shape)[0].astype(bool)
         im = ax[0, 0].scatter(
             self.P.dx[self._time_masked_pix],
@@ -834,9 +837,63 @@ def plot_time_model(self):
         ax[1, 1].set(title="Model Last Cadence", xlabel=r"$\delta x$")
         plt.subplots_adjust(hspace=0.3)
 
-        cbar = fig.colorbar(im, ax=ax, shrink=0.7)
+        cbar = fig1.colorbar(im, ax=ax, shrink=0.7)
         cbar.set_label("Normalized Flux")
-        return fig
+
+        flux = np.array(
+            [self.source_mask.multiply(self.flux[idx]).data for idx in range(self.nt)]
+        )
+        flux_sort = np.argsort(np.nanmean(flux[:, self._time_masked_pix], axis=0))
+        data_binned_clean = self.flux_binned[:, self._time_masked_pix]
+        model_binned_clean = model_binned[:, self._time_masked_pix]
+
+        fig2, ax = plt.subplots(1, 3, figsize=(18, 5))
+        im = ax[0].imshow(
+            data_binned_clean.T[flux_sort],
+            origin="lower",
+            aspect="auto",
+            interpolation="nearest",
+            cmap="viridis",
+            vmin=0.9,
+            vmax=1.1,
+        )
+        ax[0].set(
+            xlabel="Binned Time Index", ylabel="Flux-Sorted Clean Pixels", title="Data"
+        )
+
+        im = ax[1].imshow(
+            model_binned_clean.T[flux_sort],
+            origin="lower",
+            aspect="auto",
+            interpolation="nearest",
+            cmap="viridis",
+            vmin=0.9,
+            vmax=1.1,
+        )
+        ax[1].set(xlabel="Binned Time Index", title="Perturbed Mode")
+
+        cbar = fig2.colorbar(
+            im, ax=ax[:2], shrink=0.7, orientation="horizontal", location="bottom"
+        )
+        cbar.set_label("Normalized Flux")
+
+        im = ax[2].imshow(
+            (model_binned_clean / data_binned_clean).T[flux_sort],
+            origin="lower",
+            aspect="auto",
+            interpolation="nearest",
+            cmap="viridis",
+            vmin=0.97,
+            vmax=1.03,
+        )
+        ax[2].set(xlabel="Binned Time Index", title="Perturbed Model / Data")
+
+        cbar = fig2.colorbar(
+            im, ax=ax[2], shrink=0.7, orientation="horizontal", location="bottom"
+        )
+        cbar.set_label("")
+
+        return fig1, fig2
 
     def build_shape_model(
         self, plot=False, flux_cut_off=1, frame_index="mean", bin_data=False, **kwargs

From 7ac7b71d72d1979a6a77696951382e4861687be0 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Thu, 28 Apr 2022 13:54:50 -0700
Subject: [PATCH 48/53] updated lock

---
 poetry.lock | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 36eab17..a204cb8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2077,7 +2077,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "0b6cb5d37be3084d7c0cc8d2209c66d7b8d7847580dd404a37c3e74a2033a4a2"
+content-hash = "74612ba5a75b49f844e825824b7a009a047559bc2aecec7f10930fb1e8e8f3bd"
 
 [metadata.files]
 aesara-theano-fallback = [
@@ -2111,6 +2111,10 @@ argon2-cffi = [
     {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"},
     {file = "argon2_cffi-20.1.0-cp39-cp39-win32.whl", hash = "sha256:e2db6e85c057c16d0bd3b4d2b04f270a7467c147381e8fd73cbbe5bc719832be"},
     {file = "argon2_cffi-20.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a84934bd818e14a17943de8099d41160da4a336bcc699bb4c394bbb9b94bd32"},
+    {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b94042e5dcaa5d08cf104a54bfae614be502c6f44c9c89ad1535b2ebdaacbd4c"},
+    {file = "argon2_cffi-20.1.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:8282b84ceb46b5b75c3a882b28856b8cd7e647ac71995e71b6705ec06fc232c3"},
+    {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"},
+    {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"},
 ]
 arviz = [
     {file = "arviz-0.11.2-py3-none-any.whl", hash = "sha256:f6a1389a90b53335f248d282c8142b8209150b9625459a85ec6d3d38786797c1"},
@@ -2205,24 +2209,36 @@ cffi = [
     {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"},
     {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"},
     {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"},
+    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24ec4ff2c5c0c8f9c6b87d5bb53555bf267e1e6f70e52e5a9740d32861d36b6f"},
+    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c3f39fa737542161d8b0d680df2ec249334cd70a8f420f71c9304bd83c3cbed"},
+    {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:681d07b0d1e3c462dd15585ef5e33cb021321588bebd910124ef4f4fb71aef55"},
     {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"},
     {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"},
     {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"},
     {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"},
+    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d7cd1abac2ffd92e65c0609661866709b4b2d82dd15f611e602b9b188b0b69"},
+    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f861a89e0043afec2a51fd177a567005847973be86f709bbb044d7f42fc4e05"},
+    {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc5a8e069b9ebfa22e26d0e6b97d6f9781302fe7f4f2b8776c3e1daea35f1adc"},
     {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"},
     {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"},
     {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"},
     {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"},
+    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04c468b622ed31d408fea2346bec5bbffba2cc44226302a0de1ade9f5ea3d373"},
+    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06db6321b7a68b2bd6df96d08a5adadc1fa0e8f419226e25b2a5fbf6ccc7350f"},
+    {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:293e7ea41280cb28c6fcaaa0b1aa1f533b8ce060b9e701d78511e1e6c4a1de76"},
     {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"},
     {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"},
     {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"},
     {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"},
+    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf1ac1984eaa7675ca8d5745a8cb87ef7abecb5592178406e55858d411eadc0"},
+    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:df5052c5d867c1ea0b311fb7c3cd28b19df469c056f7fdcfe88c7473aa63e333"},
+    {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24a570cd11895b60829e941f2613a4f79df1a27344cbbb82164ef2e0116f09c7"},
     {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"},
     {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"},
     {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"},

From 1e6b8a4368ea3313c6847759cb4bcfbe2aeb0983 Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Thu, 28 Apr 2022 14:15:29 -0700
Subject: [PATCH 49/53] pytest fixed

---
 src/psfmachine/ffi.py |  8 ++++----
 tests/test_machine.py | 21 ++++++++-------------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/psfmachine/ffi.py b/src/psfmachine/ffi.py
index ba0183e..4176e2e 100644
--- a/src/psfmachine/ffi.py
+++ b/src/psfmachine/ffi.py
@@ -44,8 +44,8 @@ def __init__(
         limit_radius=32.0,
         n_r_knots=10,
         n_phi_knots=15,
-        n_time_knots=10,
-        n_time_points=200,
+        time_nknots=10,
+        time_resolution=200,
         time_radius=8,
         cut_r=6,
         rmin=1,
@@ -130,8 +130,8 @@ def __init__(
             self.row,
             n_r_knots=n_r_knots,
             n_phi_knots=n_phi_knots,
-            n_time_knots=n_time_knots,
-            n_time_points=n_time_points,
+            time_nknots=time_nknots,
+            time_resolution=time_resolution,
             time_radius=time_radius,
             cut_r=cut_r,
             rmin=rmin,
diff --git a/tests/test_machine.py b/tests/test_machine.py
index ebfadfb..5ac4f53 100644
--- a/tests/test_machine.py
+++ b/tests/test_machine.py
@@ -122,10 +122,9 @@ def test_poscorr_smooth():
     machine = TPFMachine.from_TPFs(tpfs, apply_focus_mask=False)
     machine.build_shape_model(plot=False)
     # no segments
-    machine.time_corrector = "pos_corr"
     machine.poscorr_filter_size = 1
     machine.build_time_model(
-        split_time_segments=False, bin_method="bin", focus_component=False
+        segments=False, bin_method="bin", focus=False, positions="poscorr"
     )
 
     median_pc1 = np.nanmedian(machine.pos_corr1, axis=0)
@@ -137,8 +136,8 @@ def test_poscorr_smooth():
         median_pc2.max() - median_pc2.mean()
     )
 
-    assert np.isclose(machine.P3.other_vectors[:, 0], median_pc1, atol=0.5).all()
-    assert np.isclose(machine.P3.other_vectors[:, 1], median_pc2, atol=0.5).all()
+    assert np.isclose(machine.P.other_vectors[:, 0], median_pc1, atol=0.5).all()
+    assert np.isclose(machine.P.other_vectors[:, 1], median_pc2, atol=0.5).all()
 
 
 @pytest.mark.remote_data
@@ -146,20 +145,16 @@ def test_segment_time_model():
     # testing segment with the current test dataset we have that only has 10 cadences
     # isn't the best, but we can still do some sanity checks.
     machine = TPFMachine.from_TPFs(
-        tpfs, apply_focus_mask=False, n_time_points=3, time_corrector="polynomial"
+        tpfs, apply_focus_mask=False, time_resolution=3, time_corrector="polynomial"
     )
     machine.build_shape_model(plot=False)
     # no segments
-    machine.build_time_model(
-        split_time_segments=False, bin_method="bin", focus_component=False
-    )
-    assert machine.P3.vectors.shape == (10, 4)
+    machine.build_time_model(segments=False, bin_method="bin", focus=False)
+    assert machine.P.vectors.shape == (10, 4)
 
     # fake 2 time breaks
     machine.time[4:] += 0.5
     machine.time[7:] += 0.41
     # user defined segments
-    machine.build_time_model(
-        split_time_segments=True, bin_method="bin", focus_component=False
-    )
-    assert machine.P3.vectors.shape == (10, 4 * 3)
+    machine.build_time_model(segments=True, bin_method="bin", focus=False)
+    assert machine.P.vectors.shape == (10, 4 * 3)

From 90385d9a736a1889e169bf3ea1018adad9e1004b Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Fri, 29 Apr 2022 11:10:02 -0700
Subject: [PATCH 50/53] better pixel mask in build_time_model

---
 src/psfmachine/machine.py | 54 ++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index e80fe89..fa43401 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -130,9 +130,6 @@ def __init__(
             build the PSF model
         mean_model: scipy.sparce.csr_matrix
             Mean PSF model values per pixel used for PSF photometry
-        time_corrector: string
-            The type of time corrector that will be used to build the time model,
-            default is a "polynomial" for a polynomial in time, it can also be "poscorr"
         cartesian_knot_spacing: string
             Defines the type of spacing between knots in cartessian space to generate
             the design matrix, options are "linear" or "sqrt".
@@ -640,6 +637,17 @@ def build_time_model(
             time series as one segment.
         focus: boolean
             Add a component that models th focus change at the begining of a segment.
+        focus_exptime : int
+            Characteristic decay for focus component modeled as exponential decay when
+            `focus` is True. In the same units as `PerturbationMatrix3D.time`.
+        pca_ncomponents : int
+            Number of PCA components used in `PerturbationMatrix3D`. The components are
+            derived from pixel light lighrcurves.
+        pca_smooth_time_scale : float
+            Smooth time sacel for PCA components.
+        positions : boolean or string
+            If one of strings `"poscorr", "centroid"` then the perturbation matrix will
+            add other vectors accounting for position shift.
         """
         # create the time and space basis
         _whitened_time = (self.time - self.time.mean()) / (
@@ -718,47 +726,47 @@ def build_time_model(
         flux_norm = flux / np.nanmean(flux, axis=0)
         flux_err_norm = flux_err / np.nanmean(flux, axis=0)
 
-        # bindown flux arrays
-        flux_binned_raw = P.bin_func(flux)
-        flux_binned = P.bin_func(flux_norm)
-        flux_err_binned = P.bin_func(flux_err_norm, quad=True)
-
+        # create pixel mask for model fitting
         # No saturated pixels, 1e5 is a hardcoded value for Kepler.
-        k = (flux_binned_raw < 1e5).all(axis=0)[None, :] * np.ones(
-            flux_binned_raw.shape, bool
-        )
+        k = (flux < 1e5).all(axis=0)[None, :] * np.ones(flux.shape, bool)
         # No faint pixels, 100 is a hardcoded value for Kepler.
-        k &= (flux_binned_raw > 100).all(axis=0)[None, :] * np.ones(
-            flux_binned_raw.shape, bool
-        )
+        k &= (flux > 100).all(axis=0)[None, :] * np.ones(flux.shape, bool)
         # No huge variability
-        k &= (np.abs(flux_binned - 1) < 1).all(axis=0)[None, :] * np.ones(
-            flux_binned.shape, bool
-        )
+        _per_pix = np.percentile(flux_norm, [3, 97], axis=1)
+        k &= (
+            (
+                (flux_norm < _per_pix[0][:, None]) | (flux_norm > _per_pix[1][:, None])
+            ).sum(axis=0)
+            < flux_norm.shape[0] * 0.95
+        )[None, :] * np.ones(flux_norm.shape, bool)
         # No nans
-        k &= np.isfinite(flux_binned) & np.isfinite(flux_err_binned)
+        k &= np.isfinite(flux_norm) & np.isfinite(flux_err_norm)
         k = np.all(k, axis=0)
         # combine good-behaved pixels with uncontaminated pixels
         k &= uncontaminated_pixels
 
+        # adding PCA components to pertrubation matrix
         if pca_ncomponents > 0:
             # select only bright pixels
-            k &= (flux_binned_raw > 300).all(axis=0)
+            k &= (flux > 300).all(axis=0)
             P.pca(
                 flux_norm[:, k],
                 ncomponents=pca_ncomponents,
                 smooth_time_scale=pca_smooth_time_scale,
             )
 
+        # bindown flux arrays
+        flux_binned = P.bin_func(flux_norm)
+        flux_err_binned = P.bin_func(flux_err_norm, quad=True)
+
         # iterate to remvoe outliers
         for count in [0, 1, 2]:
             # need to add pixel_mask=k
             P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k)
             res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape)
-            # res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape))
-            res = np.ma.masked_array(res, ~np.tile(k, flux_binned.shape[0]).T)
-            bad_targets = sigma_clip(res, sigma=5).mask
-            bad_targets = bad_targets.any(axis=0)
+            chi2 = np.sum((res) ** 2 / (flux_err_binned ** 2), axis=0) / P.nbins
+            bad_targets = sigma_clip(chi2, sigma=5).mask
+            bad_targets = bad_targets.all(axis=0)
             k &= ~bad_targets
 
         # bookkeeping

From 5b18a7e4b214538ae0c513aa3de3143944278fd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jorge=20Mart=C3=ADnez-Palomera?=
 <jorgemarpa@users.noreply.github.com>
Date: Fri, 29 Apr 2022 12:37:59 -0700
Subject: [PATCH 51/53] Update pyproject.toml

Co-authored-by: Christina Hedges <14965634+christinahedges@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 55273e5..9104d31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,7 @@ photutils = "^1.1.0"
 diskcache = "^5.3.0"
 imageio = "^2.9.0"
 imageio-ffmpeg = "^0.4.5"
-kbackground = "0.1.7"
+kbackground = "^0.1.7"
 fbpca = "^1.0"
 
 [tool.poetry.dev-dependencies]

From 88ca62c636a734757e0623af18558e73c4e9c52c Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Fri, 3 Jun 2022 15:34:24 -0700
Subject: [PATCH 52/53] bump up mac version to 1.1.2, missing docstr and
 removed dup fx in utils

---
 pyproject.toml                 |  2 +-
 src/psfmachine/machine.py      | 11 +++++-----
 src/psfmachine/perturbation.py | 38 +++++++++++++++++++++++-----------
 src/psfmachine/tpf.py          |  3 ++-
 src/psfmachine/utils.py        | 23 ++++++++------------
 src/psfmachine/version.py      |  2 +-
 6 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9104d31..ed2dfac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "psfmachine"
-version = "1.1.1"
+version = "1.1.2"
 description = "Tool to perform fast PSF photometry of primary and background targets from Kepler/K2 Target Pixel Files"
 authors = ["Christina Hedges <christina.l.hedges@nasa.gov>",
           "Jorge Martinez-Palomera <jorgemarpa@ug.uchile.cl>"]
diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index fa43401..cc6610f 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -86,6 +86,8 @@ def __init__(
             Number of radial knots in the spline model.
         n_phi_knots: int
             Number of azimuthal knots in the spline model.
+        time_nknots: int
+            Number og knots for cartesian DM in time model.
         time_resolution: int
             Number of time points to bin by when fitting for velocity aberration.
         time_radius: float
@@ -462,8 +464,7 @@ def _get_source_mask(
                 l[idx] = test_r[loc[0]]
         ok = np.isfinite(l)
         source_radius_limit = np.polyval(
-            np.polyfit(test_f[ok], l[ok], 1),
-            np.log10(self.source_flux_estimates),
+            np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates)
         )
         source_radius_limit[
             source_radius_limit > upper_radius_limit
@@ -634,7 +635,8 @@ def build_time_model(
         segments : boolean
             If `True` will split the light curve into segments to fit different time
             models with a common pixel normalization. If `False` will fit the full
-            time series as one segment.
+            time series as one segment. Segments breaks are infered from time
+            discontinuities.
         focus: boolean
             Add a component that models th focus change at the begining of a segment.
         focus_exptime : int
@@ -761,7 +763,6 @@ def build_time_model(
 
         # iterate to remvoe outliers
         for count in [0, 1, 2]:
-            # need to add pixel_mask=k
             P.fit(flux_norm, flux_err=flux_err_norm, pixel_mask=k)
             res = flux_binned - P.matrix.dot(P.weights).reshape(flux_binned.shape)
             chi2 = np.sum((res) ** 2 / (flux_err_binned ** 2), axis=0) / P.nbins
@@ -769,7 +770,7 @@ def build_time_model(
             bad_targets = bad_targets.all(axis=0)
             k &= ~bad_targets
 
-        # bookkeeping
+        # book keeping
         self.flux_binned = flux_binned
         self._time_masked_pix = k
         self.P = P
diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 0b43d27..57ea06e 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -181,6 +181,11 @@ def fit(self, flux: npt.ArrayLike, flux_err: Optional[npt.ArrayLike] = None):
             Array of flux values. Should have shape ntimes.
         flux: npt.ArrayLike
             Optional flux errors. Should have shape ntimes.
+
+        Returns
+        -------
+        weights: npt.ArrayLike
+            Array with computed weights
         """
         if flux_err is None:
             flux_err = np.ones_like(flux)
@@ -229,6 +234,11 @@ def bin_func(self, var, **kwargs):
         var: npt.ArrayLike
             Array of values with at least 1 dimension. The first dimension must be
             the same shape as `self.time`
+
+        Returns
+        -------
+        func: object
+            An object function according to `self.bin_method`
         """
         if self.bin_method.lower() == "downsample":
             func = self._get_downsample_func()
@@ -307,13 +317,9 @@ def pca(self, y, ncomponents=5, smooth_time_scale=0):
             Input flux array to take PCA of.
         ncomponents: int
             Number of principal components to use
-        long_time_scale: float
-            The time scale where variability is considered "long" term. Should be
-            in the same units as `self.time`.
-        med_time_scale: float
-            The time scale where variability is considered "medium" term. Should be
-            in the same units as `self.time`. Variability longer than `long_time_scale`,
-            or shorter than `med_time_scale`, will be removed before building components
+        smooth_time_scale: float
+            Amount to smooth the components, using a spline in time.
+             If 0, the components will not be smoothed.
         """
         return self._pca(
             y, ncomponents=ncomponents, smooth_time_scale=smooth_time_scale
@@ -400,11 +406,19 @@ class PerturbationMatrix3D(PerturbationMatrix):
     focus : bool
         Whether to correct focus using a simple exponent model
     segments: bool
-        Whether to fit portions of data where there is a significant time break as separate segments
+        Whether to fit portions of data where there is a significant time break as
+        separate segments
     resolution: int
         How many cadences to bin down via `bin_method`
     bin_method: str
         How to bin the data under the hood. Default is by mean binning.
+    focus_exptime: float
+        Time for the exponent for focus change, if used
+    degree: int
+        Polynomial degree used to build the row/column cartesian design matrix
+    knot_spacing_type: str
+        Type of spacing bewtwen knots used for cartesian design matrix, options are
+        {"linear", "sqrt"}
     """
 
     def __init__(
@@ -483,8 +497,8 @@ def fit(
         pixel_mask: Optional[npt.ArrayLike] = None,
     ):
         """
-        Fits flux to find the best fit model weights. Optionally will include flux errors.
-        Sets the `self.weights` attribute with best fit weights.
+        Fits flux to find the best fit model weights. Optionally will include flux
+        errors. Sets the `self.weights` attribute with best fit weights.
 
         Parameters
         ----------
@@ -493,8 +507,8 @@ def fit(
         flux_err: npt.ArrayLike
             Optional flux errors. Should have shape ntimes x npixels.
         pixel_mask: npt.ArrayLike
-            Pixel mask to apply. Values that are `True` will be used in the fit. Values that are `False` will be masked.
-            Should have shape npixels.
+            Pixel mask to apply. Values that are `True` will be used in the fit.
+            Values that are `False` will be masked. Should have shape npixels.
         """
         if pixel_mask is not None:
             if not isinstance(pixel_mask, np.ndarray):
diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index 4bb15a3..899003c 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -150,10 +150,11 @@ def remove_background_model(self, plot=False, data_augment=None):
 
         f = bkg_flux - med_flux
         f -= np.median(f)
-        # Mask out pixels that are particularly bright.
+        # Mask out pixels that are particularly bright, 500 its a good number for Kepler
         _mask = (f - time_corr).std(axis=0) < 500
         if not _mask.any():
             raise ValueError("All the input pixels are brighter than 500 counts.")
+        # non variable pixels
         _mask &= (f - time_corr).std(axis=0) < 30
         _mask &= ~sigma_clip(med_flux[0]).mask
         _mask &= ~sigma_clip(np.std(f - time_corr, axis=0)).mask
diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py
index a243727..0cc4977 100644
--- a/src/psfmachine/utils.py
+++ b/src/psfmachine/utils.py
@@ -675,21 +675,16 @@ def bspline_smooth(y, x=None, degree=3, do_segments=False, breaks=None, n_knots=
     else:
         splits = [0, y.shape[-1]]
 
-    def spline(v):
-        knots = np.linspace(v.min(), v.max(), n_knots)
-        DM = np.asarray(
-            dmatrix(
-                "bs(x, knots=knots, degree=2, include_intercept=True)",
-                {"x": list(v), "knots": knots},
-            )
-        )
-        x = np.array_split(np.arange(len(v)), splits)
-        masks = np.asarray([np.in1d(np.arange(len(v)), x1).astype(float) for x1 in x]).T
-        DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])])
-        return DM
-
     y_smooth = []
-    DM = spline(np.arange(y.shape[-1]))
+    v = np.arange(y.shape[-1])
+    DM = spline1d(v, np.linspace(v.min(), v.max(), n_knots)).toarray()
+    # do segments
+    arr_splits = np.array_split(np.arange(len(v)), splits)
+    masks = np.asarray(
+        [np.in1d(np.arange(len(v)), x1).astype(float) for x1 in arr_splits]
+    ).T
+    DM = np.hstack([DM[:, idx][:, None] * masks for idx in range(DM.shape[1])])
+
     prior_mu = np.zeros(DM.shape[1])
     prior_sigma = np.ones(DM.shape[1]) * 1e5
     # iterate over vectors in y
diff --git a/src/psfmachine/version.py b/src/psfmachine/version.py
index 3459b59..8c6a373 100644
--- a/src/psfmachine/version.py
+++ b/src/psfmachine/version.py
@@ -1,3 +1,3 @@
 # It is important to store the version number in a separate file
 # so that we can read it from setup.py without importing the package
-__version__ = "1.1.1"
+__version__ = "1.1.2"

From a6612f6c2307b0f328e683ed41a52a89a8b487df Mon Sep 17 00:00:00 2001
From: Jorge <jorgemarpa@ug.uchile.cl>
Date: Tue, 21 Jun 2022 14:01:32 -0700
Subject: [PATCH 53/53] docstrings fixed

---
 src/psfmachine/perturbation.py | 9 ++++-----
 src/psfmachine/tpf.py          | 4 ++++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/psfmachine/perturbation.py b/src/psfmachine/perturbation.py
index 57ea06e..c645107 100644
--- a/src/psfmachine/perturbation.py
+++ b/src/psfmachine/perturbation.py
@@ -306,10 +306,8 @@ def func(x, quad=False):
         return func
 
     def pca(self, y, ncomponents=5, smooth_time_scale=0):
-        """Adds the principal components of `y` to the design matrix
-
-        Will add two time scales of principal components, definied by `long_time_scale`
-        and `med_time_scale`.
+        """Adds the first `ncomponents` principal components of `y` to the design
+        matrix. `y` is smoothen with a spline function and scale `smooth_time_scale`.
 
         Parameters
         ----------
@@ -558,7 +556,8 @@ def model(self, time_indices: Optional[list] = None):
         )
 
     def pca(self, y, ncomponents=3, smooth_time_scale=0):
-        """Adds the principal components of `y` to the design matrix
+        """Adds the first `ncomponents` principal components of `y` to the design
+        matrix. `y` is smoothen with a spline function and scale `smooth_time_scale`.
 
         Parameters
         ----------
diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index 899003c..f649ccb 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -145,6 +145,10 @@ def remove_background_model(self, plot=False, data_augment=None):
         pixels_in_tpf = np.ones_like(self.row, dtype=bool)
 
         # enheance pixel mask
+        # creates a quick simple bkg model as fx of time to find variable pixels by
+        # taking the percentile value of pixel-flux distribution at each time
+        # the choice of percentile is irrelevant, it's to capture the global trend
+        # of the background pixels in time
         time_corr = np.nanpercentile(bkg_flux, 20, axis=1)[:, None]
         med_flux = np.median(bkg_flux - time_corr, axis=0)[None, :]