199 changes: 199 additions & 0 deletions Demo.ipynb

Large diffs are not rendered by default.

455 changes: 215 additions & 240 deletions GPmodels_Advance/01_GP&GPU.ipynb

Large diffs are not rendered by default.

272 changes: 128 additions & 144 deletions GPmodels_Advance/01_GP&GPU_GPTutorial.ipynb

Large diffs are not rendered by default.

292 changes: 0 additions & 292 deletions GPmodels_Classic/01_simpleGP.ipynb

This file was deleted.

2 changes: 1 addition & 1 deletion GPmodels_Classic/04_neuralKernelGP.ipynb
@@ -36,7 +36,7 @@
"import core.GP_CommonCalculation as GP\n",
"from core.kernel import RBFKernel, LinearKernel, ARDKernel,RationalQuadraticKernel, PeriodicKernel\n",
"import numpy as np\n",
"from core.cigp_baseline import cigp\n",
"from core.simpleGP import cigp\n",
"# I use torch (1.11.0) for this work. lower version may not work.\n",
"import os\n",
"\n",
53 changes: 37 additions & 16 deletions README.md
@@ -1,48 +1,69 @@
# MiniGP
MiniGP is a minimalistic Gaussian Process (GP) library focused on regression tasks. It is designed to be simple and easy to understand.
MiniGP is a minimalistic Gaussian Process (GP) library focused on regression tasks. It is designed to be simple and easy to understand, super lightweight, and friendly to researchers and developers.

## Motivation
Although there are many successful GP libraries, such as GPy, GPflow, and GPyTorch, we find them difficult for beginners to use and very time-consuming to customize. It takes a lot of time to understand the structure of the library and the GP model, by which point the user (a young research student) may have given up.

Thus we want to create a simple and easy-to-use GP library that can be used by anyone. MiniGP is designed to be simple and easy to understand. It is a great tool for educational purposes. We also try to make it easy for anyone to use without a lot of background knowledge of GP.

## Useful and practical GP Models:
- [CIGP](https://github.com/IceLab-X/Mini-GP/blob/6899d3fb947293122d758fb6ef4dd4799a799eac/core/cigp.py): simple yet accurate multi-output regression model with complexity $O(n^3 d)$ for n training points with d outputs.
- [NeuralKernel](https://github.com/IceLab-X/Mini-GP/blob/64873663f7efb63de9a6f33d1de207e7a2db1f5d/GPmodels_Classic/04_neuralKernelGP.ipynb): automatic kernel learning with neural network structured kernel.
- [GPU&GP](https://github.com/IceLab-X/Mini-GP/blob/64873663f7efb63de9a6f33d1de207e7a2db1f5d/GPmodels_Advance/01_GP&GPU_GPTutorial.ipynb): An inference method that leverages GPU acceleration for GP models. It can be used in sparse GP models to speed up computation when the number of inducing points is large.
- [AutoGP](https://github.com/IceLab-X/Mini-GP/blob/64873663f7efb63de9a6f33d1de207e7a2db1f5d/core/autoGP.py): A powerful GP model that incorporates a learnable input warp module, a deep neural kernel, and Sparse Gaussian Processes.

Although there are many successful GP libraries, such as GPy, GPflow, and Pyro, they are often too complex for beginners to understand, and things get worse when the user wants to customize the model. MiniGP is designed to be simple and easy to understand. It is a great tool for educational purposes. We also try to make it easy for anyone to use without a lot of background knowledge of GP.

## Installation
We do not have a pip package yet. You can install it by cloning the repository and running the code for your own purposes. At this stage we think it is better to keep it simple and customizable. It serves as demo code rather than a library, with some useful functions to make computation easier.

To start using MiniGP, you can clone the repository by running the following command:
```bash
git clone
```
You can start by running the [Demo.ipynb](https://github.com/IceLab-X/Mini-GP/blob/bf66c980d55934d037992cd70625bd692ea02aaa/Demo.ipynb) to get a taste of the library. You can also check the tutorials in the GPmodels_xxx folders to learn how to use the library.

Most models have two versions, the API version for direct use and the tutorial version for customized usage. The API version is in the 'core' folder, and the tutorial version is in the 'GPmodels_xxx' folder. A minimal usage sketch of the API version is shown below.
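The following sketch is only an illustration of how an API model from the 'core' folder might be called. It assumes the CIGP model lives in `core/cigp.py` (as renamed in this PR), that `ARDKernel` is exported from `core/kernel.py`, and that inputs and targets are `torch` tensors of shape `(n, 1)`; treat it as a sketch rather than the definitive interface.
```python
import torch
from core.kernel import ARDKernel
from core.cigp import cigp   # baseline conditional-independent GP (CIGP)

# Toy 1-D regression data (assumed shapes: inputs (n, 1), targets (n, 1))
xtr = torch.linspace(0, 1, 50).reshape(-1, 1)
ytr = torch.sin(2 * torch.pi * xtr) + 0.05 * torch.randn_like(xtr)
xte = torch.linspace(0, 1, 100).reshape(-1, 1)

model = cigp(kernel=ARDKernel(1))                     # ARD kernel with 1 input dimension
model.train_adam(xtr, ytr, niteration=100, lr=0.1)    # optimize kernel and noise hyperparameters
mean, var = model.forward(xtr, ytr, xte)              # posterior mean and per-point variance
```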




## Structure
- **core:** This folder contains all the core functions for Gaussian Processes (GP). It serves as the backbone of the library. Additionally, it includes Python scripts for GP models that are designed to be easy and quick to use for research and experimentation. The folder also contains a model comparison script and the corresponding results. More details can be found in the README file within the core folder.
- **core:** This folder contains all the core functions (computing likelihood, matrix inversion, kernels, etc.) for Gaussian Processes (GP). It serves as the backbone of the library.
Additionally, it includes API GP models (.py) that are designed to be directly called for research and experimentation.
More details can be found in the [README](https://github.com/IceLab-X/Mini-GP/blob/64873663f7efb63de9a6f33d1de207e7a2db1f5d/core/README.md) file within the core folder.

<!-- - Self-contained GP models and signiture GPTutorials for educational purposes.
Several GP models that are self-contained and practical to use (we use them in many of our research projects). -->

- **GPmodels_Advance:** Advanced GP models, including GP with GPU acceleration and automatic GP.
- 01_GP&GPU: a GP model leveraging GPU acceleration.
- 01_GP&GPU_GPTutorial: Algorithms for GP leveraging GPU acceleration.
- 02_DynamicModel_GPTutorial: GP dynamic model in Chinese. [Original paper](https://www.dgp.toronto.edu/~jmwang/gpdm/nips05final.pdf)
- 01_GP&GPU: Algorithms for GP leveraging GPU acceleration.
- 02_DynamicModel_GPTutorial: GP dynamic model in Chinese. ([Gaussian Process Dynamical Models](https://www.dgp.toronto.edu/~jmwang/gpdm/nips05final.pdf))

- **GPmodels_Classic:** Basic GP models and their variations, such as DeepKernel GP and InputWarp GP. It demonstrates how to build a GP model with GP_CommonCalculation.
- 01_simpleGP_GPTutorial: simple GP tutorial in both English and Chinese. This is a good starting point for beginners.
- 01_simpleGP, a basic GP model. It demonstrates how to build a GP model with the GP_CommonCalculation.
- 02_deepKernelGP, a GP model with deep kernel. [Original paper](https://arxiv.org/abs/1511.02222)
- 02_deepKernelGP, a GP model with deep kernel. ([Deep Kernel Learning](https://arxiv.org/abs/1511.02222) )
- 03_logTransformWarpGP, a GP model with a log transform on the target values, which can improve model performance when the noise does not follow a Gaussian distribution.
- 04_neuralKernelGP, a GP model with neural kernel.

- **GPmodels_MultiOutput:** provides tools for implementing Gaussian Process models with multiple outputs.
- 01_IntrinsicModel: Foundation work for multi-output GP.
- 02_hogp_GPTutorial_Chinese: high-order GP in Chinese. [Original paper](https://proceedings.mlr.press/v89/zhe19a.html)
- 02_hogp_GPTutorial_Chinese: high-order GP in Chinese. ([Scalable High-Order Gaussian Process Regression](https://proceedings.mlr.press/v89/zhe19a.html))
- **GPmodels_Sparse:** provides tools for implementing sparse Gaussian Process models.
- 01_sgpr_GPTutorial: A detailed tutorial for Sparse Gaussian Process with variational learning of inducing points. [Original paper](https://proceedings.mlr.press/v5/titsias09a/titsias09a.pdf)
- 01_sgpr_GPTutorial: A detailed tutorial for Sparse Gaussian Process with variational learning of inducing points. ([Variational Learning of Inducing Variables in Sparse Gaussian Processes](https://proceedings.mlr.press/v5/titsias09a/titsias09a.pdf))
- 02_svgp, A demo implementing mini-batch gradient descent on SVGP, which allows training a GP with 10k inputs in 2 seconds.
- 02_svgp_GPTutorial, A detailed tutorial for the Stochastic Variational Inference GP model that allows mini-batch training. [Original paper](https://arxiv.org/abs/1411.2005)
- 02_svgp_GPTutorial, A detailed tutorial for the Stochastic Variational Inference GP model that allows mini-batch training. ([Gaussian Process for Big Data](https://arxiv.org/abs/1411.2005))
- **Debug_NaNError_FAQ:** Frequently asked questions for GP models. It contains techniques for solving NaN problems in GP models. More details can be found in the README file in the Debug_NaNError_FAQ folder.
- **Bayesian_Optimization:** This folder contains useful tools for Bayesian optimization
- acq: A Python script including several widely used acquisition functions.
- BO_demo: A demonstration of the process of Bayesian optimization.
- **asset:** This folder contains the Python scripts for the model comparison and regression test, as well as the results in both .csv and .png formats. For more details, please refer to the README.md in the folder.
<img src="https://github.com/IceLab-X/Mini-GP/blob/bf66c980d55934d037992cd70625bd692ea02aaa/asset/Bayesian_Optimization.png" width="700"/>
- **experiment:** This folder contains the Python scripts for the experiments.

- **asset:** This folder contains the results of the experiments in both .csv and .png formats.

- **Model_comparison.py:** A Python script that compares the performance of different GP models on various synthetic datasets, including periodic, warped, and polynomial data. The default models are set to autoGP and its base model vsgp.
<img src="https://github.com/IceLab-X/Mini-GP/blob/29a021305924757376b25905c75b36bdbdfc5017/asset/Model_comparison_autoGP.png"/>


- **Regression_test.py:** A Python script that tests the accuracy and training speed on different sizes of training sets. The results are stored in result1.csv and result2.csv.

- **result1.csv:** The result of the regression test for different training set sizes.

<img src="https://github.com/IceLab-X/Mini-GP/blob/29a021305924757376b25905c75b36bdbdfc5017/asset/Model_comparison_result1.png"/>
Expand Down
Empty file added __init__.py
Empty file.
Binary file added asset/Bayesian_Optimization.png
72 changes: 62 additions & 10 deletions core/GP_CommonCalculation.py
@@ -53,6 +53,58 @@ def conjugate_gradient(A, b, x0=None, tol=1e-1, max_iter=1000):

return x

def lanc_quad_logdet(A, m=10, nvecs=10):
    """
    Estimate the log-determinant of a symmetric positive definite matrix using
    the Stochastic Lanczos Quadrature (SLQ) method.

    Parameters:
        A (torch.Tensor): The symmetric positive definite input matrix.
        m (int): Number of Lanczos steps (degree).
        nvecs (int): Number of starting vectors.

    Returns:
        torch.Tensor: The log-determinant estimate, averaged over the starting vectors.
    """
    n = A.shape[0]
    z1 = torch.zeros(nvecs, dtype=A.dtype, device=A.device)

    for ii in range(nvecs):
        w = torch.sign(torch.randn(n, dtype=A.dtype, device=A.device))  # Random Rademacher vector
        v0 = w / (torch.norm(w) + EPS)

        # Lanczos algorithm
        V = torch.zeros((n, m), dtype=A.dtype, device=A.device)
        alpha = torch.zeros(m, dtype=A.dtype, device=A.device)
        beta = torch.zeros(m - 1, dtype=A.dtype, device=A.device)
        V[:, 0] = v0.clone()

        w = A @ V[:, 0].clone()

        alpha[0] = torch.dot(V[:, 0].clone(), w)
        w = w - alpha[0].clone() * V[:, 0].clone()

        for j in range(1, m):
            beta[j - 1] = torch.norm(w)
            if beta[j - 1] != 0:
                V[:, j] = w / (beta[j - 1].clone() + EPS)
            w = A @ V[:, j].clone() - beta[j - 1].clone() * V[:, j - 1].clone()
            alpha[j] = torch.dot(V[:, j].clone(), w)
            w = w - alpha[j].clone() * V[:, j].clone()

        # Tridiagonal Lanczos matrix
        H = torch.diag(alpha) + torch.diag(beta, 1) + torch.diag(beta, -1)

        eigvals, eigvecs = torch.linalg.eig(H)
        eigvals, eigvecs = eigvals.real, eigvecs.real

        theta = torch.abs(eigvals)
        gamma2 = eigvecs[0, :] ** 2

        # Quadrature estimate: sum of gamma2 * log(theta), scaled by n
        count = torch.sum(gamma2 * torch.log(theta))
        z1[ii] = (count * n).real

    return z1.mean()
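# Illustrative check (an assumption, not part of the library): for a small, well-conditioned
# SPD matrix, the SLQ estimate should be close to the exact Cholesky log-determinant, e.g.
#   A = torch.randn(200, 200); A = A @ A.T / 200 + torch.eye(200)   # well-conditioned SPD matrix
#   exact = 2 * torch.linalg.cholesky(A).diag().log().sum()
#   approx = lanc_quad_logdet(A, m=20, nvecs=30)
# For such a matrix the two values typically agree to within a few percent.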

def compute_inverse_and_log_det_positive_eigen(matrix):
"""
@@ -92,7 +144,7 @@ def Gaussian_log_likelihood(y, cov, Kinv_method='cholesky'):
mean (torch.Tensor): The mean of the Gaussian distribution.
cov (torch.Tensor): The covariance matrix of the Gaussian distribution.
Kinv_method (str, optional): The method to compute the inverse of the covariance matrix.
Defaults to 'cholesky3'.
Defaults to 'cholesky'.

Returns:
torch.Tensor: The log-likelihood of the Gaussian distribution.
@@ -121,23 +173,23 @@ def Gaussian_log_likelihood(y, cov, Kinv_method='cholesky'):
else:
gamma = torch.linalg.solve_triangular(L, y, upper=False)
return -0.5 * (gamma.T @ gamma + 2 * L.diag().log().sum() + len(y) * np.log(2 * np.pi))

elif Kinv_method == 'torch_distribution_MN1':
L = torch.linalg.cholesky(cov)
return torch.distributions.MultivariateNormal(y, scale_tril=L).log_prob(y)
elif Kinv_method == 'torch_distribution_MN2':
return torch.distributions.MultivariateNormal(y, cov).log_prob(y)
elif Kinv_method == 'eigen':
K_inv, log_det_K = compute_inverse_and_log_det_positive_eigen(cov)
return -0.5 * (y.T @ K_inv @ y + log_det_K + len(y) * np.log(2 * np.pi))
elif Kinv_method == 'conjugate':
L = torch.linalg.cholesky(cov)

Sigma_inv_y = conjugate_gradient(cov, y)

return -0.5 * (torch.matmul(y.t(), Sigma_inv_y) - 0.5 * len(y) * torch.log(
2 * torch.tensor(torch.pi))) - L.diag().log().sum()
2 * torch.tensor(torch.pi))) - 0.5 * lanc_quad_logdet(cov)
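# Illustrative note (an assumption about intent): this branch targets the standard
# Gaussian log-likelihood
#   log N(y | 0, Sigma) = -0.5 * (y^T Sigma^{-1} y + log|Sigma| + n * log(2*pi)),
# computing y^T Sigma^{-1} y with conjugate gradients and approximating log|Sigma|
# with the stochastic Lanczos quadrature estimator defined above.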
elif Kinv_method == 'torch_distribution_MN1':
L = torch.linalg.cholesky(cov)
return torch.distributions.MultivariateNormal(y, scale_tril=L).log_prob(y)
elif Kinv_method == 'torch_distribution_MN2':
return torch.distributions.MultivariateNormal(y, cov).log_prob(y)

else:
raise ValueError('Kinv_method should be either direct or cholesky')
raise ValueError('Kinv_method is not supported.')


def conditional_Gaussian(y, Sigma, K_s, K_ss, Kinv_method='cholesky'):
10 changes: 5 additions & 5 deletions core/README.md
@@ -9,11 +9,11 @@ This README provides an overview of the folder's structure, key files, and instr
## Python scripts for the core models

- autoGP.py: A GP model that automatically standardizes the data and chooses the kernel for you. It is a simple GP model that is easy for general users to use.
- deepkernelGP.py: A GP model that uses a deep kernel to model the data. It is a more complex GP model that is suitable for users who want to model complex data. [Original paper](https://arxiv.org/abs/1511.02222)
- cigp_baseline.py: A basic GP model that is used as a baseline method for comparing the performance of the other GP models.
- hogp.py: A GP model that uses high-order output to model the data. It is suitable for users who want to model data with high-order output. [Original paper](https://proceedings.mlr.press/v89/zhe19a.html)
- sgpr.py: A sparse GP model that uses variational inference to approximate the posterior distribution. It is suitable for users who want to model data with size between 1k and 10k. [Original paper](https://proceedings.mlr.press/v5/titsias09a/titsias09a.pdf)
- svgp.py: A sparse GP model that uses stochastic variational inference, which allows mini-batch gradient descent. It is suitable for users who want to model data with size over 10k. [Original paper](https://arxiv.org/abs/1411.2005)
- deepkernelGP.py: A GP model that uses a deep kernel to model the data. It is a more complex GP model that is suitable for users who want to model complex data.
- cigp.py: A basic GP model that is used as a baseline method for comparing the performance of the other GP models.
- hogp.py: A GP model that uses high-order output to model the data. It is suitable for users who want to model data with high-order output.
- sgpr.py: A sparse GP model that uses variational inference to approximate the posterior distribution. It is suitable for users who want to model data with size between 1k and 10k.
- svgp.py: A sparse GP model that uses stochastic variational inference, which allows mini-batch gradient descent. It is suitable for users who want to model data with size over 10k.
- parametricGP.py: A seemingly self-contradictory idea, but a highly efficient and accurate GP model for large datasets. It is suitable for users who want to model data with size over 10k.
- inputWarpedGP.py: A GP model that uses input warping to model the data. It is suitable for users who want to model non-stationary data.

22 changes: 13 additions & 9 deletions core/cigp_baseline.py → core/cigp.py
@@ -1,12 +1,15 @@
import torch
import os
# # Conditional independent Gaussian process (CIGP) for vector output regression based on pytorch
# #
# # CIGP use a single kernel for each output. Thus the log likelihood is simply a sum of the log likelihood of each output.
import sys
# sys.path.append('/Users/zidongchen/PycharmProjects/MiniGP/Mini-GP')
import os
_path = os.path.dirname(__file__).split(os.sep)
sys.path.append(os.sep.join(_path[:_path.index('MiniGP')+1]))

import torch
import torch.nn as nn
from core.kernel import ARDKernel, NeuralKernel, PeriodicKernel, MaternKernel, PolynomialKernel
import numpy as np
from core.kernel import ARDKernel
import matplotlib.pyplot as plt
import data_sample.generate_example_data as data
import core.GP_CommonCalculation as GP

JITTER = 1e-6
@@ -26,7 +29,7 @@ def __init__(self, kernel=ARDKernel(1), log_beta=None, K_inv_method='cholesky'):
self.kernel = kernel
self.K_inv_method = K_inv_method

def forward(self, xtr, ytr, xte):
def forward(self, xtr, ytr, xte, ytr_var=None):
Sigma = self.kernel(xtr, xtr) + self.log_beta.exp().pow(-1) * torch.eye(xtr.size(0),
device=self.log_beta.device) \
+ JITTER * torch.eye(xtr.size(0), device=self.log_beta.device)
@@ -39,10 +42,11 @@ def forward(self, xtr, ytr, xte):

return mean, var_diag

def negative_log_likelihood(self, xtr, ytr):
def negative_log_likelihood(self, xtr, ytr, ytr_var=None):
Sigma = self.kernel(xtr, xtr) + self.log_beta.exp().pow(-1) * torch.eye(
xtr.size(0), device=self.log_beta.device) + JITTER * torch.eye(xtr.size(0), device=self.log_beta.device)

if ytr_var is not None:
Sigma = Sigma + ytr_var.diag()* torch.eye(xtr.size(0)).to(xtr.device)
return -GP.Gaussian_log_likelihood(ytr, Sigma, Kinv_method=self.K_inv_method)
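# Illustrative note (an assumption): ytr_var is expected to be a 1-D tensor of n per-point
# noise variances, so that ytr_var.diag() forms an n x n diagonal matrix added to Sigma;
# this lets a user supply known heteroscedastic observation noise, e.g.
#   nll = model.negative_log_likelihood(xtr, ytr, ytr_var=torch.full((xtr.shape[0],), 1e-2))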

def train_adam(self, xtr, ytr, niteration=10, lr=0.1):