From 988b42e159111f52dbba3301e7b1ae1187d2dc12 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:28:42 +0800 Subject: [PATCH 01/32] Add Structured Covariance Estimator to riskmodel.py --- qlib/model/riskmodel.py | 141 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 3 deletions(-) diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 07a1e0c9f65..32984ed6a3d 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -39,7 +39,7 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True ) -> Union[pd.DataFrame, np.ndarray]: """ Args: @@ -373,7 +373,8 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( + np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 ) roff = 2 * roff1 - roff3 rho = rdiag + roff @@ -433,7 +434,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray: if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) @@ -465,3 +466,137 @@ def _predict(self, X: np.ndarray) -> np.ndarray: SigmaY = SigmaU + Lowrank return SigmaY + + +class StructuredCovEstimator(RiskModel): + """Structured Covariance Estimator + + This estimator assumes observations can be predicted by multiple factors + X = FB + U + where `F` can be specified by explicit risk factors or latent factors. + + Therefore the structured covariance can be estimated by + cov(X) = F cov(B) F.T + cov(U) + + We use latent factor models to estimate the structured covariance. + Specifically, the following latent factor models are supported: + - `pca`: Principal Component Analysis + - `fa`: Factor Analysis + + Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and + precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 + """ + + FACTOR_MODEL_PCA = "pca" + FACTOR_MODEL_FA = "fa" + + def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: str = "ignore", + assume_centered: bool = False, scale_return: bool = True): + """ + Args: + factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). + num_factors (int): number of components to keep. + nan_option (str): nan handling option (`ignore`/`fill`). + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. 
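+
+        Example:
+            A minimal usage sketch (with hypothetical data; `X` is assumed to
+            already hold returns, hence `is_price=False`):
+
+                X = np.random.randn(252, 50)  # 252 observations of 50 stocks
+                est = StructuredCovEstimator(factor_model="pca", num_factors=10)
+                cov = est.predict(X, is_price=False)  # 50 x 50 structured covariance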
+ """ + super().__init__(nan_option, assume_centered, scale_return) + + assert factor_model in [ + self.FACTOR_MODEL_PCA, + self.FACTOR_MODEL_FA, + ], 'factor_model={} is not supported'.format(factor_model) + self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis + + self.num_factors = num_factors + + def predict( + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, + return_decomposed_components=False + ) -> Union[pd.DataFrame, np.ndarray, tuple]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. + """ + assert not return_corr or not return_decomposed_components, \ + 'Can only return either correlation matrix or decomposed components.' + + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + if return_decomposed_components: + F, cov_b, var_u = self._predict(X, return_structured=True) + return F, cov_b, var_u + else: + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + """ + covariance estimation implementation + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + return_structured (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or np.ndarray: decomposed covariance matrix or covariance matrix. 
+ """ + + model = self.solver(self.num_factors, random_state=0).fit(X) + + F = model.components_.T # num_features x num_factors + B = model.transform(X) # num_samples x num_factors + U = X - B @ F.T + cov_b = np.cov(B.T) # num_factors x num_factors + var_u = np.var(U, axis=0) # diagonal + + if return_structured: + return F, cov_b, var_u + + cov_x = F @ cov_b @ F.T + np.diag(var_u) + + return cov_x From 7b01c5cae7830d2b75c5566443f5a4559b5b2f40 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:30:26 +0800 Subject: [PATCH 02/32] Add an implementation of Enhanced Indexing to optimizer.py --- qlib/.DS_Store | Bin 0 -> 6148 bytes qlib/portfolio/optimizer.py | 129 ++++++++++++++++++++++++++++++++---- 2 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 qlib/.DS_Store diff --git a/qlib/.DS_Store b/qlib/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3b196d96a164ebf17b658d6f6d8c5ef19fb26c8c GIT binary patch literal 6148 zcmeHK%Wl&^6upx=txW}FQHgGlykS>WX{it_kU}Vn?vR3D0VuU=(^|NmD0Z;g5R^S% z0N;SbckloAHxWIzRhp0nDE)t`24cQXodQJ=0 zG9@>FLbZ`lK|@^IG+JQW0;_;k;9paK=kB_ivk7HXoxf)XsXPi(8G^yf!y}~apUU77 zd2~b)F>erctY&;O;%g&f%|sM5Mod_5U)*l69f~NxdJ;5v9~--ZHYJy`PvXJrW|?Rkd0?VRkk)wz)B!- zgQ;dimFZHcy5udogvWbXZ&~oHsPb`pshPpyu3yTu1gn5m;D1wq*9RYov1f3uQ7s+F z)DZyapja7v{#oD{-(b(+Tq9~=LWcr%s4zzip~F$`8(z=gT%!&rVGbX{JXx3%icn8S z`@W)+=xMaMRlq7xS71foc6k4Pa`E}U9%S#V0#<=5rGRku2m5_|lG$4qK92WVAL$T@ qjd^p8Dg>F?j+Mh(@g|Zo)cNcHdj{tkQ3A6+0!jv(Sq1*80>1zhm={X` literal 0 HcmV?d00001 diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 0e7d2725458..e04923ed6fa 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -28,13 +28,13 @@ class PortfolioOptimizer: OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -59,10 +59,10 @@ def __init__( self.tol = tol def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -151,7 +151,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -256,3 +256,108 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda warnings.warn(f"optimization not success ({sol.status})") return sol.x + + +class EnhancedIndexingOptimizer: + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. 
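+
+    In summary, `__call__` below solves (a sketch of the program; `F`, `covB`
+    and `varU` come from a structured risk model such as StructuredCovEstimator):
+        max_w   u'w - lamb * (v' covB v + sum(varU * w^2)),  with v = F'w
+        s.t.    w >= 0, sum(w) == 1,
+                -bench_dev <= w - w_bench <= bench_dev,
+                -inds_dev <= (w - w_bench)' inds_onehot <= inds_dev,
+                sum(|w - w0|) <= delta  (only when `w0` is given)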
+ """ + + START_FROM_W0 = 'w0' + START_FROM_BENCH = 'benchmark' + DO_NOT_START_FROM = '' + + def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, + scale_alpha=True, verbose: bool = False, warm_start: str = '', max_iters: int = 10000): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float): industry deviation limit + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" + self.inds_dev = inds_dev + + assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, + self.START_FROM_BENCH], "illegal warm start option" + self.start_from_w0 = (warm_start == self.START_FROM_W0) + self.start_from_bench = (warm_start == self.START_FROM_BENCH) + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, + w_bench: np.ndarray, inds_onehot: np.ndarray + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot (np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + d_inds = d_bench @ inds_onehot + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + d_inds >= -self.inds_dev, + d_inds <= self.inds_dev + ] + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print('Warning: try warm start with w0, but w0 is `None`.') + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != 'optimal': + print('Warning: solve failed.', prob.status) + + return np.asarray(w.value) From 9c2653f125e31e754f00ef6df0c455f8e828d78a Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:31:00 +0800 Subject: [PATCH 03/32] Add an implementation of Enhanced Indexing to optimizer.py --- qlib/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 qlib/.DS_Store diff --git a/qlib/.DS_Store b/qlib/.DS_Store deleted file mode 100644 index 
3b196d96a164ebf17b658d6f6d8c5ef19fb26c8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%Wl&^6upx=txW}FQHgGlykS>WX{it_kU}Vn?vR3D0VuU=(^|NmD0Z;g5R^S% z0N;SbckloAHxWIzRhp0nDE)t`24cQXodQJ=0 zG9@>FLbZ`lK|@^IG+JQW0;_;k;9paK=kB_ivk7HXoxf)XsXPi(8G^yf!y}~apUU77 zd2~b)F>erctY&;O;%g&f%|sM5Mod_5U)*l69f~NxdJ;5v9~--ZHYJy`PvXJrW|?Rkd0?VRkk)wz)B!- zgQ;dimFZHcy5udogvWbXZ&~oHsPb`pshPpyu3yTu1gn5m;D1wq*9RYov1f3uQ7s+F z)DZyapja7v{#oD{-(b(+Tq9~=LWcr%s4zzip~F$`8(z=gT%!&rVGbX{JXx3%icn8S z`@W)+=xMaMRlq7xS71foc6k4Pa`E}U9%S#V0#<=5rGRku2m5_|lG$4qK92WVAL$T@ qjd^p8Dg>F?j+Mh(@g|Zo)cNcHdj{tkQ3A6+0!jv(Sq1*80>1zhm={X` From 4000518698f0d5f929a2a493bdb9bd207a313d17 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 08:41:35 +0800 Subject: [PATCH 04/32] Separate specific implementation of Portfolio Optimizer to folder. --- .../portfolio_optimizer/enhanced_indexing.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 qlib/contrib/portfolio_optimizer/enhanced_indexing.py diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py new file mode 100644 index 00000000000..0c40a617ef7 --- /dev/null +++ b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py @@ -0,0 +1,112 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +import cvxpy as cp +from typing import Union + + +class EnhancedIndexingOptimizer: + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. + """ + + START_FROM_W0 = 'w0' + START_FROM_BENCH = 'benchmark' + DO_NOT_START_FROM = 'no_warm_start' + + def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, + scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float): industry deviation limit + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" + self.inds_dev = inds_dev + + assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, + self.START_FROM_BENCH], "illegal warm start option" + self.start_from_w0 = (warm_start == self.START_FROM_W0) + self.start_from_bench = (warm_start == self.START_FROM_BENCH) + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, + w_bench: np.ndarray, inds_onehot: np.ndarray + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot 
(np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + d_inds = d_bench @ inds_onehot + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + d_inds >= -self.inds_dev, + d_inds <= self.inds_dev + ] + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print('Warning: try warm start with w0, but w0 is `None`.') + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != 'optimal': + print('Warning: solve failed.', prob.status) + + return np.asarray(w.value) From b2e2142594d38c0afa4e31d560a17cfad05e2705 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 09:00:12 +0800 Subject: [PATCH 05/32] Applied slight modification to follow PEP 8. --- qlib/portfolio/optimizer.py | 114 +++--------------------------------- 1 file changed, 7 insertions(+), 107 deletions(-) diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index e04923ed6fa..104e2c441dc 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -42,6 +42,7 @@ def __init__( lamb (float): risk aversion parameter (larger `lamb` means more focus on return) delta (float): turnover rate limit alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix tol (float): tolerance for optimization termination """ assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" @@ -57,6 +58,7 @@ def __init__( self.alpha = alpha self.tol = tol + self.scale_alpha = scale_alpha def __call__( self, @@ -94,7 +96,7 @@ def __call__( w0 = w0.values # scale alpha to match volatility - if u is not None: + if u is not None and self.scale_alpha: u = u / u.std() u *= np.mean(np.diag(S)) ** 0.5 @@ -247,7 +249,10 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: - wrapped_obj = lambda x: obj(x) + self.alpha * np.sum(np.square(x)) + def opt_obj(x): + return obj(x) + self.alpha * np.sum(np.square(x)) + + wrapped_obj = opt_obj # solve x0 = np.ones(n) / n # init results @@ -256,108 +261,3 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda warnings.warn(f"optimization not success ({sol.status})") return sol.x - - -class EnhancedIndexingOptimizer: - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - This optimizer always assumes full investment and no-shorting. 
- """ - - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = '' - - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = '', max_iters: int = 10000): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float): industry deviation limit - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - d_inds = d_bench @ inds_onehot - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev - ] - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) - - return np.asarray(w.value) From 2cc057e438cb412ff60a39d6e83df6724f29b4fe Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 09:09:03 +0800 Subject: [PATCH 06/32] Fix minor mismatches of type hints. 
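
Why `Index.equals` rather than `all(...)`: comparing two pandas indexes of
different lengths with `==` raises a ValueError, whereas `Index.equals` simply
returns False. A minimal illustration (hypothetical indexes, not part of the
diff below):

    import pandas as pd

    a = pd.Index(["A", "B", "C"])
    b = pd.Index(["A", "B"])
    print(a.equals(b))   # False
    # all(a == b)        # would raise ValueError: lengths must match

The `Callable` return annotations below are corrected for a similar reason:
the objective builders return closures, not arrays.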
---
 qlib/portfolio/optimizer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 104e2c441dc..87a8b7416f9 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -85,14 +85,14 @@ def __call__(
         if u is not None:
             assert len(u) == len(S), "`u` has mismatched shape"
             if isinstance(u, pd.Series):
-                assert all(u.index == index), "`u` has mismatched index"
+                assert u.index.equals(index), "`u` has mismatched index"
                 u = u.values
 
         # transform initial weights
         if w0 is not None:
             assert len(w0) == len(S), "`w0` has mismatched shape"
             if isinstance(w0, pd.Series):
-                assert all(w0.index == index), "`w0` has mismatched index"
+                assert w0.index.equals(index), "`w0` has mismatched index"
                 w0 = w0.values
 
         # scale alpha to match volatility
@@ -175,7 +175,7 @@ def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nda
         """
         return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
 
-    def _get_objective_gmv(self, S: np.ndarray) -> np.ndarray:
+    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
         """global minimum variance optimization objective
 
         Optimization objective
@@ -187,7 +187,7 @@ def func(x):
 
         return func
 
-    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> np.ndarray:
+    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable:
         """mean-variance optimization objective
 
         Optimization objective
@@ -201,7 +201,7 @@ def func(x):
 
         return func
 
-    def _get_objective_rp(self, S: np.ndarray) -> np.ndarray:
+    def _get_objective_rp(self, S: np.ndarray) -> Callable:
         """risk-parity optimization objective
 
         Optimization objective

From 9448a6e2c79a344516e17abba7060d6e62231582 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:23:48 +0800
Subject: [PATCH 07/32] Add an abstract class as the base class for all optimization-related portfolio constructions.
---
 .../portfolio_optimizer/enhanced_indexing.py |   4 +++-
 .../portfolio_optimizer/mean_variance.py     | 264 ++++++++++++++++++
 qlib/portfolio/optimizer.py                  | 264 +-----------------
 3 files changed, 274 insertions(+), 258 deletions(-)
 create mode 100644 qlib/contrib/portfolio_optimizer/mean_variance.py

diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
index 0c40a617ef7..323e3154b67 100644
--- a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
+++ b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
@@ -6,8 +6,10 @@
 import cvxpy as cp
 from typing import Union
 
+from ...portfolio.optimizer import BaseOptimizer
+
 
-class EnhancedIndexingOptimizer:
+class EnhancedIndexingOptimizer(BaseOptimizer):
     """
     Portfolio Optimizer with Enhanced Indexing
 
diff --git a/qlib/contrib/portfolio_optimizer/mean_variance.py b/qlib/contrib/portfolio_optimizer/mean_variance.py
new file mode 100644
index 00000000000..c3c4f7a3d01
--- /dev/null
+++ b/qlib/contrib/portfolio_optimizer/mean_variance.py
@@ -0,0 +1,264 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+ +import warnings +import numpy as np +import pandas as pd +import scipy.optimize as so +from typing import Optional, Union, Callable, List + +from ...portfolio.optimizer import BaseOptimizer + + +class PortfolioOptimizer(BaseOptimizer): + """Portfolio Optimizer + + The following optimization algorithms are supported: + - `gmv`: Global Minimum Variance Portfolio + - `mvo`: Mean Variance Optimized Portfolio + - `rp`: Risk Parity + - `inv`: Inverse Volatility + + Note: + This optimizer always assumes full investment and no-shorting. + """ + + OPT_GMV = "gmv" + OPT_MVO = "mvo" + OPT_RP = "rp" + OPT_INV = "inv" + + def __init__( + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, + ): + """ + Args: + method (str): portfolio optimization method + lamb (float): risk aversion parameter (larger `lamb` means more focus on return) + delta (float): turnover rate limit + alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + tol (float): tolerance for optimization termination + """ + assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" + self.method = method + + assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, f"turnover limit `delta` should be positive" + self.delta = delta + + assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" + self.alpha = alpha + + self.tol = tol + self.scale_alpha = scale_alpha + + def __call__( + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + S (np.ndarray or pd.DataFrame): covariance matrix + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) + w0 (np.ndarray or pd.Series): initial weights (for turnover control) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # transform dataframe into array + index = None + if isinstance(S, pd.DataFrame): + index = S.index + S = S.values + + # transform alpha + if u is not None: + assert len(u) == len(S), "`u` has mismatched shape" + if isinstance(u, pd.Series): + assert u.index.equals(index), "`u` has mismatched index" + u = u.values + + # transform initial weights + if w0 is not None: + assert len(w0) == len(S), "`w0` has mismatched shape" + if isinstance(w0, pd.Series): + assert w0.index.equals(index), "`w0` has mismatched index" + w0 = w0.values + + # scale alpha to match volatility + if u is not None and self.scale_alpha: + u = u / u.std() + u *= np.mean(np.diag(S)) ** 0.5 + + # optimize + w = self._optimize(S, u, w0) + + # restore index if needed + if index is not None: + w = pd.Series(w, index=index) + + return w + + def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: + + # inverse volatility + if self.method == self.OPT_INV: + if u is not None: + warnings.warn("`u` is set but will not be used for `inv` portfolio") + if w0 is not None: + warnings.warn("`w0` is set but will not be used for `inv` portfolio") + return self._optimize_inv(S) + + # global minimum variance + if self.method == self.OPT_GMV: + if u is not None: + warnings.warn("`u` is set but will not be used for `gmv` portfolio") + return self._optimize_gmv(S, w0) + + # mean-variance + if self.method == self.OPT_MVO: + return 
self._optimize_mvo(S, u, w0) + + # risk parity + if self.method == self.OPT_RP: + if u is not None: + warnings.warn("`u` is set but will not be used for `rp` portfolio") + return self._optimize_rp(S, w0) + + def _optimize_inv(self, S: np.ndarray) -> np.ndarray: + """Inverse volatility""" + vola = np.diag(S) ** 0.5 + w = 1 / vola + w /= w.sum() + return w + + def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize global minimum variance portfolio + + This method solves the following optimization problem + min_w w' S w + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix. + """ + return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) + + def _optimize_mvo( + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + ) -> np.ndarray: + """optimize mean-variance portfolio + + This method solves the following optimization problem + min_w - w' u + lamb * w' S w + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix, `u` is the expected returns, + and `lamb` is the risk aversion parameter. + """ + return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) + + def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize risk parity portfolio + + This method solves the following optimization problem + min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix and `N` is the number of stocks. + """ + return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0)) + + def _get_objective_gmv(self, S: np.ndarray) -> Callable: + """global minimum variance optimization objective + + Optimization objective + min_w w' S w + """ + + def func(x): + return x @ S @ x + + return func + + def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable: + """mean-variance optimization objective + + Optimization objective + min_w - w' u + lamb * w' S w + """ + + def func(x): + risk = x @ S @ x + ret = x @ u + return -ret + self.lamb * risk + + return func + + def _get_objective_rp(self, S: np.ndarray) -> Callable: + """risk-parity optimization objective + + Optimization objective + min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 + """ + + def func(x): + N = len(x) + Sx = S @ x + xSx = x @ Sx + return np.sum((x - xSx / Sx / N) ** 2) + + return func + + def _get_constrains(self, w0: Optional[np.ndarray] = None): + """optimization constraints + + Defines the following constraints: + - no shorting and leverage: 0 <= w <= 1 + - full investment: sum(w) == 1 + - turnover constraint: |w - w0| <= delta + """ + + # no shorting and leverage + bounds = so.Bounds(0.0, 1.0) + + # full investment constraint + cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0 + + # turnover constraint + if w0 is not None: + cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0 + + return bounds, cons + + def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray: + """solve optimization + + Args: + n (int): number of parameters + obj (callable): optimization objective + bounds (Bounds): bounds of parameters + cons (list): optimization constraints + """ + # add l2 regularization + wrapped_obj = obj + if self.alpha > 0: + def opt_obj(x): + return obj(x) + self.alpha * np.sum(np.square(x)) + + wrapped_obj = opt_obj + + # solve + x0 = np.ones(n) / n # init results + sol = so.minimize(wrapped_obj, x0, 
bounds=bounds, constraints=cons, tol=self.tol) + if not sol.success: + warnings.warn(f"optimization not success ({sol.status})") + + return sol.x diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 87a8b7416f9..c63d936564d 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -1,263 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import warnings -import numpy as np -import pandas as pd -import scipy.optimize as so +import abc -from typing import Optional, Union, Callable, List +class BaseOptimizer(abc.ABC): + """Modeling things""" -class PortfolioOptimizer: - """Portfolio Optimizer - - The following optimization algorithms are supported: - - `gmv`: Global Minimum Variance Portfolio - - `mvo`: Mean Variance Optimized Portfolio - - `rp`: Risk Parity - - `inv`: Inverse Volatility - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - OPT_GMV = "gmv" - OPT_MVO = "mvo" - OPT_RP = "rp" - OPT_INV = "inv" - - def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, - ): - """ - Args: - method (str): portfolio optimization method - lamb (float): risk aversion parameter (larger `lamb` means more focus on return) - delta (float): turnover rate limit - alpha (float): l2 norm regularizer - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - tol (float): tolerance for optimization termination - """ - assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" - self.method = method - - assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, f"turnover limit `delta` should be positive" - self.delta = delta - - assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" - self.alpha = alpha - - self.tol = tol - self.scale_alpha = scale_alpha - - def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - S (np.ndarray or pd.DataFrame): covariance matrix - u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) - w0 (np.ndarray or pd.Series): initial weights (for turnover control) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # transform dataframe into array - index = None - if isinstance(S, pd.DataFrame): - index = S.index - S = S.values - - # transform alpha - if u is not None: - assert len(u) == len(S), "`u` has mismatched shape" - if isinstance(u, pd.Series): - assert u.index.equals(index), "`u` has mismatched index" - u = u.values - - # transform initial weights - if w0 is not None: - assert len(w0) == len(S), "`w0` has mismatched shape" - if isinstance(w0, pd.Series): - assert w0.index.equals(index), "`w0` has mismatched index" - w0 = w0.values - - # scale alpha to match volatility - if u is not None and self.scale_alpha: - u = u / u.std() - u *= np.mean(np.diag(S)) ** 0.5 - - # optimize - w = self._optimize(S, u, w0) - - # restore index if needed - if index is not None: - w = pd.Series(w, index=index) - - return w - - def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: - - # inverse volatility - if self.method == self.OPT_INV: - if u is not None: - warnings.warn("`u` is set but will 
not be used for `inv` portfolio") - if w0 is not None: - warnings.warn("`w0` is set but will not be used for `inv` portfolio") - return self._optimize_inv(S) - - # global minimum variance - if self.method == self.OPT_GMV: - if u is not None: - warnings.warn("`u` is set but will not be used for `gmv` portfolio") - return self._optimize_gmv(S, w0) - - # mean-variance - if self.method == self.OPT_MVO: - return self._optimize_mvo(S, u, w0) - - # risk parity - if self.method == self.OPT_RP: - if u is not None: - warnings.warn("`u` is set but will not be used for `rp` portfolio") - return self._optimize_rp(S, w0) - - def _optimize_inv(self, S: np.ndarray) -> np.ndarray: - """Inverse volatility""" - vola = np.diag(S) ** 0.5 - w = 1 / vola - w /= w.sum() - return w - - def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize global minimum variance portfolio - - This method solves the following optimization problem - min_w w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix. - """ - return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) - - def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None - ) -> np.ndarray: - """optimize mean-variance portfolio - - This method solves the following optimization problem - min_w - w' u + lamb * w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix, `u` is the expected returns, - and `lamb` is the risk aversion parameter. - """ - return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) - - def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize risk parity portfolio - - This method solves the following optimization problem - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix and `N` is the number of stocks. 
-        """
-        return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
-
-    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
-        """global minimum variance optimization objective
-
-        Optimization objective
-            min_w w' S w
-        """
-
-        def func(x):
-            return x @ S @ x
-
-        return func
-
-    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable:
-        """mean-variance optimization objective
-
-        Optimization objective
-            min_w - w' u + lamb * w' S w
-        """
-
-        def func(x):
-            risk = x @ S @ x
-            ret = x @ u
-            return -ret + self.lamb * risk
-
-        return func
-
-    def _get_objective_rp(self, S: np.ndarray) -> Callable:
-        """risk-parity optimization objective
-
-        Optimization objective
-            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
-        """
-
-        def func(x):
-            N = len(x)
-            Sx = S @ x
-            xSx = x @ Sx
-            return np.sum((x - xSx / Sx / N) ** 2)
-
-        return func
-
-    def _get_constrains(self, w0: Optional[np.ndarray] = None):
-        """optimization constraints
-
-        Defines the following constraints:
-        - no shorting and leverage: 0 <= w <= 1
-        - full investment: sum(w) == 1
-        - turnover constraint: |w - w0| <= delta
-        """
-
-        # no shorting and leverage
-        bounds = so.Bounds(0.0, 1.0)
-
-        # full investment constraint
-        cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]  # == 0
-
-        # turnover constraint
-        if w0 is not None:
-            cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))})  # >= 0
-
-        return bounds, cons
-
-    def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray:
-        """solve optimization
-
-        Args:
-            n (int): number of parameters
-            obj (callable): optimization objective
-            bounds (Bounds): bounds of parameters
-            cons (list): optimization constraints
-        """
-        # add l2 regularization
-        wrapped_obj = obj
-        if self.alpha > 0:
-            def opt_obj(x):
-                return obj(x) + self.alpha * np.sum(np.square(x))
-
-            wrapped_obj = opt_obj
-
-        # solve
-        x0 = np.ones(n) / n  # init results
-        sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
-        if not sol.success:
-            warnings.warn(f"optimization not success ({sol.status})")
-
-        return sol.x
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs) -> object:
+        """ Generate a optimized portfolio allocation """
+        pass

From 42f882504e09d36f20c29f3eaafa11f0249144ed Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:25:48 +0800
Subject: [PATCH 08/32] Reformat code to follow PEP 8.
---
 qlib/model/base.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qlib/model/base.py b/qlib/model/base.py
index 5a295787f76..a7001f0a67b 100644
--- a/qlib/model/base.py
+++ b/qlib/model/base.py
@@ -43,8 +43,9 @@ def fit(self, dataset: Dataset):
 
         # get weights
         try:
-            wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L)
-            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
+            wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"],
+                                                   data_key=DataHandlerLP.DK_L)
+            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
         except KeyError as e:
             w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
             w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index)

From f7d3e56561d4059bc85dc7922017706bfa322750 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:57:41 +0800
Subject: [PATCH 09/32] Merge optimization-related portfolio construction back to portfolio/optimizer.
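
With both optimizers now behind the `BaseOptimizer` interface, any allocation
strategy that implements `__call__` can be swapped in. A minimal sketch of a
custom optimizer against this interface (the equal-weight logic is purely
illustrative and not part of this patch):

    import numpy as np

    from qlib.portfolio.optimizer import BaseOptimizer

    class EqualWeightOptimizer(BaseOptimizer):
        """Naive 1/N allocation, just to demonstrate the interface."""

        def __call__(self, n_assets: int) -> np.ndarray:
            return np.ones(n_assets) / n_assets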
--- .../portfolio_optimizer/enhanced_indexing.py | 114 ------ .../portfolio_optimizer/mean_variance.py | 264 ------------- qlib/portfolio/optimizer.py | 367 +++++++++++++++++- 3 files changed, 366 insertions(+), 379 deletions(-) delete mode 100644 qlib/contrib/portfolio_optimizer/enhanced_indexing.py delete mode 100644 qlib/contrib/portfolio_optimizer/mean_variance.py diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py deleted file mode 100644 index 323e3154b67..00000000000 --- a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import numpy as np -import pandas as pd -import cvxpy as cp -from typing import Union - -from ...portfolio.optimizer import BaseOptimizer - - -class EnhancedIndexingOptimizer(BaseOptimizer): - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = 'no_warm_start' - - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float): industry deviation limit - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - d_inds = d_bench @ inds_onehot - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= 
self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev - ] - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) - - return np.asarray(w.value) diff --git a/qlib/contrib/portfolio_optimizer/mean_variance.py b/qlib/contrib/portfolio_optimizer/mean_variance.py deleted file mode 100644 index c3c4f7a3d01..00000000000 --- a/qlib/contrib/portfolio_optimizer/mean_variance.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import warnings -import numpy as np -import pandas as pd -import scipy.optimize as so -from typing import Optional, Union, Callable, List - -from ...portfolio.optimizer import BaseOptimizer - - -class PortfolioOptimizer(BaseOptimizer): - """Portfolio Optimizer - - The following optimization algorithms are supported: - - `gmv`: Global Minimum Variance Portfolio - - `mvo`: Mean Variance Optimized Portfolio - - `rp`: Risk Parity - - `inv`: Inverse Volatility - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - OPT_GMV = "gmv" - OPT_MVO = "mvo" - OPT_RP = "rp" - OPT_INV = "inv" - - def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, - ): - """ - Args: - method (str): portfolio optimization method - lamb (float): risk aversion parameter (larger `lamb` means more focus on return) - delta (float): turnover rate limit - alpha (float): l2 norm regularizer - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - tol (float): tolerance for optimization termination - """ - assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" - self.method = method - - assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, f"turnover limit `delta` should be positive" - self.delta = delta - - assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" - self.alpha = alpha - - self.tol = tol - self.scale_alpha = scale_alpha - - def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - S (np.ndarray or pd.DataFrame): covariance matrix - u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) - w0 (np.ndarray or pd.Series): initial weights (for turnover control) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # transform dataframe into array - index = None - if isinstance(S, pd.DataFrame): - index = S.index - S = S.values - - # transform alpha - if u is not None: - assert len(u) == len(S), "`u` has mismatched shape" - if isinstance(u, pd.Series): - assert u.index.equals(index), "`u` has mismatched index" - u = u.values - - # transform initial weights - if w0 is not None: - assert len(w0) == len(S), "`w0` has mismatched shape" - if isinstance(w0, pd.Series): - assert 
w0.index.equals(index), "`w0` has mismatched index" - w0 = w0.values - - # scale alpha to match volatility - if u is not None and self.scale_alpha: - u = u / u.std() - u *= np.mean(np.diag(S)) ** 0.5 - - # optimize - w = self._optimize(S, u, w0) - - # restore index if needed - if index is not None: - w = pd.Series(w, index=index) - - return w - - def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: - - # inverse volatility - if self.method == self.OPT_INV: - if u is not None: - warnings.warn("`u` is set but will not be used for `inv` portfolio") - if w0 is not None: - warnings.warn("`w0` is set but will not be used for `inv` portfolio") - return self._optimize_inv(S) - - # global minimum variance - if self.method == self.OPT_GMV: - if u is not None: - warnings.warn("`u` is set but will not be used for `gmv` portfolio") - return self._optimize_gmv(S, w0) - - # mean-variance - if self.method == self.OPT_MVO: - return self._optimize_mvo(S, u, w0) - - # risk parity - if self.method == self.OPT_RP: - if u is not None: - warnings.warn("`u` is set but will not be used for `rp` portfolio") - return self._optimize_rp(S, w0) - - def _optimize_inv(self, S: np.ndarray) -> np.ndarray: - """Inverse volatility""" - vola = np.diag(S) ** 0.5 - w = 1 / vola - w /= w.sum() - return w - - def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize global minimum variance portfolio - - This method solves the following optimization problem - min_w w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix. - """ - return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) - - def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None - ) -> np.ndarray: - """optimize mean-variance portfolio - - This method solves the following optimization problem - min_w - w' u + lamb * w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix, `u` is the expected returns, - and `lamb` is the risk aversion parameter. - """ - return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) - - def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize risk parity portfolio - - This method solves the following optimization problem - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix and `N` is the number of stocks. 
- """ - return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0)) - - def _get_objective_gmv(self, S: np.ndarray) -> Callable: - """global minimum variance optimization objective - - Optimization objective - min_w w' S w - """ - - def func(x): - return x @ S @ x - - return func - - def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable: - """mean-variance optimization objective - - Optimization objective - min_w - w' u + lamb * w' S w - """ - - def func(x): - risk = x @ S @ x - ret = x @ u - return -ret + self.lamb * risk - - return func - - def _get_objective_rp(self, S: np.ndarray) -> Callable: - """risk-parity optimization objective - - Optimization objective - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - """ - - def func(x): - N = len(x) - Sx = S @ x - xSx = x @ Sx - return np.sum((x - xSx / Sx / N) ** 2) - - return func - - def _get_constrains(self, w0: Optional[np.ndarray] = None): - """optimization constraints - - Defines the following constraints: - - no shorting and leverage: 0 <= w <= 1 - - full investment: sum(w) == 1 - - turnover constraint: |w - w0| <= delta - """ - - # no shorting and leverage - bounds = so.Bounds(0.0, 1.0) - - # full investment constraint - cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0 - - # turnover constraint - if w0 is not None: - cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0 - - return bounds, cons - - def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray: - """solve optimization - - Args: - n (int): number of parameters - obj (callable): optimization objective - bounds (Bounds): bounds of parameters - cons (list): optimization constraints - """ - # add l2 regularization - wrapped_obj = obj - if self.alpha > 0: - def opt_obj(x): - return obj(x) + self.alpha * np.sum(np.square(x)) - - wrapped_obj = opt_obj - - # solve - x0 = np.ones(n) / n # init results - sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol) - if not sol.success: - warnings.warn(f"optimization not success ({sol.status})") - - return sol.x diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index c63d936564d..728a04ea9db 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -2,12 +2,377 @@ # Licensed under the MIT License. import abc +import warnings +import numpy as np +import cvxpy as cp +import pandas as pd +import scipy.optimize as so +from typing import Optional, Union, Callable, List class BaseOptimizer(abc.ABC): - """Modeling things""" + """ Construct portfolio with a optimization related method """ @abc.abstractmethod def __call__(self, *args, **kwargs) -> object: """ Generate a optimized portfolio allocation """ pass + + +class PortfolioOptimizer(BaseOptimizer): + """Portfolio Optimizer + + The following optimization algorithms are supported: + - `gmv`: Global Minimum Variance Portfolio + - `mvo`: Mean Variance Optimized Portfolio + - `rp`: Risk Parity + - `inv`: Inverse Volatility + + Note: + This optimizer always assumes full investment and no-shorting. 
+ """ + + OPT_GMV = "gmv" + OPT_MVO = "mvo" + OPT_RP = "rp" + OPT_INV = "inv" + + def __init__( + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, + ): + """ + Args: + method (str): portfolio optimization method + lamb (float): risk aversion parameter (larger `lamb` means more focus on return) + delta (float): turnover rate limit + alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + tol (float): tolerance for optimization termination + """ + assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" + self.method = method + + assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, f"turnover limit `delta` should be positive" + self.delta = delta + + assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" + self.alpha = alpha + + self.tol = tol + self.scale_alpha = scale_alpha + + def __call__( + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + S (np.ndarray or pd.DataFrame): covariance matrix + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) + w0 (np.ndarray or pd.Series): initial weights (for turnover control) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # transform dataframe into array + index = None + if isinstance(S, pd.DataFrame): + index = S.index + S = S.values + + # transform alpha + if u is not None: + assert len(u) == len(S), "`u` has mismatched shape" + if isinstance(u, pd.Series): + assert u.index.equals(index), "`u` has mismatched index" + u = u.values + + # transform initial weights + if w0 is not None: + assert len(w0) == len(S), "`w0` has mismatched shape" + if isinstance(w0, pd.Series): + assert w0.index.equals(index), "`w0` has mismatched index" + w0 = w0.values + + # scale alpha to match volatility + if u is not None and self.scale_alpha: + u = u / u.std() + u *= np.mean(np.diag(S)) ** 0.5 + + # optimize + w = self._optimize(S, u, w0) + + # restore index if needed + if index is not None: + w = pd.Series(w, index=index) + + return w + + def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: + + # inverse volatility + if self.method == self.OPT_INV: + if u is not None: + warnings.warn("`u` is set but will not be used for `inv` portfolio") + if w0 is not None: + warnings.warn("`w0` is set but will not be used for `inv` portfolio") + return self._optimize_inv(S) + + # global minimum variance + if self.method == self.OPT_GMV: + if u is not None: + warnings.warn("`u` is set but will not be used for `gmv` portfolio") + return self._optimize_gmv(S, w0) + + # mean-variance + if self.method == self.OPT_MVO: + return self._optimize_mvo(S, u, w0) + + # risk parity + if self.method == self.OPT_RP: + if u is not None: + warnings.warn("`u` is set but will not be used for `rp` portfolio") + return self._optimize_rp(S, w0) + + def _optimize_inv(self, S: np.ndarray) -> np.ndarray: + """Inverse volatility""" + vola = np.diag(S) ** 0.5 + w = 1 / vola + w /= w.sum() + return w + + def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize global minimum variance portfolio + + This method solves the following 
+            min_w w' S w
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix.
+        """
+        return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0))
+
+    def _optimize_mvo(
+        self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """optimize mean-variance portfolio
+
+        This method solves the following optimization problem
+            min_w - w' u + lamb * w' S w
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix, `u` is the expected returns,
+        and `lamb` is the risk aversion parameter.
+        """
+        return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0))
+
+    def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray:
+        """optimize risk parity portfolio
+
+        This method solves the following optimization problem
+            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix and `N` is the number of stocks.
+        """
+        return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
+
+    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
+        """global minimum variance optimization objective
+
+        Optimization objective
+            min_w w' S w
+        """
+
+        def func(x):
+            return x @ S @ x
+
+        return func
+
+    def _get_objective_mvo(self, S: np.ndarray, u: Optional[np.ndarray] = None) -> Callable:
+        """mean-variance optimization objective
+
+        Optimization objective
+            min_w - w' u + lamb * w' S w
+        """
+
+        def func(x):
+            risk = x @ S @ x
+            ret = x @ u
+            return -ret + self.lamb * risk
+
+        return func
+
+    def _get_objective_rp(self, S: np.ndarray) -> Callable:
+        """risk-parity optimization objective
+
+        Optimization objective
+            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
+        """
+
+        def func(x):
+            N = len(x)
+            Sx = S @ x
+            xSx = x @ Sx
+            return np.sum((x - xSx / Sx / N) ** 2)
+
+        return func
+
+    def _get_constrains(self, w0: Optional[np.ndarray] = None):
+        """optimization constraints
+
+        Defines the following constraints:
+        - no shorting and no leverage: 0 <= w <= 1
+        - full investment: sum(w) == 1
+        - turnover constraint: |w - w0| <= delta
+        """
+
+        # no shorting and no leverage
+        bounds = so.Bounds(0.0, 1.0)
+
+        # full investment constraint
+        cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]  # == 0
+
+        # turnover constraint
+        if w0 is not None:
+            cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))})  # >= 0
+
+        return bounds, cons
+
+    def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray:
+        """solve optimization
+
+        Args:
+            n (int): number of parameters
+            obj (callable): optimization objective
+            bounds (Bounds): bounds of parameters
+            cons (list): optimization constraints
+        """
+        # add l2 regularization
+        wrapped_obj = obj
+        if self.alpha > 0:
+
+            def opt_obj(x):
+                return obj(x) + self.alpha * np.sum(np.square(x))
+
+            wrapped_obj = opt_obj
+
+        # solve
+        x0 = np.ones(n) / n  # initial guess: equal weights
+        sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
+        if not sol.success:
+            warnings.warn(f"optimization not successful ({sol.status})")
+
+        return sol.x
+
+
+class EnhancedIndexingOptimizer(BaseOptimizer):
+    """
+    Portfolio Optimizer with Enhanced Indexing
+
+    Note:
+        This optimizer always assumes full investment and no shorting.
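+
+    Example:
+        A minimal illustrative sketch with random placeholder inputs; in
+        practice `F`, `covB` and `varU` would be the decomposed components
+        returned by `StructuredCovEstimator.predict` with
+        `return_decomposed_components=True`:
+
+            import numpy as np
+
+            rng = np.random.default_rng(0)
+            n, k = 50, 5                                   # 50 stocks, 5 factors
+            u = rng.normal(size=n)                         # expected returns
+            F = rng.normal(size=(n, k))                    # factor exposures
+            covB = np.diag(rng.uniform(0.5, 1.0, size=k))  # factor covariance
+            varU = rng.uniform(0.01, 0.05, size=n)         # idiosyncratic variances
+            w_bench = np.full(n, 1 / n)                    # equal-weight benchmark
+            inds_onehot = np.eye(3)[rng.integers(0, 3, size=n)]  # industry one-hot
+
+            optimizer = EnhancedIndexingOptimizer(lamb=10, delta=0.4)
+            w = optimizer(u, F, covB, varU, w0=None, w_bench=w_bench, inds_onehot=inds_onehot)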
+    """
+
+    START_FROM_W0 = 'w0'
+    START_FROM_BENCH = 'benchmark'
+    DO_NOT_START_FROM = 'no_warm_start'
+
+    def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01,
+                 scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000):
+        """
+        Args:
+            lamb (float): risk aversion parameter (larger `lamb` means less focus on return)
+            delta (float): turnover rate limit
+            bench_dev (float): benchmark deviation limit
+            inds_dev (float): industry deviation limit
+            scale_alpha (bool): whether to scale alpha to match the overall volatility implied by the risk model
+            verbose (bool): whether to print detailed solver information
+            warm_start (str): warm start option (`w0`/`benchmark`/`no_warm_start`)
+                (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start)
+            max_iters (int): maximum number of solver iterations
+        """
+
+        assert lamb >= 0, "risk aversion parameter `lamb` should be non-negative"
+        self.lamb = lamb
+
+        assert delta >= 0, "turnover limit `delta` should be non-negative"
+        self.delta = delta
+
+        assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be non-negative"
+        self.bench_dev = bench_dev
+
+        assert inds_dev >= 0, "industry deviation limit `inds_dev` should be non-negative"
+        self.inds_dev = inds_dev
+
+        assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0,
+                              self.START_FROM_BENCH], "illegal warm start option"
+        self.start_from_w0 = (warm_start == self.START_FROM_W0)
+        self.start_from_bench = (warm_start == self.START_FROM_BENCH)
+
+        self.scale_alpha = scale_alpha
+        self.verbose = verbose
+        self.max_iters = max_iters
+
+    def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray,
+                 w_bench: np.ndarray, inds_onehot: np.ndarray
+                 ) -> Union[np.ndarray, pd.Series]:
+        """
+        Args:
+            u (np.ndarray): expected returns (a.k.a., alpha)
+            F, covB, varU (np.ndarray): see StructuredCovEstimator
+            w0 (np.ndarray): initial weights (for turnover control)
+            w_bench (np.ndarray): benchmark weights
+            inds_onehot (np.ndarray): industry membership (one-hot encoded)
+
+        Returns:
+            np.ndarray or pd.Series: optimized portfolio allocation
+        """
+        # scale alpha to match volatility
+        if self.scale_alpha:
+            u = u / u.std()
+            x_variance = np.mean(np.diag(F @ covB @ F.T) + varU)
+            u *= x_variance ** 0.5
+
+        w = cp.Variable(len(u))  # num_assets
+        v = w @ F  # num_factors
+        ret = w @ u
+        risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2))
+        obj = cp.Maximize(ret - self.lamb * risk)
+        d_bench = w - w_bench
+        d_inds = d_bench @ inds_onehot
+        cons = [
+            w >= 0,
+            cp.sum(w) == 1,
+            d_bench >= -self.bench_dev,
+            d_bench <= self.bench_dev,
+            d_inds >= -self.inds_dev,
+            d_inds <= self.inds_dev
+        ]
+        if w0 is not None:
+            turnover = cp.sum(cp.abs(w - w0))
+            cons.append(turnover <= self.delta)
+
+        warm_start = False
+        if self.start_from_w0:
+            if w0 is None:
+                print('Warning: warm start from `w0` was requested, but `w0` is None.')
+            else:
+                w.value = w0
+                warm_start = True
+        elif self.start_from_bench:
+            w.value = w_bench
+            warm_start = True
+
+        prob = cp.Problem(obj, cons)
+        prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters)
+
+        if prob.status != 'optimal':
+            print('Warning: the solver did not reach an optimal solution:', prob.status)
+
+        return np.asarray(w.value)

From 58f74cfd84b1f7e94de4f47b5c09d063ca8ed507 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 10:07:03 +0800
Subject: [PATCH 10/32] Reformat code to follow PEP 8.
--- qlib/model/riskmodel.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 32984ed6a3d..8eec73e00ae 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -1,11 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import warnings import numpy as np import pandas as pd - from typing import Union +from sklearn.decomposition import PCA, FactorAnalysis from qlib.model.base import BaseModel @@ -124,7 +123,7 @@ def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: X = np.nan_to_num(X) elif self.nan_option == self.MASK_NAN: X = np.ma.masked_invalid(X) - # centerize + # centralize if not self.assume_centered: X = X - np.nanmean(X, axis=0) return X @@ -162,8 +161,9 @@ class ShrinkCovEstimator(RiskModel): [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns with an application to portfolio selection. Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0 - [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance estimation. - IEEE Transactions on Signal Processing, 58(10), 5016–5029. https://doi.org/10.1109/TSP.2010.2053029 + [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance + estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029. + https://doi.org/10.1109/TSP.2010.2053029 [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip From 164687d54bfc3ea454eb72e060de16c6dc4a43c1 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 10:13:08 +0800 Subject: [PATCH 11/32] Add scikit-learn to dependencies. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f759945fd58..6582054b9c6 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ "tornado", "joblib>=0.17.0", "ruamel.yaml>=0.16.12", + "scikit-learn>=0.22" ] # Numpy include From b8647c13c78842d8ceb20ffb3788cc034cba6041 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 10:20:51 +0800 Subject: [PATCH 12/32] Reformat code to follow PEP 8. 
---
 qlib/contrib/strategy/strategy.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/qlib/contrib/strategy/strategy.py b/qlib/contrib/strategy/strategy.py
index 74df39f3e31..550ff649db8 100644
--- a/qlib/contrib/strategy/strategy.py
+++ b/qlib/contrib/strategy/strategy.py
@@ -7,7 +7,6 @@
 import pandas as pd
 
 from ..backtest.order import Order
-from ...utils import get_pre_trading_date
 from .order_generator import OrderGenWInteract
 
 
@@ -390,11 +389,11 @@ def filter_stock(l):
 
         current_stock_list = current_temp.get_stock_list()
         value = cash * self.risk_degree / len(buy) if len(buy) > 0 else 0
-        # open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not consider it
-        # as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
+        # open_cost should be considered in a real trading environment, but the backtest in evaluate.py does not
+        # consider it; since this demo aims to reproduce the same strategy as evaluate.py, the line below stays commented out
         # value = value / (1+trade_exchange.open_cost) # set open_cost limit
         for code in buy:
-            # check is stock supended
+            # check whether the stock is suspended
            if not trade_exchange.is_stock_tradable(stock_id=code, trade_date=trade_date):
                 continue
             # buy order

From 2f9d45e03ac429d56ab2356e104089c8544316a3 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 10:29:29 +0800
Subject: [PATCH 13/32] Reformat code with black.

---
 docs/conf.py | 10 +--
 examples/benchmarks/TFT/libs/tft_model.py | 12 +--
 examples/highfreq/highfreq_handler.py | 33 ++------
 examples/highfreq/highfreq_processor.py | 4 +-
 examples/highfreq/workflow.py | 35 ++-------
 examples/run_all_model.py | 5 +-
 examples/workflow_by_code.py | 5 +-
 qlib/config.py | 24 +-----
 qlib/contrib/backtest/__init__.py | 18 +----
 qlib/contrib/backtest/profit_attribution.py | 23 +-----
 qlib/contrib/data/handler.py | 10 +--
 qlib/contrib/eva/alpha.py | 6 +-
 qlib/contrib/evaluate.py | 7 +-
 qlib/contrib/evaluate_portfolio.py | 16 +---
 qlib/contrib/model/catboost_model.py | 4 +-
 qlib/contrib/model/pytorch_alstm.py | 21 ++---
 qlib/contrib/model/pytorch_alstm_ts.py | 17 +----
 qlib/contrib/model/pytorch_gats.py | 22 +-----
 qlib/contrib/model/pytorch_gats_ts.py | 18 +----
 qlib/contrib/model/pytorch_gru.py | 21 +----
 qlib/contrib/model/pytorch_gru_ts.py | 17 +----
 qlib/contrib/model/pytorch_lstm.py | 21 +----
 qlib/contrib/model/pytorch_lstm_ts.py | 17 +----
 qlib/contrib/model/pytorch_nn.py | 6 +-
 qlib/contrib/model/pytorch_sfm.py | 19 +----
 qlib/contrib/model/pytorch_tabnet.py | 14 +---
 qlib/contrib/model/xgboost.py | 4 +-
 qlib/contrib/online/executor.py | 24 +-----
 qlib/contrib/online/manager.py | 6 +-
 qlib/contrib/online/operator.py | 8 +-
 qlib/contrib/online/utils.py | 6 +-
 .../analysis_model_performance.py | 66 +++------------
 .../analysis_position/cumulative_return.py | 36 ++-------
 .../analysis_position/parse_position.py | 5 +-
 .../report/analysis_position/rank_label.py | 16 +---
 .../report/analysis_position/report.py | 15 +---
 qlib/contrib/report/graph.py | 6 +-
 qlib/contrib/strategy/cost_control.py | 5 +-
 qlib/contrib/strategy/order_generator.py | 12 +--
 qlib/contrib/tuner/launcher.py | 6 +-
 qlib/contrib/tuner/space.py | 5 +-
 qlib/contrib/tuner/tuner.py | 26 ++-----
 qlib/data/client.py | 3 +-
 qlib/data/data.py | 69 ++---------------
 qlib/data/dataset/utils.py | 5 +-
 qlib/data/filter.py | 7 +-
 qlib/model/riskmodel.py | 31 +++++---
 qlib/portfolio/optimizer.py | 76 ++++++++++++-------
 qlib/tests/__init__.py | 6 +-
qlib/workflow/record_temp.py | 5 +- scripts/data_collector/yahoo/collector.py | 27 ++----- scripts/dump_bin.py | 13 +--- setup.py | 16 +--- tests/test_all_pipeline.py | 9 +-- tests/test_dump_data.py | 9 +-- tests/test_get_data.py | 4 +- 56 files changed, 218 insertions(+), 713 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6e52b0e34a4..61fe784e7a9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,15 +191,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ( - master_doc, - "QLib", - u"QLib Documentation", - author, - "QLib", - "One line description of project.", - "Miscellaneous", - ), + (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",), ] diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index b39f1782553..f40a1aece33 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -721,12 +721,7 @@ def _build_base_graph(self): encoder_steps = self.num_encoder_steps # Inputs. - all_inputs = tf.keras.layers.Input( - shape=( - time_steps, - combined_input_size, - ) - ) + all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,)) unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs) @@ -866,10 +861,7 @@ def get_lstm(return_state): """Returns LSTM cell initialized with default parameters.""" if self.use_cudnn: lstm = tf.keras.layers.CuDNNLSTM( - self.hidden_layer_size, - return_sequences=True, - return_state=return_state, - stateful=False, + self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False, ) else: lstm = tf.keras.layers.LSTM( diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index d3565051446..2fc411ab660 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -20,10 +20,7 @@ def check_transform_proc(proc_l): new_l = [] for p in proc_l: p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append(p) return new_l @@ -33,11 +30,7 @@ def check_transform_proc(proc_l): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( instruments=instruments, @@ -68,8 +61,7 @@ def get_normalized_price_feature(price_field, shift=0): feature_ops = template_norm.format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(price_field), + template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field), ), template_fillnan.format(template_paused.format("$close")), ) @@ -119,24 +111,14 @@ def get_normalized_price_feature(price_field, shift=0): class HighFreqBacktestHandler(DataHandler): def __init__( - self, - instruments="csi300", - start_time=None, - end_time=None, + self, instruments="csi300", start_time=None, end_time=None, ): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( - instruments=instruments, - 
start_time=start_time, - end_time=end_time, - data_loader=data_loader, + instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader, ) def get_feature_config(self): @@ -155,8 +137,7 @@ def get_feature_config(self): fields += [ "Cut({0}, 240, None)".format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(simpson_vwap), + template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap), ) ) ] diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index f0ab0dec2b1..73510ef0689 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -65,8 +65,6 @@ def __call__(self, df_features): feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), - index=idx, - columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() return df_new_features diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 01de59c0e77..0bfd0c2a09c 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -63,13 +63,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG0, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, "dataset_backtest": { @@ -81,13 +75,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG1, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, } @@ -152,24 +140,11 @@ def dump_and_load_dataset(self): "start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00", }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) dataset_backtest.init( - handler_kwargs={ - "start_time": "2021-01-19 00:00:00", - "end_time": "2021-01-25 16:00:00", - }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",}, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) ##=============get data============= diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d587eff1559..d356b41285e 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -34,10 +34,7 @@ exp_manager = { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + exp_path, - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",}, } if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index d5dab891789..6f5c11dc020 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -81,10 +81,7 @@ "strategy": { "class": 
"TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, diff --git a/qlib/config.py b/qlib/config.py index 52b05568d57..344eb852777 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,12 +115,7 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": { - "field_not_found": { - "()": "qlib.log.LogFilter", - "param": [".*?WARN: data not found for.*?"], - } - }, + "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, "handlers": { "console": { "class": "logging.StreamHandler", @@ -135,10 +130,7 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, }, } @@ -200,16 +192,8 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: { - "trade_unit": 100, - "limit_threshold": 0.099, - "deal_price": "vwap", - }, - REG_US: { - "trade_unit": 1, - "limit_threshold": None, - "deal_price": "close", - }, + REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, + REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index aa24ffb0cf6..bd3494abf6a 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,13 +18,7 @@ def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, + strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, ): """get_strategy @@ -75,11 +69,7 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, + topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -172,9 +162,7 @@ def get_exchange( def get_executor( - executor=None, - trade_exchange=None, - verbose=True, + executor=None, trade_exchange=None, verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 20c6f638fcd..355f0637395 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,10 +12,7 @@ def get_benchmark_weight( - bench, - start_date=None, - end_date=None, - path=None, + bench, start_date=None, end_date=None, path=None, ): """get_benchmark_weight @@ -216,12 +213,7 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, - bench="SH000905", - group_field="industry", - group_method="category", - group_n=None, - deal_price="vwap", + positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", ): """brinson profit attribution @@ -255,17 +247,10 @@ def brinson_pa( # suspend stock is 
NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), - start_time=shift_start_date, - end_time=end_date, - as_list=True, + D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, ) stock_df = D.features( - instruments, - [group_field, deal_price], - start_time=shift_start_date, - end_time=end_date, - freq="day", + instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 970b032d6b0..574287819b7 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,10 +21,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -170,10 +167,7 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": { - "windows": [0], - "feature": ["OPEN", "HIGH", "LOW", "VWAP"], - }, + "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853f1..363a184582d 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,11 +35,7 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, - label: pd.Series, - date_col: str = "datetime", - quantile: float = 0.2, - dropna: bool = False, + pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4aa5b55156f..5cb1ce4eb67 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,12 +244,7 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict( - zip( - ["long", "short", "long_short"], - map(pd.Series, [long_returns, short_returns, ls_returns]), - ) - ) + return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 04ddd8db041..2d94105e482 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,12 +64,7 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, - fields, - start_time=evaluate_date, - end_time=evaluate_date, - freq="day", - disk_cache=0, + instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -87,14 +82,7 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] 
- close_data_df = D.features( - instruments, - fields, - start_time=start_date, - end_time=end_date, - freq="day", - disk_cache=0, - ) + close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index d57c32b7022..2840c2cef5a 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,9 +32,7 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index bbbb61851b1..306e68aadf2 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -328,14 +319,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de855..612bacbec93 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): 
return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -309,14 +302,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc1a..c59dc91973f 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,17 +229,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -340,19 +334,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e418..dfc5f4ab5ed 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,11 +242,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -361,19 +357,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": 
self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9fb0..d2a774b65b4 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b854c..49f438cc379 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec87..02ca16e36b8 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ 
-118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f1f..2ec36f96e34 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff53..8c1a77ec3c5 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,11 +154,7 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7bd..1f7433e053d 100644 --- 
a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,14 +30,7 @@ class SFM_Model(nn.Module): def __init__( - self, - d_feat=6, - output_dim=1, - freq_dim=10, - hidden_size=64, - dropout_W=0.0, - dropout_U=0.0, - device="cpu", + self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", ): super().__init__() @@ -362,17 +355,11 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701ce..18e9d8eb404 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,9 +120,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -156,11 +154,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -172,9 +166,7 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index ba2e5789b85..e37725c2eb6 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,9 +29,7 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 2bd0937a032..52b86888133 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,21 +150,13 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) else: 
print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) @@ -271,21 +263,13 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.SELL, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.BUY, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index cf850b9dace..a4476709de0 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,12 +84,10 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, - self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, - self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f57858..c82deb3945c 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,9 +125,7 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -160,9 +158,7 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index 611af63e4af..fb96c87bd31 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,11 +79,7 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar( - start_time=pd.Timestamp(latest_trading_date), - end_time=pd.Timestamp(today), - future=True, - ) + 
dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 1cb14d26153..ef1447a12be 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,8 +53,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), - layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -62,12 +61,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["long-short", "long-average"], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), ).figure return group_scatter_figure, group_hist_figure @@ -102,15 +96,12 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], - names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), - end=pd.Timestamp(f"{_index.max()[:4]}1231"), - freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", ) _years = [] _month = [] @@ -142,32 +133,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ( - "ic", - dict( - row=1, - col=1, - name="", - kind="DistplotGraph", - graph_kwargs=dict(bin_size=_bin_size), - ), - ), + ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name],), sub_graph_data=_sub_graph_data, - layout=dict( - yaxis2=dict(title="Observed Quantile"), - xaxis2=dict(title=f"{dist_name} Distribution Quantile"), - ), + layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -181,8 +155,7 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, - layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -202,17 +175,11 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame( - { - "Top": top, - "Bottom": bottom, - } - ) + r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, - layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -230,11 +197,7 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, - layout=dict( - title="Information Coefficient (IC)", - xaxis=dict(type="category", tickangle=45), - ), + ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), ).figure return ic_bar_figure @@ -277,12 +240,7 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, - lag=lag, - N=N, - reverse=reverse, - rank=rank, - show_nature_day=show_nature_day, + pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index abb68ea6051..604189c94b6 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,11 +13,7 @@ def _get_cum_return_data_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ): """ @@ -29,11 +25,7 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, - report_normal=report_normal, - label_data=label_data, - start_date=start_date, - end_date=end_date, + position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -87,11 +79,7 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ 
-111,18 +99,12 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ( - "cum_{}".format(_t_name), - dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), - ), + ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ( - "{}_value".format(_t_name), - dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), - ), + ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -161,13 +143,7 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, - rows=2, - cols=2, - row_width=[1, 2], - column_width=[3, 1], - print_grid=False, - specs=specs, + vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index fe1d6113709..23f9c592c0a 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,10 +72,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict( - date=_trading_date, - code_list=_trading_day_df[_trading_day_df["status"] != -1].index, - ) + previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 72a358adcbf..9a4d834ed92 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,11 +23,7 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, - label_data, - calculate_label_rank=True, - start_date=start_date, - end_date=end_date, + position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, ) res_dict = dict() @@ -51,20 +47,14 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, - xaxis=dict(type="category", tickangle=45), - yaxis=dict(title="lable-rank-ratio: %"), + title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, - label_data: pd.DataFrame, - start_date=None, - end_date=None, - show_notebook=True, + position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index f82e654c432..8e2c05c0a38 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,9 +123,7 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, { "type": "rect", @@ -137,20 +135,13 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, - vertical_spacing=0.01, - rows=7, - cols=1, - row_width=[1, 1, 1, 3, 1, 1, 3], - print_grid=False, + shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 70e382fb165..dbbc411109d 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,11 +311,7 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, - col=col, - name=res_name, - kind=self._kind_map["kind"], - graph_kwargs=self._kind_map["kwargs"], + row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index dd90437b03f..ee3ee03ecfd 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,10 +57,7 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min( - max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), - sold_stock_weight, - ) + add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 494981ecc09..6f168b4dd52 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,14 +102,10 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, - cash=current_tradable_value, - trade_date=trade_date, + weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, - current_position=current_amount_dict, - trade_date=trade_date, + target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, ) return order_list @@ -164,8 +160,6 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, - 
current_position=current.get_stock_amount_dict(), - trade_date=trade_date, + target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 711658c9a63..409410a2ab4 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,11 +13,7 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", - "--config_path", - required=True, - type=str, - help="config path indicates where to load yaml config.", + "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 76f101671b7..57f57a6c34e 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,8 +10,5 @@ } QLibDataLabelSpace = { - "labels": hp.choice( - "labels", - [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], - ) + "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 2ce957859b2..e81d41a9ad0 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,10 +28,7 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join( - self.tuner_config["experiment"]["dir"], - self.tuner_config["experiment"]["name"], - ) + self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) self.best_params = None self.best_res = None @@ -42,10 +39,7 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, - space=self.space, - algo=tpe.suggest, - max_evals=self.max_evals, + fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -159,8 +153,7 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), - QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -173,27 +166,20 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - model_space_name, - ) + model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - strategy_space_name, - ) + strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index 5244a7e45cf..d1a68cb3857 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,8 +26,7 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", - lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 762467da35e..47cded79cec 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,14 +328,7 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=1, - **kwargs, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -414,29 +407,13 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - spans, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - None, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), ) p.close() @@ -598,12 +575,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list( - filter( - lambda x: x[0] <= x[1], - [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], - ) - ) + inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -723,14 +695,7 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, - args=( - inst, - start_time, - end_time, - freq, - column_names, - ), + LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), ) p.close() @@ -763,12 +728,7 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={ - "start_time": str(start_time), - "end_time": str(end_time), - "freq": freq, - "future": future, - 
}, + request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -832,14 +792,7 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=0, - return_uri=False, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -942,13 +895,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=None, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index feda1904463..58e2bd96811 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,10 +32,7 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, - selector: Union[pd.Timestamp, slice, str, list], - level: Union[str, int], - fetch_orig=True, + df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 70f9d32780d..811fd387f14 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,12 +341,7 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, - [self.rule_expression], - fstart, - fend, - freq=self.filter_freq, - disk_cache=0, + instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 8eec73e00ae..f19c60fc9be 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -38,7 +38,7 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True ) -> Union[pd.DataFrame, np.ndarray]: """ Args: @@ -373,8 +373,7 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( - np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 ) roff = 2 * roff1 - roff3 rho = rdiag + roff @@ -434,7 +433,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray: if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = 
np.asarray(LamPCA.dot(LamPCA.T)) @@ -490,8 +489,14 @@ class StructuredCovEstimator(RiskModel): FACTOR_MODEL_PCA = "pca" FACTOR_MODEL_FA = "fa" - def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: str = "ignore", - assume_centered: bool = False, scale_return: bool = True): + def __init__( + self, + factor_model: str = "pca", + num_factors: int = 10, + nan_option: str = "ignore", + assume_centered: bool = False, + scale_return: bool = True, + ): """ Args: factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). @@ -505,14 +510,17 @@ def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: assert factor_model in [ self.FACTOR_MODEL_PCA, self.FACTOR_MODEL_FA, - ], 'factor_model={} is not supported'.format(factor_model) + ], "factor_model={} is not supported".format(factor_model) self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis self.num_factors = num_factors def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, - return_decomposed_components=False + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: @@ -525,8 +533,9 @@ def predict( Returns: tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. """ - assert not return_corr or not return_decomposed_components, \ - 'Can only return either correlation matrix or decomposed components.' + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." 
# transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 728a04ea9db..3912421277c 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -38,13 +38,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -71,10 +71,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -163,7 +163,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -259,6 +259,7 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: + def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) @@ -281,12 +282,21 @@ class EnhancedIndexingOptimizer(BaseOptimizer): This optimizer always assumes full investment and no-shorting. 
""" - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = 'no_warm_start' + START_FROM_W0 = "w0" + START_FROM_BENCH = "benchmark" + DO_NOT_START_FROM = "no_warm_start" - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): + def __init__( + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = 0.01, + scale_alpha=True, + verbose: bool = False, + warm_start: str = DO_NOT_START_FROM, + max_iters: int = 10000, + ): """ Args: lamb (float): risk aversion parameter (larger `lamb` means less focus on return) @@ -310,18 +320,28 @@ def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01 assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" self.inds_dev = inds_dev - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) + assert warm_start in [ + self.DO_NOT_START_FROM, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" + self.start_from_w0 = warm_start == self.START_FROM_W0 + self.start_from_bench = warm_start == self.START_FROM_BENCH self.scale_alpha = scale_alpha self.verbose = verbose self.max_iters = max_iters - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: + def __call__( + self, + u: np.ndarray, + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray, + ) -> Union[np.ndarray, pd.Series]: """ Args: u (np.ndarray): expected returns (a.k.a., alpha) @@ -352,7 +372,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar d_bench >= -self.bench_dev, d_bench <= self.bench_dev, d_inds >= -self.inds_dev, - d_inds <= self.inds_dev + d_inds <= self.inds_dev, ] if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) @@ -361,7 +381,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar warm_start = False if self.start_from_w0: if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') + print("Warning: try warm start with w0, but w0 is `None`.") else: w.value = w0 warm_start = True @@ -372,7 +392,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar prob = cp.Problem(obj, cons) prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) + if prob.status != "optimal": + print("Warning: solve failed.", prob.status) return np.asarray(w.value) diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index f92e7278758..eb6f9c5edb5 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,10 +18,6 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=provider_uri, - delete_old=False, + name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git 
a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index be458a24d29..0c704b89669 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,10 +193,7 @@ def generate(self): } ) objects.update( - { - "long_short_r.pkl": long_short_r, - "long_avg_r.pkl": long_avg_r, - } + {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 743f89462d0..24526e3328b 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -39,13 +39,7 @@ class YahooData: INTERVAL_1d = "1d" def __init__( - self, - timezone: str = None, - start=None, - end=None, - interval="1d", - delay=0, - show_1min_logging: bool = False, + self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False, ): """ @@ -125,11 +119,7 @@ def _get_simple(start_, end_): self._sleep() _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval return self.get_data_from_remote( - symbol, - interval=_remote_interval, - start=start_, - end=end_, - show_1min_logging=self._show_1min_logging, + symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging, ) _result = None @@ -438,9 +428,7 @@ class YahooNormalize: DAILY_FORMAT = "%Y-%m-%d" def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ @@ -458,10 +446,7 @@ def __init__( @staticmethod def normalize_yahoo( - df: pd.DataFrame, - calendar_list: list = None, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol", ): if df.empty: return df @@ -566,9 +551,7 @@ class YahooNormalize1min(YahooNormalize, ABC): CONSISTENT_1d = False def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index 4811fd48612..ab24fa9cacf 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -153,22 +153,13 @@ def get_dump_fields(self, df_columns: Iterable[str]) -> Iterable[str]: @staticmethod def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]: - return sorted( - map( - pd.Timestamp, - pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(), - ) - ) + return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),)) def _read_instruments(self, instrument_path: Path) -> pd.DataFrame: df = pd.read_csv( instrument_path, sep=self.INSTRUMENTS_SEP, - names=[ - self.symbol_field_name, - self.INSTRUMENTS_START_FIELD, - self.INSTRUMENTS_END_FIELD, - ], + names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,], ) return df diff --git a/setup.py b/setup.py index 6582054b9c6..d8a9d9efa6b 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ "tornado", "joblib>=0.17.0", "ruamel.yaml>=0.16.12", - "scikit-learn>=0.22" + "scikit-learn>=0.22", ] # Numpy include @@ -70,16 +70,10 @@ # Cython Extensions extensions = [ Extension( - "qlib.data._libs.rolling", - ["qlib/data/_libs/rolling.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + 
"qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), Extension( - "qlib.data._libs.expanding", - ["qlib/data/_libs/expanding.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), ] @@ -98,9 +92,7 @@ # py_modules=['qlib'], entry_points={ # 'console_scripts': ['mycli=mymodule:cli'], - "console_scripts": [ - "qrun=qlib.workflow.cli:run", - ], + "console_scripts": ["qrun=qlib.workflow.cli:run",], }, ext_modules=extensions, install_requires=REQUIRED, diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index f6e77cba4d8..8b3819c8302 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,10 +78,7 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, @@ -176,9 +173,7 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, - "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index dfa7f8556dd..de649c37edf 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,9 +40,7 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod @@ -54,10 +52,7 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map( - pd.Timestamp, - pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, - ) + map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b910d..d5637b02595 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,9 +26,7 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod From 37871389b98fe34da35cdd2e996469ac3c7434ff Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 11:25:42 +0800 Subject: [PATCH 14/32] Format code with the latest version of black. 
--- qlib/config.py | 24 +++++-- qlib/contrib/backtest/__init__.py | 18 ++++- qlib/contrib/backtest/profit_attribution.py | 23 +++++-- qlib/contrib/data/handler.py | 10 ++- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 ++++- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++++-- qlib/contrib/model/pytorch_alstm_ts.py | 17 +++-- qlib/contrib/model/pytorch_gats.py | 22 ++++-- qlib/contrib/model/pytorch_gats_ts.py | 18 ++++- qlib/contrib/model/pytorch_gru.py | 21 ++++-- qlib/contrib/model/pytorch_gru_ts.py | 17 ++++- qlib/contrib/model/pytorch_lstm.py | 21 ++++-- qlib/contrib/model/pytorch_lstm_ts.py | 17 ++++- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 ++++- qlib/contrib/model/pytorch_tabnet.py | 14 +++- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 +++++-- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 ++- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++++++++++++---- .../analysis_position/cumulative_return.py | 36 ++++++++-- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 ++++- .../report/analysis_position/report.py | 15 +++- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +++- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 +++++-- qlib/data/client.py | 3 +- qlib/data/data.py | 69 ++++++++++++++++--- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- 41 files changed, 526 insertions(+), 116 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 344eb852777..52b05568d57 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,7 +115,12 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, + "filters": { + "field_not_found": { + "()": "qlib.log.LogFilter", + "param": [".*?WARN: data not found for.*?"], + } + }, "handlers": { "console": { "class": "logging.StreamHandler", @@ -130,7 +135,10 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), + "default_exp_name": "Experiment", + }, }, } @@ -192,8 +200,16 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, - REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, + REG_CN: { + "trade_unit": 100, + "limit_threshold": 0.099, + "deal_price": "vwap", + }, + REG_US: { + "trade_unit": 1, + "limit_threshold": None, + "deal_price": "close", + }, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index bd3494abf6a..aa24ffb0cf6 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,7 +18,13 @@ def get_strategy( - strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, + 
strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, ): """get_strategy @@ -69,7 +75,11 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -162,7 +172,9 @@ def get_exchange( def get_executor( - executor=None, trade_exchange=None, verbose=True, + executor=None, + trade_exchange=None, + verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 355f0637395..20c6f638fcd 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,7 +12,10 @@ def get_benchmark_weight( - bench, start_date=None, end_date=None, path=None, + bench, + start_date=None, + end_date=None, + path=None, ): """get_benchmark_weight @@ -213,7 +216,12 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", + positions, + bench="SH000905", + group_field="industry", + group_method="category", + group_n=None, + deal_price="vwap", ): """brinson profit attribution @@ -247,10 +255,17 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, + D.instruments(market="all"), + start_time=shift_start_date, + end_time=end_date, + as_list=True, ) stock_df = D.features( - instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", + instruments, + [group_field, deal_price], + start_time=shift_start_date, + end_time=end_date, + freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 574287819b7..970b032d6b0 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,7 +21,10 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." 
pkwargs.update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -167,7 +170,10 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, + "price": { + "windows": [0], + "feature": ["OPEN", "HIGH", "LOW", "VWAP"], + }, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 363a184582d..c68571853f1 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,7 +35,11 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, + pred: pd.Series, + label: pd.Series, + date_col: str = "datetime", + quantile: float = 0.2, + dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 5cb1ce4eb67..4aa5b55156f 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,7 +244,12 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) + return dict( + zip( + ["long", "short", "long_short"], + map(pd.Series, [long_returns, short_returns, ls_returns]), + ) + ) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 2d94105e482..04ddd8db041 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,7 +64,12 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, + instruments, + fields, + start_time=evaluate_date, + end_time=evaluate_date, + freq="day", + disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -82,7 +87,14 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) + close_data_df = D.features( + instruments, + fields, + start_time=start_date, + end_time=end_date, + freq="day", + disk_cache=0, + ) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 2840c2cef5a..d57c32b7022 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,7 +32,9 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 
306e68aadf2..bbbb61851b1 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -319,12 +328,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 612bacbec93..725568de855 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -302,12 +309,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + 
nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index c59dc91973f..07048e1bc1a 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,11 +229,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -334,11 +340,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index dfc5f4ab5ed..1e94f56e418 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,7 +242,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -357,11 +361,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index d2a774b65b4..84f863b9fb0 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return 
np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 49f438cc379..bb6618b854c 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 02ca16e36b8..163d500ec87 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 
+305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index 2ec36f96e34..cf4f8fb9f1f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 8c1a77ec3c5..16fcea9ff53 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,7 +154,11 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 1f7433e053d..d5169e6c7bd 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,7 +30,14 @@ class SFM_Model(nn.Module): def __init__( - self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", + self, + d_feat=6, + output_dim=1, + freq_dim=10, + hidden_size=64, + dropout_W=0.0, + dropout_U=0.0, + device="cpu", ): super().__init__() @@ -355,11 +362,17 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git 
a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 18e9d8eb404..62e32d701ce 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,7 +120,9 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -154,7 +156,11 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -166,7 +172,9 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index e37725c2eb6..ba2e5789b85 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,7 +29,9 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 52b86888133..2bd0937a032 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,13 +150,21 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) @@ -263,13 +271,21 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.SELL, + factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.BUY, + factor=factor, ) 
order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index a4476709de0..cf850b9dace 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,10 +84,12 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, + self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, + self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c82deb3945c..c8b44f57858 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,7 +125,9 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + order_list=order_list, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -158,7 +160,9 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + trade_info=trade_info, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index fb96c87bd31..611af63e4af 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,7 +79,11 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) + dates = D.calendar( + start_time=pd.Timestamp(latest_trading_date), + end_time=pd.Timestamp(today), + future=True, + ) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index ef1447a12be..1cb14d26153 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,7 +53,8 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), + layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, 
["long-short", "long-average"]] @@ -61,7 +62,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["long-short", "long-average"], + ), ).figure return group_scatter_figure, group_hist_figure @@ -96,12 +102,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], + names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), + end=pd.Timestamp(f"{_index.max()[:4]}1231"), + freq="1M", ) _years = [] _month = [] @@ -133,15 +142,32 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), + ( + "ic", + dict( + row=1, + col=1, + name="", + kind="DistplotGraph", + graph_kwargs=dict(bin_size=_bin_size), + ), + ), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name], + ), sub_graph_data=_sub_graph_data, - layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), + layout=dict( + yaxis2=dict(title="Observed Quantile"), + xaxis2=dict(title=f"{dist_name} Distribution Quantile"), + ), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -155,7 +181,8 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, + layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -175,11 +202,17 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) + r_df = pd.DataFrame( + { + "Top": top, + "Bottom": bottom, + } + ) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, + layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -197,7 +230,11 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), + ic_df, + layout=dict( + title="Information Coefficient (IC)", + xaxis=dict(type="category", tickangle=45), + ), ).figure return ic_bar_figure @@ -240,7 +277,12 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, + pred_label=pred_label, + lag=lag, + N=N, + reverse=reverse, + rank=rank, + show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index 604189c94b6..abb68ea6051 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,7 +13,11 @@ def _get_cum_return_data_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ): """ @@ -25,7 +29,11 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, + position=position, + report_normal=report_normal, + label_data=label_data, + start_date=start_date, + end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -79,7 +87,11 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures 
@@ -99,12 +111,18 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), + ( + "cum_{}".format(_t_name), + dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), + ), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), + ( + "{}_value".format(_t_name), + dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), + ), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -143,7 +161,13 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, + vertical_spacing=0.01, + rows=2, + cols=2, + row_width=[1, 2], + column_width=[3, 1], + print_grid=False, + specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 23f9c592c0a..fe1d6113709 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,7 +72,10 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) + previous_data = dict( + date=_trading_date, + code_list=_trading_day_df[_trading_day_df["status"] != -1].index, + ) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 9a4d834ed92..72a358adcbf 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,7 +23,11 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, + position, + label_data, + calculate_label_rank=True, + start_date=start_date, + end_date=end_date, ) res_dict = dict() @@ -47,14 +51,20 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), + title=_col, + xaxis=dict(type="category", tickangle=45), + yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, + position: dict, + label_data: pd.DataFrame, + start_date=None, + end_date=None, + show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 8e2c05c0a38..f82e654c432 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,7 +123,9 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, { "type": "rect", @@ -135,13 +137,20 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index dbbc411109d..70e382fb165 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,7 +311,11 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index ee3ee03ecfd..dd90437b03f 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,7 +57,10 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) + add_weight = min( + max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), + sold_stock_weight, + ) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 6f168b4dd52..494981ecc09 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,10 +102,14 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, + weight_position=target_weight_position, + cash=current_tradable_value, + trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, + target_position=target_amount_dict, + current_position=current_amount_dict, + trade_date=trade_date, ) return order_list @@ -160,6 +164,8 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, 
current_position=current.get_stock_amount_dict(), trade_date=trade_date, + target_position=amount_dict, + current_position=current.get_stock_amount_dict(), + trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 409410a2ab4..711658c9a63 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,7 +13,11 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", + "-c", + "--config_path", + required=True, + type=str, + help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 57f57a6c34e..76f101671b7 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,5 +10,8 @@ } QLibDataLabelSpace = { - "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) + "labels": hp.choice( + "labels", + [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], + ) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index e81d41a9ad0..2ce957859b2 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,7 +28,10 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) + self.ex_dir = os.path.join( + self.tuner_config["experiment"]["dir"], + self.tuner_config["experiment"]["name"], + ) self.best_params = None self.best_res = None @@ -39,7 +42,10 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, + fn=self.objective, + space=self.space, + algo=tpe.suggest, + max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -153,7 +159,8 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), + QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -166,20 +173,27 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) + model_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + model_space_name, + ) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) + strategy_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + strategy_space_name, + ) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), + data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index d1a68cb3857..5244a7e45cf 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,7 +26,8 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", + lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 47cded79cec..762467da35e 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,7 +328,14 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=1, + **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -407,13 +414,29 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + spans, + C, + ), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + None, + C, + ), ) p.close() @@ -575,7 +598,12 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) + inst: list( + filter( + lambda x: x[0] <= x[1], + [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], + ) + ) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -695,7 +723,14 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), + LocalDatasetProvider.cache_walker, + args=( + inst, + start_time, + end_time, + freq, + column_names, + ), ) p.close() @@ -728,7 +763,12 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, + 
request_content={ + "start_time": str(start_time), + "end_time": str(end_time), + "freq": freq, + "future": future, + }, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -792,7 +832,14 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=0, + return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -895,7 +942,13 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index 58e2bd96811..feda1904463 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,7 +32,10 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, + df: pd.DataFrame, + selector: Union[pd.Timestamp, slice, str, list], + level: Union[str, int], + fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 811fd387f14..70f9d32780d 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,7 +341,12 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, + instruments, + [self.rule_expression], + fstart, + fend, + freq=self.filter_freq, + disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index eb6f9c5edb5..f92e7278758 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,6 +18,10 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, + name="qlib_data_simple", + region="cn", + interval="1d", + target_dir=provider_uri, + delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 0c704b89669..be458a24d29 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,7 +193,10 @@ def generate(self): } ) objects.update( - {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) From dc4aa675034724a9d2815763fd575b3ec56e76e2 Mon Sep 17 00:00:00 2001 From: Jactus Date: Mon, 22 Feb 2021 11:42:36 +0800 Subject: [PATCH 15/32] Black format --- docs/conf.py | 10 ++++++- examples/benchmarks/TFT/libs/tft_model.py | 12 ++++++-- examples/highfreq/highfreq_handler.py | 33 ++++++++++++++++----- examples/highfreq/highfreq_processor.py | 4 ++- 
examples/highfreq/workflow.py | 35 +++++++++++++++++++---- examples/run_all_model.py | 5 +++- examples/workflow_by_code.py | 5 +++- scripts/data_collector/yahoo/collector.py | 27 +++++++++++++---- scripts/dump_bin.py | 13 +++++++-- setup.py | 14 +++++++-- tests/test_all_pipeline.py | 9 ++++-- tests/test_dump_data.py | 9 ++++-- tests/test_get_data.py | 4 ++- 13 files changed, 147 insertions(+), 33 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 61fe784e7a9..6e52b0e34a4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,7 +191,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",), + ( + master_doc, + "QLib", + u"QLib Documentation", + author, + "QLib", + "One line description of project.", + "Miscellaneous", + ), ] diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index f40a1aece33..b39f1782553 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -721,7 +721,12 @@ def _build_base_graph(self): encoder_steps = self.num_encoder_steps # Inputs. - all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,)) + all_inputs = tf.keras.layers.Input( + shape=( + time_steps, + combined_input_size, + ) + ) unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs) @@ -861,7 +866,10 @@ def get_lstm(return_state): """Returns LSTM cell initialized with default parameters.""" if self.use_cudnn: lstm = tf.keras.layers.CuDNNLSTM( - self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False, + self.hidden_layer_size, + return_sequences=True, + return_state=return_state, + stateful=False, ) else: lstm = tf.keras.layers.LSTM( diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index 2fc411ab660..d3565051446 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -20,7 +20,10 @@ def check_transform_proc(proc_l): new_l = [] for p in proc_l: p["kwargs"].update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append(p) return new_l @@ -30,7 +33,11 @@ def check_transform_proc(proc_l): data_loader = { "class": "QlibDataLoader", - "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, + "kwargs": { + "config": self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, } super().__init__( instruments=instruments, @@ -61,7 +68,8 @@ def get_normalized_price_feature(price_field, shift=0): feature_ops = template_norm.format( template_if.format( - template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field), + template_fillnan.format(template_paused.format("$close")), + template_paused.format(price_field), ), template_fillnan.format(template_paused.format("$close")), ) @@ -111,14 +119,24 @@ def get_normalized_price_feature(price_field, shift=0): class HighFreqBacktestHandler(DataHandler): def __init__( - self, instruments="csi300", start_time=None, end_time=None, + self, + instruments="csi300", + start_time=None, + end_time=None, ): data_loader = { "class": "QlibDataLoader", - "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, + "kwargs": { + "config": 
self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, } super().__init__( - instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader, + instruments=instruments, + start_time=start_time, + end_time=end_time, + data_loader=data_loader, ) def get_feature_config(self): @@ -137,7 +155,8 @@ def get_feature_config(self): fields += [ "Cut({0}, 240, None)".format( template_if.format( - template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap), + template_fillnan.format(template_paused.format("$close")), + template_paused.format(simpson_vwap), ) ) ] diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index 73510ef0689..f0ab0dec2b1 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -65,6 +65,8 @@ def __call__(self, df_features): feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), + index=idx, + columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() return df_new_features diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 0bfd0c2a09c..01de59c0e77 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -63,7 +63,13 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG0, }, - "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, + "segments": { + "train": (start_time, train_end_time), + "test": ( + test_start_time, + end_time, + ), + }, }, }, "dataset_backtest": { @@ -75,7 +81,13 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG1, }, - "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, + "segments": { + "train": (start_time, train_end_time), + "test": ( + test_start_time, + end_time, + ), + }, }, }, } @@ -140,11 +152,24 @@ def dump_and_load_dataset(self): "start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00", }, - segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, + segment_kwargs={ + "test": ( + "2021-01-19 00:00:00", + "2021-01-25 16:00:00", + ), + }, ) dataset_backtest.init( - handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",}, - segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, + handler_kwargs={ + "start_time": "2021-01-19 00:00:00", + "end_time": "2021-01-25 16:00:00", + }, + segment_kwargs={ + "test": ( + "2021-01-19 00:00:00", + "2021-01-25 16:00:00", + ), + }, ) ##=============get data============= diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d356b41285e..d587eff1559 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -34,7 +34,10 @@ exp_manager = { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + exp_path, + "default_exp_name": "Experiment", + }, } if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index 6f5c11dc020..d5dab891789 
100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -81,7 +81,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 24526e3328b..743f89462d0 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -39,7 +39,13 @@ class YahooData: INTERVAL_1d = "1d" def __init__( - self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False, + self, + timezone: str = None, + start=None, + end=None, + interval="1d", + delay=0, + show_1min_logging: bool = False, ): """ @@ -119,7 +125,11 @@ def _get_simple(start_, end_): self._sleep() _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval return self.get_data_from_remote( - symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging, + symbol, + interval=_remote_interval, + start=start_, + end=end_, + show_1min_logging=self._show_1min_logging, ) _result = None @@ -428,7 +438,9 @@ class YahooNormalize: DAILY_FORMAT = "%Y-%m-%d" def __init__( - self, date_field_name: str = "date", symbol_field_name: str = "symbol", + self, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): """ @@ -446,7 +458,10 @@ def __init__( @staticmethod def normalize_yahoo( - df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol", + df: pd.DataFrame, + calendar_list: list = None, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): if df.empty: return df @@ -551,7 +566,9 @@ class YahooNormalize1min(YahooNormalize, ABC): CONSISTENT_1d = False def __init__( - self, date_field_name: str = "date", symbol_field_name: str = "symbol", + self, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): """ diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index ab24fa9cacf..4811fd48612 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -153,13 +153,22 @@ def get_dump_fields(self, df_columns: Iterable[str]) -> Iterable[str]: @staticmethod def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]: - return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),)) + return sorted( + map( + pd.Timestamp, + pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(), + ) + ) def _read_instruments(self, instrument_path: Path) -> pd.DataFrame: df = pd.read_csv( instrument_path, sep=self.INSTRUMENTS_SEP, - names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,], + names=[ + self.symbol_field_name, + self.INSTRUMENTS_START_FIELD, + self.INSTRUMENTS_END_FIELD, + ], ) return df diff --git a/setup.py b/setup.py index d8a9d9efa6b..83cf6e1b602 100644 --- a/setup.py +++ b/setup.py @@ -70,10 +70,16 @@ # Cython Extensions extensions = [ Extension( - "qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.rolling", + ["qlib/data/_libs/rolling.pyx"], + language="c++", + include_dirs=[NUMPY_INCLUDE], ), Extension( - "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.expanding", + ["qlib/data/_libs/expanding.pyx"], + 
language="c++", + include_dirs=[NUMPY_INCLUDE], ), ] @@ -92,7 +98,9 @@ # py_modules=['qlib'], entry_points={ # 'console_scripts': ['mycli=mymodule:cli'], - "console_scripts": ["qrun=qlib.workflow.cli:run",], + "console_scripts": [ + "qrun=qlib.workflow.cli:run", + ], }, ext_modules=extensions, install_requires=REQUIRED, diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index 8b3819c8302..f6e77cba4d8 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,7 +78,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, @@ -173,7 +176,9 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], + 0.10, + "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index de649c37edf..dfa7f8556dd 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,7 +40,9 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod @@ -52,7 +54,10 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) + map( + pd.Timestamp, + pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, + ) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index d5637b02595..c511d1b910d 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,7 +26,9 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod From f947a2fdef294ec927466ec7a287da83604c0bc8 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 15:15:51 +0800 Subject: [PATCH 16/32] Correct two mistakes in annotation. 
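
Two annotation fixes:

1. `qlib/model/base.py`: the weight-fetching call in `fit` and the unpacking
   of the returned frames had been fused into one malformed line. A minimal
   sketch of the intended flow (names taken from the hunk below):

       wdf_train, wdf_valid = dataset.prepare(
           ["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L
       )
       w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]

2. `qlib/portfolio/optimizer.py`: add the missing `bool` annotation for
   `scale_alpha` and document it in the `EnhancedIndexingOptimizer` docstring.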
---
 qlib/model/base.py          | 3 ++-
 qlib/portfolio/optimizer.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/qlib/model/base.py b/qlib/model/base.py
index a7001f0a67b..3708298d5ce 100644
--- a/qlib/model/base.py
+++ b/qlib/model/base.py
@@ -44,7 +44,8 @@ def fit(self, dataset: Dataset):
         # get weights
         try:
             wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"],
-                                                   data_key=DataHandlerLP.DK_L, w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
+                                                   data_key=DataHandlerLP.DK_L)
+            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
         except KeyError as e:
             w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
             w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index)
diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 3912421277c..75c6c51f52e 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -292,7 +292,7 @@ def __init__(
         delta: float = 0.4,
         bench_dev: float = 0.01,
         inds_dev: float = 0.01,
-        scale_alpha=True,
+        scale_alpha: bool = True,
         verbose: bool = False,
         warm_start: str = DO_NOT_START_FROM,
         max_iters: int = 10000,
@@ -303,6 +303,7 @@ def __init__(
             delta (float): turnover rate limit
             bench_dev (float): benchmark deviation limit
             inds_dev (float): industry deviation limit
+            scale_alpha (bool): whether to scale alpha to match the volatility of the covariance matrix
             verbose (bool): if print detailed information about the solver
             warm_start (str): whether try to warm start (`w0`/`benchmark`/``)
                 (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start)

From d3caea60eed1caf7e8cce7ec89f9f4db938109a5 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 17:32:03 +0800
Subject: [PATCH 17/32] Add unittest for TestStructuredCovEstimator.

---
 tests/test_structured_cov_estimator.py | 80 ++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 tests/test_structured_cov_estimator.py

diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py
new file mode 100644
index 00000000000..6aeae3d8979
--- /dev/null
+++ b/tests/test_structured_cov_estimator.py
@@ -0,0 +1,80 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import unittest
+import numpy as np
+from scipy.linalg import sqrtm
+
+from qlib.model.riskmodel import StructuredCovEstimator
+
+
+class TestStructuredCovEstimator(unittest.TestCase):
+    def test_random_covariance(self):
+        # Try to estimate the covariance from a randomly generated matrix.
+        NUM_VARIABLE = 10
+        NUM_OBSERVATION = 200
+        EPS = 1e-6
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True)
+
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+    def test_constructed_covariance(self):
+        # Try to estimate the covariance from a specially crafted matrix.
+        # There should be some significant correlation since X is specially crafted.
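+        # (Sketch of the construction below: i.i.d. uniform draws are mixed
+        # through sqrtm(cov), which correlates the columns of X; a complex
+        # square root is rejected and re-drawn, and EPS is loose because only
+        # NUM_OBSERVATION samples are available.)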
+        NUM_VARIABLE = 7
+        NUM_OBSERVATION = 500
+        EPS = 0.1
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_VARIABLE - 1)
+
+        sqrt_cov = None
+        while sqrt_cov is None or (np.iscomplex(sqrt_cov)).any():
+            cov = np.random.rand(NUM_VARIABLE, NUM_VARIABLE)
+            for i in range(NUM_VARIABLE):
+                cov[i][i] = 1
+            sqrt_cov = sqrtm(cov)
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) @ sqrt_cov
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+    def test_decomposition(self):
+        # Try to estimate the covariance from a specially crafted matrix.
+        # The matrix is generated under the assumption that observations can be predicted by multiple factors.
+        NUM_VARIABLE = 30
+        NUM_OBSERVATION = 100
+        NUM_FACTOR = 10
+        EPS = 0.1
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_FACTOR)
+
+        F = np.random.rand(NUM_VARIABLE, NUM_FACTOR)
+        B = np.random.rand(NUM_FACTOR, NUM_OBSERVATION)
+        U = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+        X = (F @ B).T + U
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 527718a44015a9cac3f13bd71dfcfb583f2d268f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 19:04:31 +0800
Subject: [PATCH 18/32] Allow enhanced indexing to generate portfolio without
 industry-related restrictions.

---
 qlib/portfolio/optimizer.py     |  18 ++-
 tests/test_enhanced_indexing.py | 194 ++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_enhanced_indexing.py

diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 75c6c51f52e..6ee396a513b 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -291,7 +291,7 @@ def __init__(
         lamb: float = 10,
         delta: float = 0.4,
         bench_dev: float = 0.01,
-        inds_dev: float = 0.01,
+        inds_dev: float = None,
         scale_alpha: bool = True,
         verbose: bool = False,
         warm_start: str = DO_NOT_START_FROM,
@@ -302,7 +302,8 @@ def __init__(
             lamb (float): risk aversion parameter (larger `lamb` means less focus on return)
             delta (float): turnover rate limit
             bench_dev (float): benchmark deviation limit
-            inds_dev (float): industry deviation limit
+            inds_dev (float/None): industry deviation limit; set `inds_dev` to None to disable the
+                industry-specific restriction
             scale_alpha (bool): whether to scale alpha to match the volatility of the covariance matrix
             verbose (bool): if print detailed information about the solver
             warm_start (str): whether try to warm start (`w0`/`benchmark`/``)
@@ -341,7 +342,7 @@ def __call__(
         varU: np.ndarray,
         w0: np.ndarray,
         w_bench: np.ndarray,
-        inds_onehot: np.ndarray,
+        inds_onehot: np.ndarray = None,
     ) -> Union[np.ndarray, pd.Series]:
         """
         Args:
@@ -354,6 +355,8 @@ def __call__(
         Returns:
             np.ndarray or pd.Series: optimized portfolio allocation
         """
+        assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required."
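+        # NOTE: `inds_onehot` is only required when an industry deviation limit
+        # is set; with the new default `inds_dev=None`, the industry constraints
+        # are simply not added to the constraint list below.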
+ # scale alpha to match volatility if self.scale_alpha: u = u / u.std() @@ -366,15 +369,18 @@ def __call__( risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) obj = cp.Maximize(ret - self.lamb * risk) d_bench = w - w_bench - d_inds = d_bench @ inds_onehot cons = [ w >= 0, cp.sum(w) == 1, d_bench >= -self.bench_dev, d_bench <= self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev, ] + + if self.inds_dev is not None: + d_inds = d_bench @ inds_onehot + cons.append(d_inds >= -self.inds_dev) + cons.append(d_inds <= self.inds_dev) + if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) cons.append(turnover <= self.delta) diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py new file mode 100644 index 00000000000..f6e77cba4d8 --- /dev/null +++ b/tests/test_enhanced_indexing.py @@ -0,0 +1,194 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import sys +import shutil +import unittest +from pathlib import Path + +import numpy as np +import pandas as pd + +import qlib +from qlib.config import REG_CN, C +from qlib.utils import drop_nan_by_y_index +from qlib.contrib.model.gbdt import LGBModel +from qlib.contrib.data.handler import Alpha158 +from qlib.contrib.strategy.strategy import TopkDropoutStrategy +from qlib.contrib.evaluate import ( + backtest as normal_backtest, + risk_analysis, +) +from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.workflow import R +from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord +from qlib.tests.data import GetData +from qlib.tests import TestAutoData + + +market = "csi300" +benchmark = "SH000300" + +################################### +# train model +################################### +data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, +} + +task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": 0.8879, + "learning_rate": 0.0421, + "subsample": 0.8789, + "lambda_l1": 205.6999, + "lambda_l2": 580.9768, + "max_depth": 8, + "num_leaves": 210, + "num_threads": 20, + }, + }, + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + }, +} + +port_analysis_config = { + "strategy": { + "class": "TopkDropoutStrategy", + "module_path": "qlib.contrib.strategy.strategy", + "kwargs": { + "topk": 50, + "n_drop": 5, + }, + }, + "backtest": { + "verbose": False, + "limit_threshold": 0.095, + "account": 100000000, + "benchmark": benchmark, + "deal_price": "close", + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5, + }, +} + + +# train +def train(): + """train model + + Returns + ------- + pred_score: pandas.DataFrame + predict scores + performance: dict + model performance + """ + + # model initiaiton + model = init_instance_by_config(task["model"]) + dataset = init_instance_by_config(task["dataset"]) + + # start exp + with R.start(experiment_name="workflow"): + R.log_params(**flatten_dict(task)) + model.fit(dataset) + + # prediction + recorder = R.get_recorder() + rid = recorder.id + sr = SignalRecord(model, dataset, 
recorder)
+        sr.generate()
+        pred_score = sr.load()
+
+        # calculate ic and ric
+        sar = SigAnaRecord(recorder)
+        sar.generate()
+        ic = sar.load(sar.get_path("ic.pkl"))
+        ric = sar.load(sar.get_path("ric.pkl"))
+
+    return pred_score, {"ic": ic, "ric": ric}, rid
+
+
+def backtest_analysis(pred, rid):
+    """backtest and analysis
+
+    Parameters
+    ----------
+    pred : pandas.DataFrame
+        predict scores
+    rid : str
+        the id of the recorder to be used in this function
+
+    Returns
+    -------
+    analysis : pandas.DataFrame
+        the analysis result
+
+    """
+    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
+    # backtest
+    par = PortAnaRecord(recorder, port_analysis_config)
+    par.generate()
+    analysis_df = par.load(par.get_path("port_analysis.pkl"))
+    print(analysis_df)
+    return analysis_df
+
+
+class TestAllFlow(TestAutoData):
+    PRED_SCORE = None
+    REPORT_NORMAL = None
+    POSITIONS = None
+    RID = None
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve()))
+
+    def test_0_train(self):
+        TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train()
+        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
+        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")
+
+    def test_1_backtest(self):
+        analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
+        self.assertGreaterEqual(
+            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
+            0.10,
+            "backtest failed",
+        )
+
+
+def suite():
+    _suite = unittest.TestSuite()
+    _suite.addTest(TestAllFlow("test_0_train"))
+    _suite.addTest(TestAllFlow("test_1_backtest"))
+    return _suite
+
+
+if __name__ == "__main__":
+    runner = unittest.TextTestRunner()
+    runner.run(suite())

From 2bff6eb78120b8f3fc7aac12267d1e37c847ae0f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Thu, 4 Mar 2021 22:08:11 +0800
Subject: [PATCH 19/32] Split classes in riskmodel.py & optimizer.py into
 separate files.

---
 qlib/model/riskmodel_poet.py        |   0
 qlib/model/riskmodel_shrink.py      |   0
 qlib/model/riskmodel_structured.py  |   0
 qlib/portfolio/enhanced_indexing.py |   0
 tests/test_enhanced_indexing.py     | 212 ++++++++++++++++++++--------
 5 files changed, 150 insertions(+), 62 deletions(-)
 create mode 100644 qlib/model/riskmodel_poet.py
 create mode 100644 qlib/model/riskmodel_shrink.py
 create mode 100644 qlib/model/riskmodel_structured.py
 create mode 100644 qlib/portfolio/enhanced_indexing.py

diff --git a/qlib/model/riskmodel_poet.py b/qlib/model/riskmodel_poet.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/model/riskmodel_shrink.py b/qlib/model/riskmodel_shrink.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/model/riskmodel_structured.py b/qlib/model/riskmodel_structured.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/portfolio/enhanced_indexing.py b/qlib/portfolio/enhanced_indexing.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py
index f6e77cba4d8..f21d51984a6 100644
--- a/tests/test_enhanced_indexing.py
+++ b/tests/test_enhanced_indexing.py
@@ -2,32 +2,39 @@
 # Licensed under the MIT License.
import sys +import math import shutil import unittest -from pathlib import Path - import numpy as np import pandas as pd +from tqdm import tqdm +from pathlib import Path import qlib -from qlib.config import REG_CN, C -from qlib.utils import drop_nan_by_y_index -from qlib.contrib.model.gbdt import LGBModel -from qlib.contrib.data.handler import Alpha158 -from qlib.contrib.strategy.strategy import TopkDropoutStrategy -from qlib.contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) -from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.config import C +from qlib.utils import init_instance_by_config, flatten_dict from qlib.workflow import R -from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord -from qlib.tests.data import GetData +from qlib.config import REG_CN +from qlib.workflow.record_temp import SignalRecord, SigAnaRecord from qlib.tests import TestAutoData +from qlib.portfolio.optimizer import EnhancedIndexingOptimizer +from qlib.model.riskmodel import StructuredCovEstimator +from qlib.data.dataset.loader import QlibDataLoader +from qlib.data.dataset.handler import DataHandler +from qlib.data import D +from qlib.utils import exists_qlib_data, init_instance_by_config +market = "all" +trade_gap = 21 +label_config = "Ref($close, -{}) / Ref($close, -1) - 1".format(trade_gap) # reconstruct portfolio once a month -market = "csi300" -benchmark = "SH000300" +provider_uri = "~/.qlib_ei/qlib_data/cn_data" # target_dir +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(Path.cwd().parent.joinpath("scripts"))) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region=REG_CN) +qlib.init(provider_uri=provider_uri, region=REG_CN) ################################### # train model @@ -36,8 +43,9 @@ "start_time": "2008-01-01", "end_time": "2020-08-01", "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", + "fit_end_time": "2014-11-30", "instruments": market, + "label": [label_config] } task = { @@ -53,7 +61,7 @@ "lambda_l2": 580.9768, "max_depth": 8, "num_leaves": 210, - "num_threads": 20, + "num_threads": 32, }, }, "dataset": { @@ -66,37 +74,104 @@ "kwargs": data_handler_config, }, "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), + "train": ("2008-01-01", "2014-11-30"), + "valid": ("2015-01-01", "2016-11-30"), + "test": ("2017-01-01", "2018-01-01"), }, }, }, } -port_analysis_config = { - "strategy": { - "class": "TopkDropoutStrategy", - "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, - }, - "backtest": { - "verbose": False, - "limit_threshold": 0.095, - "account": 100000000, - "benchmark": benchmark, - "deal_price": "close", - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5, - }, -} + +class CSI300: + """Simulate CSI300 as the Benchmark for Enhanced Indexing to Track""" + + def __init__(self): + # provider_uri = '/nfs_data/qlib_data/ycz_daily/qlib' + # qlib.init(provider_uri=provider_uri, region=REG_CN, dataset_cache=None, expression_cache=None) + self.csi_weight = D.features(D.instruments('csi300'), ['$csi300_weight']) + + def __call__(self, pd_index, trade_date): + weights = np.zeros(len(pd_index)) + + for idx, instrument in enumerate(pd_index): + if (instrument, trade_date) in self.csi_weight.index: + weight = self.csi_weight.loc[(instrument, trade_date)].values[0] + if 
not math.isnan(weight):
+                    weights[idx] = weight
+
+        assert weights.sum() > 0, "Failed to fetch CSI weights!"
+        weights = weights / weights.sum()
+
+        return weights
+
+
+class EnhancedIndexingStrategy:
+    """Enhanced Indexing Strategy"""
+
+    def __init__(self):
+        self.benchmark = CSI300()
+
+        provider_uri = "~/.qlib_ei/qlib_data/cn_data"
+        qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+        self.data_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader(["$close"]))
+        self.label_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader([label_config]))
+        self.cov_estimator = StructuredCovEstimator()
+        self.optimizer = EnhancedIndexingOptimizer(lamb=0.1, delta=0.4, bench_dev=0.03, max_iters=50000)
+
+    def update(self, score_series, current, pred_date):
+        """
+        Parameters
+        -----------
+        score_series : pd.Series
+            stock_id, score.
+        current : pd.DataFrame
+            current portfolio weights.
+        pred_date : pd.Timestamp
+            prediction date.
+        """
+        print(score_series)
+        score_series = score_series.dropna()
+
+        # portfolio init weight
+        init_weight = current.reindex(score_series.index, fill_value=0).values.squeeze()
+        init_weight_sum = init_weight.sum()
+        if init_weight_sum > 0:
+            init_weight /= init_weight_sum
+
+        # covariance estimation
+        selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index)
+        price = self.data_handler.fetch(selector, level=None, squeeze=True)
+        F, cov_b, var_u = self.cov_estimator.predict(price, return_decomposed_components=True)
+
+        # optimize target portfolio
+        w_bench = self.benchmark(score_series.index, pred_date)
+        passed_init_weight = init_weight if init_weight_sum > 0 else None
+        # print(F)
+        # print(cov_b)
+        # print(var_u)
+        # print(passed_init_weight)
+        # print(w_bench)
+        target_weight = self.optimizer(score_series.values, F, cov_b, var_u, passed_init_weight, w_bench)
+        # print(target_weight)
+        target = pd.DataFrame(data=target_weight, index=score_series.index)
+
+        active_weights = target_weight - w_bench
+        selector = (self.label_handler.get_range_selector(pred_date, 1), score_series.index)
+        label = self.label_handler.fetch(selector, level=None, squeeze=True)
+        alpha = 0
+        for instrument, weight in zip(score_series.index, active_weights):
+            delta = label.loc[(pred_date, instrument)]
+            alpha += weight * (0 if math.isnan(delta) else delta)
+
+        print(alpha)
+
+        return alpha, target


-# train
 def train():
     """train model
@@ -108,7 +183,7 @@ def train():
         model performance
     """

-    # model initiaiton
+    # model initiation
     model = init_instance_by_config(task["model"])
     dataset = init_instance_by_config(task["dataset"])

@@ -133,29 +208,42 @@ def train():
     return pred_score, {"ic": ic, "ric": ric}, rid


-def backtest_analysis(pred, rid):
-    """backtest and analysis
+def backtest_analysis(scores):
+    """backtest enhanced indexing

     Parameters
     ----------
-    pred : pandas.DataFrame
-        predict scores
-    rid : str
-        the id of the recorder to be used in this function
+    scores: pandas.DataFrame
+        predict scores

     Returns
     -------
-    analysis : pandas.DataFrame
-        the analysis result
-
+    sharpe_ratio: floating-point
+        sharpe ratio of the enhanced indexing portfolio
     """
-    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
-    # backtest
-    par = PortAnaRecord(recorder, port_analysis_config)
-    par.generate()
-    analysis_df = par.load(par.get_path("port_analysis.pkl"))
-    print(analysis_df)
-    return analysis_df
+
+    # backtest and analysis
+    with R.start(experiment_name="backtest_analysis"):
+        strategy = 
EnhancedIndexingStrategy() + dates = scores.index.get_level_values(0).unique() + + alphas = [] + current = pd.DataFrame() + gap_between_next_trade = 0 + for date in tqdm(dates): + if gap_between_next_trade == 0: + score_series = scores.loc[date] + alpha, current = strategy.update(score_series, current, date) + alphas.append(alpha) + gap_between_next_trade = trade_gap + else: + gap_between_next_trade -= 1 + + alphas = np.array(alphas) + sharpe_ratio = alphas.mean() / np.std(alphas) + print('Sharpe:', sharpe_ratio) + + return sharpe_ratio class TestAllFlow(TestAutoData): @@ -174,10 +262,10 @@ def test_0_train(self): self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") def test_1_backtest(self): - analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) + sharpe_ratio = backtest_analysis(TestAllFlow.PRED_SCORE) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, + sharpe_ratio, + 0.90, "backtest failed", ) From 83c6e747835656d0c5d5f90fb3c903a239689158 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Thu, 4 Mar 2021 22:30:38 +0800 Subject: [PATCH 20/32] Reindex files. --- qlib/model/riskmodel.py | 611 ------------------ .../__init__.py} | 0 qlib/model/riskmodel/base.py | 141 ++++ qlib/model/riskmodel/poet.py | 84 +++ qlib/model/riskmodel/shrink.py | 262 ++++++++ qlib/model/riskmodel/structured.py | 152 +++++ qlib/portfolio/enhanced_indexing.py | 0 .../optimizer/__init__.py} | 0 .../optimizer/base.py} | 0 qlib/portfolio/optimizer/enhanced_indexing.py | 140 ++++ qlib/portfolio/{ => optimizer}/optimizer.py | 159 +---- tests/test_enhanced_indexing.py | 282 -------- 12 files changed, 793 insertions(+), 1038 deletions(-) delete mode 100644 qlib/model/riskmodel.py rename qlib/model/{riskmodel_poet.py => riskmodel/__init__.py} (100%) create mode 100644 qlib/model/riskmodel/base.py create mode 100644 qlib/model/riskmodel/poet.py create mode 100644 qlib/model/riskmodel/shrink.py create mode 100644 qlib/model/riskmodel/structured.py delete mode 100644 qlib/portfolio/enhanced_indexing.py rename qlib/{model/riskmodel_shrink.py => portfolio/optimizer/__init__.py} (100%) rename qlib/{model/riskmodel_structured.py => portfolio/optimizer/base.py} (100%) create mode 100644 qlib/portfolio/optimizer/enhanced_indexing.py rename qlib/portfolio/{ => optimizer}/optimizer.py (62%) delete mode 100644 tests/test_enhanced_indexing.py diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py deleted file mode 100644 index f19c60fc9be..00000000000 --- a/qlib/model/riskmodel.py +++ /dev/null @@ -1,611 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import numpy as np -import pandas as pd -from typing import Union -from sklearn.decomposition import PCA, FactorAnalysis - -from qlib.model.base import BaseModel - - -class RiskModel(BaseModel): - """Risk Model - - A risk model is used to estimate the covariance matrix of stock returns. - """ - - MASK_NAN = "mask" - FILL_NAN = "fill" - IGNORE_NAN = "ignore" - - def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True): - """ - Args: - nan_option (str): nan handling option (`ignore`/`mask`/`fill`). - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. 
- """ - # nan - assert nan_option in [ - self.MASK_NAN, - self.FILL_NAN, - self.IGNORE_NAN, - ], f"`nan_option={nan_option}` is not supported" - self.nan_option = nan_option - - self.assume_centered = assume_centered - self.scale_return = scale_return - - def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True - ) -> Union[pd.DataFrame, np.ndarray]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - - Returns: - pd.DataFrame or np.ndarray: estimated covariance (or correlation). - """ - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray) -> np.ndarray: - """covariance estimation implementation - - This method should be overridden by child classes. - - By default, this method implements the empirical covariance estimation. - - Args: - X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - - Returns: - np.ndarray: covariance matrix. - """ - xTx = np.asarray(X.T.dot(X)) - N = len(X) - if isinstance(X, np.ma.MaskedArray): - M = 1 - X.mask - N = M.T.dot(M) # each pair has distinct number of samples - return xTx / N - - def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: - """handle nan and centerize data - - Note: - if `nan_option='mask'` then the returned array will be `np.ma.MaskedArray`. - """ - # handle nan - if self.nan_option == self.FILL_NAN: - X = np.nan_to_num(X) - elif self.nan_option == self.MASK_NAN: - X = np.ma.masked_invalid(X) - # centralize - if not self.assume_centered: - X = X - np.nanmean(X, axis=0) - return X - - -class ShrinkCovEstimator(RiskModel): - """Shrinkage Covariance Estimator - - This estimator will shrink the sample covariance matrix towards - an identify matrix: - S_hat = (1 - alpha) * S + alpha * F - where `alpha` is the shrink parameter and `F` is the shrinking target. - - The following shrinking parameters (`alpha`) are supported: - - `lw` [1][2][3]: use Ledoit-Wolf shrinking parameter. - - `oas` [4]: use Oracle Approximating Shrinkage shrinking parameter. - - float: directly specify the shrink parameter, should be between [0, 1]. - - The following shrinking targets (`F`) are supported: - - `const_var` [1][4][5]: assume stocks have the same constant variance and zero correlation. 
- - `const_corr` [2][6]: assume stocks have different variance but equal correlation. - - `single_factor` [3][7]: assume single factor model as the shrinking target. - - np.ndarray: provide the shrinking targets directly. - - Note: - - The optimal shrinking parameter depends on the selection of the shrinking target. - Currently, `oas` is not supported for `const_corr` and `single_factor`. - - Remember to set `nan_option` to `fill` or `mask` if your data has missing values. - - References: - [1] Ledoit, O., & Wolf, M. (2004). A well-conditioned estimator for large-dimensional covariance matrices. - Journal of Multivariate Analysis, 88(2), 365–411. https://doi.org/10.1016/S0047-259X(03)00096-4 - [2] Ledoit, O., & Wolf, M. (2004). Honey, I shrunk the sample covariance matrix. - Journal of Portfolio Management, 30(4), 1–22. https://doi.org/10.3905/jpm.2004.110 - [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns - with an application to portfolio selection. - Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0 - [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance - estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029. - https://doi.org/10.1109/TSP.2010.2053029 - [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip - [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip - [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip - """ - - SHR_LW = "lw" - SHR_OAS = "oas" - - TGT_CONST_VAR = "const_var" - TGT_CONST_CORR = "const_corr" - TGT_SINGLE_FACTOR = "single_factor" - - def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs): - """ - Args: - alpha (str or float): shrinking parameter or estimator (`lw`/`oas`) - target (str or np.ndarray): shrinking target (`const_var`/`const_corr`/`single_factor`) - kwargs: see `RiskModel` for more information - """ - super().__init__(**kwargs) - - # alpha - if isinstance(alpha, str): - assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported" - elif isinstance(alpha, (float, np.floating)): - assert 0 <= alpha <= 1, "alpha should be between [0, 1]" - else: - raise TypeError("invalid argument type for `alpha`") - self.alpha = alpha - - # target - if isinstance(target, str): - assert target in [ - self.TGT_CONST_VAR, - self.TGT_CONST_CORR, - self.TGT_SINGLE_FACTOR, - ], f"shrinking target `{target} is not supported" - elif isinstance(target, np.ndarray): - pass - else: - raise TypeError("invalid argument type for `target`") - if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR: - raise NotImplementedError("currently `oas` can only support `const_var` as target") - self.target = target - - def _predict(self, X: np.ndarray) -> np.ndarray: - # sample covariance - S = super()._predict(X) - - # shrinking target - F = self._get_shrink_target(X, S) - - # get shrinking parameter - alpha = self._get_shrink_param(X, S, F) - - # shrink covariance - if alpha > 0: - S *= 1 - alpha - F *= alpha - S += F - - return S - - def _get_shrink_target(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target `F`""" - if self.target == self.TGT_CONST_VAR: - return self._get_shrink_target_const_var(X, S) - if self.target == self.TGT_CONST_CORR: - return self._get_shrink_target_const_corr(X, S) - if self.target == 
self.TGT_SINGLE_FACTOR: - return self._get_shrink_target_single_factor(X, S) - return self.target - - def _get_shrink_target_const_var(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with constant variance - - This target assumes zero pair-wise correlation and constant variance. - The constant variance is estimated by averaging all sample's variances. - """ - n = len(S) - F = np.eye(n) - np.fill_diagonal(F, np.mean(np.diag(S))) - return F - - def _get_shrink_target_const_corr(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with constant correlation - - This target assumes constant pair-wise correlation but keep the sample variance. - The constant correlation is estimated by averaging all pairwise correlations. - """ - n = len(S) - var = np.diag(S) - sqrt_var = np.sqrt(var) - covar = np.outer(sqrt_var, sqrt_var) - r_bar = (np.sum(S / covar) - n) / (n * (n - 1)) - F = r_bar * covar - np.fill_diagonal(F, var) - return F - - def _get_shrink_target_single_factor(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with single factor model""" - X_mkt = np.nanmean(X, axis=1) - cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) - var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) - F = np.outer(cov_mkt, cov_mkt) / var_mkt - np.fill_diagonal(F, np.diag(S)) - return F - - def _get_shrink_param(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """get shrinking parameter `alpha` - - Note: - The Ledoit-Wolf shrinking parameter estimator consists of three different methods. - """ - if self.alpha == self.SHR_OAS: - return self._get_shrink_param_oas(X, S, F) - elif self.alpha == self.SHR_LW: - if self.target == self.TGT_CONST_VAR: - return self._get_shrink_param_lw_const_var(X, S, F) - if self.target == self.TGT_CONST_CORR: - return self._get_shrink_param_lw_const_corr(X, S, F) - if self.target == self.TGT_SINGLE_FACTOR: - return self._get_shrink_param_lw_single_factor(X, S, F) - return self.alpha - - def _get_shrink_param_oas(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Oracle Approximating Shrinkage Estimator - - This method uses the following formula to estimate the `alpha` - parameter for the shrink covariance estimator: - A = (1 - 2 / p) * trace(S^2) + trace^2(S) - B = (n + 1 - 2 / p) * (trace(S^2) - trace^2(S) / p) - alpha = A / B - where `n`, `p` are the dim of observations and variables respectively. - """ - trS2 = np.sum(S ** 2) - tr2S = np.trace(S) ** 2 - - n, p = X.shape - - A = (1 - 2 / p) * (trS2 + tr2S) - B = (n + 1 - 2 / p) * (trS2 + tr2S / p) - alpha = A / B - - return alpha - - def _get_shrink_param_lw_const_var(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Constant Variance) - - This method shrinks the covariance matrix towards the constand variance target. - """ - t, n = X.shape - - y = X ** 2 - phi = np.sum(y.T.dot(y) / t - S ** 2) - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = phi / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - def _get_shrink_param_lw_const_corr(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Constant Correlation) - - This method shrinks the covariance matrix towards the constand correlation target. 
- """ - t, n = X.shape - - var = np.diag(S) - sqrt_var = np.sqrt(var) - r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1)) - - y = X ** 2 - phi_mat = y.T.dot(y) / t - S ** 2 - phi = np.sum(phi_mat) - - theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S - np.fill_diagonal(theta_mat, 0) - rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat) - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = (phi - rho) / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Single Factor Model) - - This method shrinks the covariance matrix towards the single factor model target. - """ - t, n = X.shape - - X_mkt = np.nanmean(X, axis=1) - cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) - var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) - - y = X ** 2 - phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2) - - rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2) - z = X * X_mkt[:, None] - v1 = y.T.dot(z) / t - cov_mkt[:, None] * S - roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt - v3 = z.T.dot(z) / t - var_mkt * S - roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 - ) - roff = 2 * roff1 - roff3 - rho = rdiag + roff - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = (phi - rho) / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - -class POETCovEstimator(RiskModel): - """Principal Orthogonal Complement Thresholding Estimator (POET) - - Reference: - [1] Fan, J., Liao, Y., & Mincheva, M. (2013). Large covariance estimation by thresholding principal orthogonal complements. - Journal of the Royal Statistical Society. Series B: Statistical Methodology, 75(4), 603–680. https://doi.org/10.1111/rssb.12016 - [2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m - """ - - THRESH_SOFT = "soft" - THRESH_HARD = "hard" - THRESH_SCAD = "scad" - - def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs): - """ - Args: - num_factors (int): number of factors (if set to zero, no factor model will be used). - thresh (float): the positive constant for thresholding. - thresh_method (str): thresholding method, which can be - - 'soft': soft thresholding. - - 'hard': hard thresholding. - - 'scad': scad thresholding. - kwargs: see `RiskModel` for more information. 
- """ - super().__init__(**kwargs) - - assert num_factors >= 0, "`num_factors` requires a positive integer" - self.num_factors = num_factors - - assert thresh >= 0, "`thresh` requires a positive float number" - self.thresh = thresh - - assert thresh_method in [ - self.THRESH_HARD, - self.THRESH_SOFT, - self.THRESH_SCAD, - ], "`thresh_method` should be `soft`/`hard`/`scad`" - self.thresh_method = thresh_method - - def _predict(self, X: np.ndarray) -> np.ndarray: - - Y = X.T # NOTE: to match POET's implementation - p, n = Y.shape - - if self.num_factors > 0: - Dd, V = np.linalg.eig(Y.T.dot(Y)) - V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) - LamPCA = Y.dot(F) / n - uhat = np.asarray(Y - LamPCA.dot(F.T)) - Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) - rate = 1 / np.sqrt(p) + np.sqrt(np.log(p) / n) - else: - uhat = np.asarray(Y) - rate = np.sqrt(np.log(p) / n) - Lowrank = 0 - - lamb = rate * self.thresh - SuPCA = uhat.dot(uhat.T) / n - SuDiag = np.diag(np.diag(SuPCA)) - R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5)) - - if self.thresh_method == self.THRESH_HARD: - M = R * (np.abs(R) > lamb) - elif self.thresh_method == self.THRESH_SOFT: - res = np.abs(R) - lamb - res = (res + np.abs(res)) / 2 - M = np.sign(R) * res - else: - M1 = (np.abs(R) < 2 * lamb) * np.sign(R) * (np.abs(R) - lamb) * (np.abs(R) > lamb) - M2 = (np.abs(R) < 3.7 * lamb) * (np.abs(R) >= 2 * lamb) * (2.7 * R - 3.7 * np.sign(R) * lamb) / 1.7 - M3 = (np.abs(R) >= 3.7 * lamb) * R - M = M1 + M2 + M3 - - Rthresh = M - np.diag(np.diag(M)) + np.eye(p) - SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5) - SigmaY = SigmaU + Lowrank - - return SigmaY - - -class StructuredCovEstimator(RiskModel): - """Structured Covariance Estimator - - This estimator assumes observations can be predicted by multiple factors - X = FB + U - where `F` can be specified by explicit risk factors or latent factors. - - Therefore the structured covariance can be estimated by - cov(X) = F cov(B) F.T + cov(U) - - We use latent factor models to estimate the structured covariance. - Specifically, the following latent factor models are supported: - - `pca`: Principal Component Analysis - - `fa`: Factor Analysis - - Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and - precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 - """ - - FACTOR_MODEL_PCA = "pca" - FACTOR_MODEL_FA = "fa" - - def __init__( - self, - factor_model: str = "pca", - num_factors: int = 10, - nan_option: str = "ignore", - assume_centered: bool = False, - scale_return: bool = True, - ): - """ - Args: - factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). - num_factors (int): number of components to keep. - nan_option (str): nan handling option (`ignore`/`fill`). - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. 
- """ - super().__init__(nan_option, assume_centered, scale_return) - - assert factor_model in [ - self.FACTOR_MODEL_PCA, - self.FACTOR_MODEL_FA, - ], "factor_model={} is not supported".format(factor_model) - self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis - - self.num_factors = num_factors - - def predict( - self, - X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, - is_price: bool = True, - return_decomposed_components=False, - ) -> Union[pd.DataFrame, np.ndarray, tuple]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - return_decomposed_components (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. - """ - assert ( - not return_corr or not return_decomposed_components - ), "Can only return either correlation matrix or decomposed components." - - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - if return_decomposed_components: - F, cov_b, var_u = self._predict(X, return_structured=True) - return F, cov_b, var_u - else: - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: - """ - covariance estimation implementation - - Args: - X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - return_structured (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or np.ndarray: decomposed covariance matrix or covariance matrix. 
- """ - - model = self.solver(self.num_factors, random_state=0).fit(X) - - F = model.components_.T # num_features x num_factors - B = model.transform(X) # num_samples x num_factors - U = X - B @ F.T - cov_b = np.cov(B.T) # num_factors x num_factors - var_u = np.var(U, axis=0) # diagonal - - if return_structured: - return F, cov_b, var_u - - cov_x = F @ cov_b @ F.T + np.diag(var_u) - - return cov_x diff --git a/qlib/model/riskmodel_poet.py b/qlib/model/riskmodel/__init__.py similarity index 100% rename from qlib/model/riskmodel_poet.py rename to qlib/model/riskmodel/__init__.py diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py new file mode 100644 index 00000000000..d5b009cccca --- /dev/null +++ b/qlib/model/riskmodel/base.py @@ -0,0 +1,141 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +from typing import Union + +from qlib.model.base import BaseModel + +from qlib.model.riskmodel_poet import POETCovEstimator +from qlib.model.riskmodel_shrink import ShrinkCovEstimator +from qlib.model.riskmodel_structured import StructuredCovEstimator + + +class RiskModel(BaseModel): + """Risk Model + + A risk model is used to estimate the covariance matrix of stock returns. + """ + + MASK_NAN = "mask" + FILL_NAN = "fill" + IGNORE_NAN = "ignore" + + def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True): + """ + Args: + nan_option (str): nan handling option (`ignore`/`mask`/`fill`). + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. + """ + # nan + assert nan_option in [ + self.MASK_NAN, + self.FILL_NAN, + self.IGNORE_NAN, + ], f"`nan_option={nan_option}` is not supported" + self.nan_option = nan_option + + self.assume_centered = assume_centered + self.scale_return = scale_return + + def predict( + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + ) -> Union[pd.DataFrame, np.ndarray]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + + Returns: + pd.DataFrame or np.ndarray: estimated covariance (or correlation). 
+ """ + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray) -> np.ndarray: + """covariance estimation implementation + + This method should be overridden by child classes. + + By default, this method implements the empirical covariance estimation. + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + + Returns: + np.ndarray: covariance matrix. + """ + xTx = np.asarray(X.T.dot(X)) + N = len(X) + if isinstance(X, np.ma.MaskedArray): + M = 1 - X.mask + N = M.T.dot(M) # each pair has distinct number of samples + return xTx / N + + def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: + """handle nan and centerize data + + Note: + if `nan_option='mask'` then the returned array will be `np.ma.MaskedArray`. + """ + # handle nan + if self.nan_option == self.FILL_NAN: + X = np.nan_to_num(X) + elif self.nan_option == self.MASK_NAN: + X = np.ma.masked_invalid(X) + # centralize + if not self.assume_centered: + X = X - np.nanmean(X, axis=0) + return X + + + + + + + + + diff --git a/qlib/model/riskmodel/poet.py b/qlib/model/riskmodel/poet.py new file mode 100644 index 00000000000..8dbe890360e --- /dev/null +++ b/qlib/model/riskmodel/poet.py @@ -0,0 +1,84 @@ +import numpy as np + +from qlib.model.riskmodel import RiskModel + + +class POETCovEstimator(RiskModel): + """Principal Orthogonal Complement Thresholding Estimator (POET) + + Reference: + [1] Fan, J., Liao, Y., & Mincheva, M. (2013). Large covariance estimation by thresholding principal orthogonal complements. + Journal of the Royal Statistical Society. Series B: Statistical Methodology, 75(4), 603–680. https://doi.org/10.1111/rssb.12016 + [2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m + """ + + THRESH_SOFT = "soft" + THRESH_HARD = "hard" + THRESH_SCAD = "scad" + + def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs): + """ + Args: + num_factors (int): number of factors (if set to zero, no factor model will be used). + thresh (float): the positive constant for thresholding. + thresh_method (str): thresholding method, which can be + - 'soft': soft thresholding. + - 'hard': hard thresholding. + - 'scad': scad thresholding. + kwargs: see `RiskModel` for more information. 
+ """ + super().__init__(**kwargs) + + assert num_factors >= 0, "`num_factors` requires a positive integer" + self.num_factors = num_factors + + assert thresh >= 0, "`thresh` requires a positive float number" + self.thresh = thresh + + assert thresh_method in [ + self.THRESH_HARD, + self.THRESH_SOFT, + self.THRESH_SCAD, + ], "`thresh_method` should be `soft`/`hard`/`scad`" + self.thresh_method = thresh_method + + def _predict(self, X: np.ndarray) -> np.ndarray: + + Y = X.T # NOTE: to match POET's implementation + p, n = Y.shape + + if self.num_factors > 0: + Dd, V = np.linalg.eig(Y.T.dot(Y)) + V = V[:, np.argsort(Dd)] + F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + LamPCA = Y.dot(F) / n + uhat = np.asarray(Y - LamPCA.dot(F.T)) + Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) + rate = 1 / np.sqrt(p) + np.sqrt(np.log(p) / n) + else: + uhat = np.asarray(Y) + rate = np.sqrt(np.log(p) / n) + Lowrank = 0 + + lamb = rate * self.thresh + SuPCA = uhat.dot(uhat.T) / n + SuDiag = np.diag(np.diag(SuPCA)) + R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5)) + + if self.thresh_method == self.THRESH_HARD: + M = R * (np.abs(R) > lamb) + elif self.thresh_method == self.THRESH_SOFT: + res = np.abs(R) - lamb + res = (res + np.abs(res)) / 2 + M = np.sign(R) * res + else: + M1 = (np.abs(R) < 2 * lamb) * np.sign(R) * (np.abs(R) - lamb) * (np.abs(R) > lamb) + M2 = (np.abs(R) < 3.7 * lamb) * (np.abs(R) >= 2 * lamb) * (2.7 * R - 3.7 * np.sign(R) * lamb) / 1.7 + M3 = (np.abs(R) >= 3.7 * lamb) * R + M = M1 + M2 + M3 + + Rthresh = M - np.diag(np.diag(M)) + np.eye(p) + SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5) + SigmaY = SigmaU + Lowrank + + return SigmaY diff --git a/qlib/model/riskmodel/shrink.py b/qlib/model/riskmodel/shrink.py new file mode 100644 index 00000000000..1298891fb01 --- /dev/null +++ b/qlib/model/riskmodel/shrink.py @@ -0,0 +1,262 @@ +import numpy as np +from typing import Union + +from qlib.model.riskmodel import RiskModel + + +class ShrinkCovEstimator(RiskModel): + """Shrinkage Covariance Estimator + + This estimator will shrink the sample covariance matrix towards + an identify matrix: + S_hat = (1 - alpha) * S + alpha * F + where `alpha` is the shrink parameter and `F` is the shrinking target. + + The following shrinking parameters (`alpha`) are supported: + - `lw` [1][2][3]: use Ledoit-Wolf shrinking parameter. + - `oas` [4]: use Oracle Approximating Shrinkage shrinking parameter. + - float: directly specify the shrink parameter, should be between [0, 1]. + + The following shrinking targets (`F`) are supported: + - `const_var` [1][4][5]: assume stocks have the same constant variance and zero correlation. + - `const_corr` [2][6]: assume stocks have different variance but equal correlation. + - `single_factor` [3][7]: assume single factor model as the shrinking target. + - np.ndarray: provide the shrinking targets directly. + + Note: + - The optimal shrinking parameter depends on the selection of the shrinking target. + Currently, `oas` is not supported for `const_corr` and `single_factor`. + - Remember to set `nan_option` to `fill` or `mask` if your data has missing values. + + References: + [1] Ledoit, O., & Wolf, M. (2004). A well-conditioned estimator for large-dimensional covariance matrices. + Journal of Multivariate Analysis, 88(2), 365–411. https://doi.org/10.1016/S0047-259X(03)00096-4 + [2] Ledoit, O., & Wolf, M. (2004). Honey, I shrunk the sample covariance matrix. + Journal of Portfolio Management, 30(4), 1–22. 
+        [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns
+            with an application to portfolio selection.
+            Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0
+        [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance
+            estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029.
+            https://doi.org/10.1109/TSP.2010.2053029
+        [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip
+        [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip
+        [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip
+    """
+
+    SHR_LW = "lw"
+    SHR_OAS = "oas"
+
+    TGT_CONST_VAR = "const_var"
+    TGT_CONST_CORR = "const_corr"
+    TGT_SINGLE_FACTOR = "single_factor"
+
+    def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs):
+        """
+        Args:
+            alpha (str or float): shrinking parameter or estimator (`lw`/`oas`)
+            target (str or np.ndarray): shrinking target (`const_var`/`const_corr`/`single_factor`)
+            kwargs: see `RiskModel` for more information
+        """
+        super().__init__(**kwargs)
+
+        # alpha
+        if isinstance(alpha, str):
+            assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported"
+        elif isinstance(alpha, (float, np.floating)):
+            assert 0 <= alpha <= 1, "alpha should be between [0, 1]"
+        else:
+            raise TypeError("invalid argument type for `alpha`")
+        self.alpha = alpha
+
+        # target
+        if isinstance(target, str):
+            assert target in [
+                self.TGT_CONST_VAR,
+                self.TGT_CONST_CORR,
+                self.TGT_SINGLE_FACTOR,
+            ], f"shrinking target `{target}` is not supported"
+        elif isinstance(target, np.ndarray):
+            pass
+        else:
+            raise TypeError("invalid argument type for `target`")
+        if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR:
+            raise NotImplementedError("currently `oas` can only support `const_var` as target")
+        self.target = target
+
+    def _predict(self, X: np.ndarray) -> np.ndarray:
+        # sample covariance
+        S = super()._predict(X)
+
+        # shrinking target
+        F = self._get_shrink_target(X, S)
+
+        # get shrinking parameter
+        alpha = self._get_shrink_param(X, S, F)
+
+        # shrink covariance
+        if alpha > 0:
+            S *= 1 - alpha
+            F *= alpha
+            S += F
+
+        return S
+
+    def _get_shrink_target(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target `F`"""
+        if self.target == self.TGT_CONST_VAR:
+            return self._get_shrink_target_const_var(X, S)
+        if self.target == self.TGT_CONST_CORR:
+            return self._get_shrink_target_const_corr(X, S)
+        if self.target == self.TGT_SINGLE_FACTOR:
+            return self._get_shrink_target_single_factor(X, S)
+        return self.target
+
+    def _get_shrink_target_const_var(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target with constant variance
+
+        This target assumes zero pair-wise correlation and constant variance.
+        The constant variance is estimated by averaging the sample variances of all variables.
+        """
+        n = len(S)
+        F = np.eye(n)
+        np.fill_diagonal(F, np.mean(np.diag(S)))
+        return F
+
+    def _get_shrink_target_const_corr(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target with constant correlation
+
+        This target assumes constant pair-wise correlation but keeps the sample variances.
+        The constant correlation is estimated by averaging all pairwise correlations.
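+
+        In matrix form (mirroring the code below): F_ij = r_bar * sqrt(var_i * var_j)
+        for i != j and F_ii = var_i, where `r_bar` is the average of all pairwise
+        sample correlations.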
+ """ + n = len(S) + var = np.diag(S) + sqrt_var = np.sqrt(var) + covar = np.outer(sqrt_var, sqrt_var) + r_bar = (np.sum(S / covar) - n) / (n * (n - 1)) + F = r_bar * covar + np.fill_diagonal(F, var) + return F + + def _get_shrink_target_single_factor(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: + """get shrinking target with single factor model""" + X_mkt = np.nanmean(X, axis=1) + cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) + var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) + F = np.outer(cov_mkt, cov_mkt) / var_mkt + np.fill_diagonal(F, np.diag(S)) + return F + + def _get_shrink_param(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """get shrinking parameter `alpha` + + Note: + The Ledoit-Wolf shrinking parameter estimator consists of three different methods. + """ + if self.alpha == self.SHR_OAS: + return self._get_shrink_param_oas(X, S, F) + elif self.alpha == self.SHR_LW: + if self.target == self.TGT_CONST_VAR: + return self._get_shrink_param_lw_const_var(X, S, F) + if self.target == self.TGT_CONST_CORR: + return self._get_shrink_param_lw_const_corr(X, S, F) + if self.target == self.TGT_SINGLE_FACTOR: + return self._get_shrink_param_lw_single_factor(X, S, F) + return self.alpha + + def _get_shrink_param_oas(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Oracle Approximating Shrinkage Estimator + + This method uses the following formula to estimate the `alpha` + parameter for the shrink covariance estimator: + A = (1 - 2 / p) * trace(S^2) + trace^2(S) + B = (n + 1 - 2 / p) * (trace(S^2) - trace^2(S) / p) + alpha = A / B + where `n`, `p` are the dim of observations and variables respectively. + """ + trS2 = np.sum(S ** 2) + tr2S = np.trace(S) ** 2 + + n, p = X.shape + + A = (1 - 2 / p) * (trS2 + tr2S) + B = (n + 1 - 2 / p) * (trS2 + tr2S / p) + alpha = A / B + + return alpha + + def _get_shrink_param_lw_const_var(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Constant Variance) + + This method shrinks the covariance matrix towards the constand variance target. + """ + t, n = X.shape + + y = X ** 2 + phi = np.sum(y.T.dot(y) / t - S ** 2) + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = phi / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha + + def _get_shrink_param_lw_const_corr(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Constant Correlation) + + This method shrinks the covariance matrix towards the constand correlation target. + """ + t, n = X.shape + + var = np.diag(S) + sqrt_var = np.sqrt(var) + r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1)) + + y = X ** 2 + phi_mat = y.T.dot(y) / t - S ** 2 + phi = np.sum(phi_mat) + + theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S + np.fill_diagonal(theta_mat, 0) + rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat) + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = (phi - rho) / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha + + def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Single Factor Model) + + This method shrinks the covariance matrix towards the single factor model target. 
+ """ + t, n = X.shape + + X_mkt = np.nanmean(X, axis=1) + cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) + var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) + + y = X ** 2 + phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2) + + rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2) + z = X * X_mkt[:, None] + v1 = y.T.dot(z) / t - cov_mkt[:, None] * S + roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt + v3 = z.T.dot(z) / t - var_mkt * S + roff3 = ( + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( + np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + ) + roff = 2 * roff1 - roff3 + rho = rdiag + roff + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = (phi - rho) / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py new file mode 100644 index 00000000000..e778c2faa2a --- /dev/null +++ b/qlib/model/riskmodel/structured.py @@ -0,0 +1,152 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +from typing import Union +from sklearn.decomposition import PCA, FactorAnalysis + +from qlib.model.riskmodel import RiskModel + + +class StructuredCovEstimator(RiskModel): + """Structured Covariance Estimator + + This estimator assumes observations can be predicted by multiple factors + X = FB + U + where `F` can be specified by explicit risk factors or latent factors. + + Therefore the structured covariance can be estimated by + cov(X) = F cov(B) F.T + cov(U) + + We use latent factor models to estimate the structured covariance. + Specifically, the following latent factor models are supported: + - `pca`: Principal Component Analysis + - `fa`: Factor Analysis + + Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and + precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 + """ + + FACTOR_MODEL_PCA = "pca" + FACTOR_MODEL_FA = "fa" + NAN_OPTION = "fill" + + def __init__( + self, + factor_model: str = "pca", + num_factors: int = 10, + assume_centered: bool = False, + scale_return: bool = True, + ): + """ + Args: + factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). + num_factors (int): number of components to keep. + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. + """ + super().__init__(self.NAN_OPTION, assume_centered, scale_return) + + assert factor_model in [ + self.FACTOR_MODEL_PCA, + self.FACTOR_MODEL_FA, + ], "factor_model={} is not supported".format(factor_model) + self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis + + self.num_factors = num_factors + + def predict( + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, + ) -> Union[pd.DataFrame, np.ndarray, tuple]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. 
+ + Returns: + tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. + """ + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." + + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + if return_decomposed_components: + F, cov_b, var_u = self._predict(X, return_structured=True) + return F, cov_b, var_u + else: + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + """ + covariance estimation implementation + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + return_structured (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or np.ndarray: decomposed covariance matrix or covariance matrix. + """ + + model = self.solver(self.num_factors, random_state=0).fit(X) + + F = model.components_.T # num_features x num_factors + B = model.transform(X) # num_samples x num_factors + U = X - B @ F.T + cov_b = np.cov(B.T) # num_factors x num_factors + var_u = np.var(U, axis=0) # diagonal + + if return_structured: + return F, cov_b, var_u + + cov_x = F @ cov_b @ F.T + np.diag(var_u) + + return cov_x diff --git a/qlib/portfolio/enhanced_indexing.py b/qlib/portfolio/enhanced_indexing.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/qlib/model/riskmodel_shrink.py b/qlib/portfolio/optimizer/__init__.py similarity index 100% rename from qlib/model/riskmodel_shrink.py rename to qlib/portfolio/optimizer/__init__.py diff --git a/qlib/model/riskmodel_structured.py b/qlib/portfolio/optimizer/base.py similarity index 100% rename from qlib/model/riskmodel_structured.py rename to qlib/portfolio/optimizer/base.py diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py new file mode 100644 index 00000000000..d988c776bc6 --- /dev/null +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -0,0 +1,140 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import cvxpy as cp +import pandas as pd +from typing import Union + +from qlib.portfolio.optimizer import BaseOptimizer + + +class EnhancedIndexingOptimizer(BaseOptimizer): + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. 
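+
+    A minimal usage sketch (names and shapes here are illustrative, not part of the API):
+    given expected returns `u` and the `(F, cov_b, var_u)` decomposition returned by
+    `StructuredCovEstimator.predict(..., return_decomposed_components=True)`:
+
+        opt = EnhancedIndexingOptimizer(lamb=10, bench_dev=0.01)
+        w = opt(u, F, cov_b, var_u, w0=None, w_bench=w_bench)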
+ """ + + START_FROM_W0 = "w0" + START_FROM_BENCH = "benchmark" + DO_NOT_START_FROM = "no_warm_start" + + def __init__( + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = None, + scale_alpha: bool = True, + verbose: bool = False, + warm_start: str = DO_NOT_START_FROM, + max_iters: int = 10000, + ): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific + restriction + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." + self.inds_dev = inds_dev + + assert warm_start in [ + self.DO_NOT_START_FROM, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" + self.start_from_w0 = warm_start == self.START_FROM_W0 + self.start_from_bench = warm_start == self.START_FROM_BENCH + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__( + self, + u: np.ndarray, + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot (np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
+ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + ] + + if self.inds_dev is not None: + d_inds = d_bench @ inds_onehot + cons.append(d_inds >= -self.inds_dev) + cons.append(d_inds <= self.inds_dev) + + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print("Warning: try warm start with w0, but w0 is `None`.") + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != "optimal": + print("Warning: solve failed.", prob.status) + + return np.asarray(w.value) diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer/optimizer.py similarity index 62% rename from qlib/portfolio/optimizer.py rename to qlib/portfolio/optimizer/optimizer.py index 6ee396a513b..17a7fc30a66 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer/optimizer.py @@ -4,11 +4,12 @@ import abc import warnings import numpy as np -import cvxpy as cp import pandas as pd import scipy.optimize as so from typing import Optional, Union, Callable, List +from qlib.portfolio.enhanced_indexing import EnhancedIndexingOptimizer + class BaseOptimizer(abc.ABC): """ Construct portfolio with a optimization related method """ @@ -38,13 +39,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -71,10 +72,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -163,7 +164,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -259,7 +260,6 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: - def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) @@ -272,134 +272,3 @@ def opt_obj(x): warnings.warn(f"optimization not success ({sol.status})") return sol.x - - -class EnhancedIndexingOptimizer(BaseOptimizer): - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - 
This optimizer always assumes full investment and no-shorting. - """ - - START_FROM_W0 = "w0" - START_FROM_BENCH = "benchmark" - DO_NOT_START_FROM = "no_warm_start" - - def __init__( - self, - lamb: float = 10, - delta: float = 0.4, - bench_dev: float = 0.01, - inds_dev: float = None, - scale_alpha: bool = True, - verbose: bool = False, - warm_start: str = DO_NOT_START_FROM, - max_iters: int = 10000, - ): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific - restriction - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [ - self.DO_NOT_START_FROM, - self.START_FROM_W0, - self.START_FROM_BENCH, - ], "illegal warm start option" - self.start_from_w0 = warm_start == self.START_FROM_W0 - self.start_from_bench = warm_start == self.START_FROM_BENCH - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__( - self, - u: np.ndarray, - F: np.ndarray, - covB: np.ndarray, - varU: np.ndarray, - w0: np.ndarray, - w_bench: np.ndarray, - inds_onehot: np.ndarray = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
- - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= self.bench_dev, - ] - - if self.inds_dev is not None: - d_inds = d_bench @ inds_onehot - cons.append(d_inds >= -self.inds_dev) - cons.append(d_inds <= self.inds_dev) - - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print("Warning: try warm start with w0, but w0 is `None`.") - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != "optimal": - print("Warning: solve failed.", prob.status) - - return np.asarray(w.value) diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py deleted file mode 100644 index f21d51984a6..00000000000 --- a/tests/test_enhanced_indexing.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import sys -import math -import shutil -import unittest -import numpy as np -import pandas as pd -from tqdm import tqdm -from pathlib import Path - -import qlib -from qlib.config import C -from qlib.utils import init_instance_by_config, flatten_dict -from qlib.workflow import R -from qlib.config import REG_CN -from qlib.workflow.record_temp import SignalRecord, SigAnaRecord -from qlib.tests import TestAutoData -from qlib.portfolio.optimizer import EnhancedIndexingOptimizer -from qlib.model.riskmodel import StructuredCovEstimator -from qlib.data.dataset.loader import QlibDataLoader -from qlib.data.dataset.handler import DataHandler -from qlib.data import D -from qlib.utils import exists_qlib_data, init_instance_by_config - -market = "all" -trade_gap = 21 -label_config = "Ref($close, -{}) / Ref($close, -1) - 1".format(trade_gap) # reconstruct portfolio once a month - -provider_uri = "~/.qlib_ei/qlib_data/cn_data" # target_dir -if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(Path.cwd().parent.joinpath("scripts"))) - from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) -qlib.init(provider_uri=provider_uri, region=REG_CN) - -################################### -# train model -################################### -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-11-30", - "instruments": market, - "label": [label_config] -} - -task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 32, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, 
- "segments": { - "train": ("2008-01-01", "2014-11-30"), - "valid": ("2015-01-01", "2016-11-30"), - "test": ("2017-01-01", "2018-01-01"), - }, - }, - }, -} - - -class CSI300: - """Simulate CSI300 as the Benchmark for Enhanced Indexing to Track""" - - def __init__(self): - # provider_uri = '/nfs_data/qlib_data/ycz_daily/qlib' - # qlib.init(provider_uri=provider_uri, region=REG_CN, dataset_cache=None, expression_cache=None) - self.csi_weight = D.features(D.instruments('csi300'), ['$csi300_weight']) - - def __call__(self, pd_index, trade_date): - weights = np.zeros(len(pd_index)) - - for idx, instrument in enumerate(pd_index): - if (instrument, trade_date) in self.csi_weight.index: - weight = self.csi_weight.loc[(instrument, trade_date)].values[0] - if not math.isnan(weight): - weights[idx] = weight - - assert weights.sum() > 0, ' Fetch CSI Weights Error!' - weights = weights / weights.sum() - - return weights - - -class EnhancedIndexingStrategy: - """Enhanced Indexing Strategy""" - - def __init__(self): - self.benchmark = CSI300() - - provider_uri = "~/.qlib_ei/qlib_data/cn_data" - qlib.init(provider_uri=provider_uri, region=REG_CN) - - self.data_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader(["$close"])) - self.label_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader([label_config])) - self.cov_estimator = StructuredCovEstimator() - self.optimizer = EnhancedIndexingOptimizer(lamb=0.1, delta=0.4, bench_dev=0.03, max_iters=50000) - - def update(self, score_series, current, pred_date): - """ - Parameters - ----------- - score_series : pd.Series - stock_id , score. - current : Position() - current of account. - trade_exchange : Exchange() - exchange. - trade_date : pd.Timestamp - date. - """ - print(score_series) - score_series = score_series.dropna() - - # portfolio init weight - init_weight = current.reindex(score_series.index, fill_value=0).values.squeeze() - init_weight_sum = init_weight.sum() - if init_weight_sum > 0: - init_weight /= init_weight_sum - - # covariance estimation - selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index) - price = self.data_handler.fetch(selector, level=None, squeeze=True) - F, cov_b, var_u = self.cov_estimator.predict(price, return_decomposed_components=True) - - # optimize target portfolio - w_bench = self.benchmark(score_series.index, pred_date) - passed_init_weight = init_weight if init_weight_sum > 0 else None - # print(F) - # print(cov_b) - # print(var_u) - # print(passed_init_weight) - # print(w_bench) - target_weight = self.optimizer(score_series.values, F, cov_b, var_u, passed_init_weight, w_bench) - # print(target_weight) - target = pd.DataFrame(data=target_weight, index=score_series.index) - - active_weights = target_weight - w_bench - selector = (self.label_handler.get_range_selector(pred_date, 1), score_series.index) - label = self.label_handler.fetch(selector, level=None, squeeze=True) - alpha = 0 - for instrument, weight in zip(score_series.index, active_weights): - delta = label.loc[(pred_date, instrument)] - alpha += weight * (0 if math.isnan(delta) else delta) - - print(alpha) - - return alpha, target - - -def train(): - """train model - - Returns - ------- - pred_score: pandas.DataFrame - predict scores - performance: dict - model performance - """ - - # model initiation - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) - - # start exp - with R.start(experiment_name="workflow"): - 
R.log_params(**flatten_dict(task)) - model.fit(dataset) - - # prediction - recorder = R.get_recorder() - rid = recorder.id - sr = SignalRecord(model, dataset, recorder) - sr.generate() - pred_score = sr.load() - - # calculate ic and ric - sar = SigAnaRecord(recorder) - sar.generate() - ic = sar.load(sar.get_path("ic.pkl")) - ric = sar.load(sar.get_path("ric.pkl")) - - return pred_score, {"ic": ic, "ric": ric}, rid - - -def backtest_analysis(scores): - """backtest enhanced indexing - - Parameters - ---------- - scores: pandas.DataFrame - predict scores - - Returns - ------- - sharpe_ratio: floating-point - sharpe ratio of the enhanced indexing portfolio - """ - - # backtest and analysis - with R.start(experiment_name="backtest_analysis"): - strategy = EnhancedIndexingStrategy() - dates = scores.index.get_level_values(0).unique() - - alphas = [] - current = pd.DataFrame() - gap_between_next_trade = 0 - for date in tqdm(dates): - if gap_between_next_trade == 0: - score_series = scores.loc[date] - alpha, current = strategy.update(score_series, current, date) - alphas.append(alpha) - gap_between_next_trade = trade_gap - else: - gap_between_next_trade -= 1 - - alphas = np.array(alphas) - sharpe_ratio = alphas.mean() / np.std(alphas) - print('Sharpe:', sharpe_ratio) - - return sharpe_ratio - - -class TestAllFlow(TestAutoData): - PRED_SCORE = None - REPORT_NORMAL = None - POSITIONS = None - RID = None - - @classmethod - def tearDownClass(cls) -> None: - shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve())) - - def test_0_train(self): - TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train() - self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed") - self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") - - def test_1_backtest(self): - sharpe_ratio = backtest_analysis(TestAllFlow.PRED_SCORE) - self.assertGreaterEqual( - sharpe_ratio, - 0.90, - "backtest failed", - ) - - -def suite(): - _suite = unittest.TestSuite() - _suite.addTest(TestAllFlow("test_0_train")) - _suite.addTest(TestAllFlow("test_1_backtest")) - return _suite - - -if __name__ == "__main__": - runner = unittest.TextTestRunner() - runner.run(suite()) From 0f3e3d206b51300b953e06674104af2ae23dc786 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Thu, 4 Mar 2021 22:47:42 +0800 Subject: [PATCH 21/32] Update __init__.py. --- qlib/model/riskmodel/__init__.py | 7 +++++++ qlib/model/riskmodel/base.py | 4 ---- qlib/portfolio/__init__.py | 2 ++ qlib/portfolio/optimizer/__init__.py | 6 ++++++ qlib/portfolio/optimizer/base.py | 13 +++++++++++++ qlib/portfolio/optimizer/optimizer.py | 13 ++----------- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/qlib/model/riskmodel/__init__.py b/qlib/model/riskmodel/__init__.py index e69de29bb2d..05af6b7d377 100644 --- a/qlib/model/riskmodel/__init__.py +++ b/qlib/model/riskmodel/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+
+from .base import RiskModel
+from .poet import POETCovEstimator
+from .shrink import ShrinkCovEstimator
+from .structured import StructuredCovEstimator
diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py
index d5b009cccca..02ab8c2fb63 100644
--- a/qlib/model/riskmodel/base.py
+++ b/qlib/model/riskmodel/base.py
@@ -7,10 +7,6 @@
 
 from qlib.model.base import BaseModel
 
-from qlib.model.riskmodel_poet import POETCovEstimator
-from qlib.model.riskmodel_shrink import ShrinkCovEstimator
-from qlib.model.riskmodel_structured import StructuredCovEstimator
-
 
 class RiskModel(BaseModel):
     """Risk Model
diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py
index e69de29bb2d..139597f9cb0 100644
--- a/qlib/portfolio/__init__.py
+++ b/qlib/portfolio/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/qlib/portfolio/optimizer/__init__.py b/qlib/portfolio/optimizer/__init__.py
index e69de29bb2d..5080b9a469a 100644
--- a/qlib/portfolio/optimizer/__init__.py
+++ b/qlib/portfolio/optimizer/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from .base import BaseOptimizer
+from .optimizer import PortfolioOptimizer
+from .enhanced_indexing import EnhancedIndexingOptimizer
diff --git a/qlib/portfolio/optimizer/base.py b/qlib/portfolio/optimizer/base.py
index e69de29bb2d..502443869d9 100644
--- a/qlib/portfolio/optimizer/base.py
+++ b/qlib/portfolio/optimizer/base.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import abc
+
+
+class BaseOptimizer(abc.ABC):
+    """ Construct portfolio with an optimization-related method """
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs) -> object:
+        """ Generate an optimized portfolio allocation """
+        pass
diff --git a/qlib/portfolio/optimizer/optimizer.py b/qlib/portfolio/optimizer/optimizer.py
index 17a7fc30a66..3daa98af329 100644
--- a/qlib/portfolio/optimizer/optimizer.py
+++ b/qlib/portfolio/optimizer/optimizer.py
@@ -1,23 +1,14 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import abc
+
 import warnings
 import numpy as np
 import pandas as pd
 import scipy.optimize as so
 from typing import Optional, Union, Callable, List
 
-from qlib.portfolio.enhanced_indexing import EnhancedIndexingOptimizer
-
-
-class BaseOptimizer(abc.ABC):
-    """ Construct portfolio with a optimization related method """
-
-    @abc.abstractmethod
-    def __call__(self, *args, **kwargs) -> object:
-        """ Generate a optimized portfolio allocation """
-        pass
+from qlib.portfolio.optimizer import BaseOptimizer
 
 
 class PortfolioOptimizer(BaseOptimizer):
From 79c1142d3e7f456ca66fe238973a49edefa1b86f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 17:09:33 +0800
Subject: [PATCH 22/32] Pass nan_option to structured covariance estimator.
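
For illustration, the estimator now takes `nan_option` explicitly and rejects
unsupported options. A minimal sketch (not part of the diff below; it assumes
the package layout introduced in PATCH 21):

    from qlib.model.riskmodel import StructuredCovEstimator

    est = StructuredCovEstimator(factor_model="pca", num_factors=10, nan_option="fill")  # accepted
    # StructuredCovEstimator(nan_option="ignore") raises an AssertionError: only "fill"
    # is supported here, since the latent factor solvers cannot work with NaN input.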
--- qlib/model/riskmodel/structured.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index e778c2faa2a..69c032e8137 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -30,7 +30,7 @@ class StructuredCovEstimator(RiskModel): FACTOR_MODEL_PCA = "pca" FACTOR_MODEL_FA = "fa" - NAN_OPTION = "fill" + DEFAULT_NAN_OPTION = "fill" def __init__( self, @@ -38,6 +38,7 @@ def __init__( num_factors: int = 10, assume_centered: bool = False, scale_return: bool = True, + nan_option: str = DEFAULT_NAN_OPTION ): """ Args: @@ -45,8 +46,11 @@ def __init__( num_factors (int): number of components to keep. assume_centered (bool): whether the data is assumed to be centered. scale_return (bool): whether scale returns as percentage. + nan_option (str): nan handling option (`fill`). """ - super().__init__(self.NAN_OPTION, assume_centered, scale_return) + assert nan_option in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(nan_option) + + super().__init__(nan_option, assume_centered, scale_return) assert factor_model in [ self.FACTOR_MODEL_PCA, From 4d5a30b30b6766168dcc3b19e6a4420c31710da4 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:14:29 +0800 Subject: [PATCH 23/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589167776 --- qlib/model/riskmodel/structured.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index 69c032e8137..7b722e6009a 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -36,21 +36,21 @@ def __init__( self, factor_model: str = "pca", num_factors: int = 10, - assume_centered: bool = False, - scale_return: bool = True, - nan_option: str = DEFAULT_NAN_OPTION + **kwargs ): """ Args: factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). num_factors (int): number of components to keep. - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. - nan_option (str): nan handling option (`fill`). + kwargs: see `RiskModel` for more information """ - assert nan_option in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(nan_option) + if 'nan_option' in kwargs.keys(): + assert kwargs['nan_option'] in [self.DEFAULT_NAN_OPTION], \ + "nan_option={} is not supported".format(kwargs['nan_option']) + else: + kwargs['nan_option'] = self.DEFAULT_NAN_OPTION - super().__init__(nan_option, assume_centered, scale_return) + super().__init__(**kwargs) assert factor_model in [ self.FACTOR_MODEL_PCA, From 81b86f8022ef90f437e01d20e75a0f77e1c65786 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:18:07 +0800 Subject: [PATCH 24/32] Update test to cover changes in structured_cov_estimator --- tests/test_structured_cov_estimator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index 6aeae3d8979..8ac1e8477cc 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -27,6 +27,24 @@ def test_random_covariance(self): self.assertTrue(if_identical) + def test_nan_option_covariance(self): + # Try to estimate the covariance from a randomly generated matrix. 
+        NUM_VARIABLE = 10
+        NUM_OBSERVATION = 200
+        EPS = 1e-6
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill')
+
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, while qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
     def test_constructed_covariance(self):
         # Try to estimate the covariance from a specially crafted matrix.
         # There should be some significant correlation since X is specially crafted.
From 351d598c9f45a59d96fb0be1d57bbbc662d756f6 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 17:49:59 +0800
Subject: [PATCH 25/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589165409

---
 qlib/model/riskmodel/base.py           | 28 ++++++----
 qlib/model/riskmodel/structured.py     | 74 ++------------------------
 tests/test_structured_cov_estimator.py | 15 +++++-
 3 files changed, 34 insertions(+), 83 deletions(-)

diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py
index 02ab8c2fb63..89df80e8f07 100644
--- a/qlib/model/riskmodel/base.py
+++ b/qlib/model/riskmodel/base.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+import inspect
 import numpy as np
 import pandas as pd
 from typing import Union
@@ -37,18 +38,24 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc
         self.scale_return = scale_return
 
     def predict(
-        self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
-    ) -> Union[pd.DataFrame, np.ndarray]:
+        self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True,
+        return_decomposed_components=False,
+    ) -> Union[pd.DataFrame, np.ndarray, tuple]:
         """
         Args:
             X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
                 with variables as columns and observations as rows.
             return_corr (bool): whether return the correlation matrix.
             is_price (bool): whether `X` contains price (if not assume stock returns).
+            return_decomposed_components (bool): whether return decomposed components of the covariance matrix.
 
         Returns:
             pd.DataFrame or np.ndarray: estimated covariance (or correlation).
         """
+        assert (
+            not return_corr or not return_decomposed_components
+        ), "Can only return either correlation matrix or decomposed components."
+ # transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): columns = None @@ -75,6 +82,14 @@ def predict( # handle nan and centered X = self._preprocess(X) + # return decomposed components if needed + if return_decomposed_components: + assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \ + 'This risk model does not support return decomposed components of the covariance matrix ' + + F, cov_b, var_u = self._predict(X, return_decomposed_components=True) + return F, cov_b, var_u + # estimate covariance S = self._predict(X) @@ -126,12 +141,3 @@ def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: if not self.assume_centered: X = X - np.nanmean(X, axis=0) return X - - - - - - - - - diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index 7b722e6009a..39ff0166efa 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -60,81 +60,13 @@ def __init__( self.num_factors = num_factors - def predict( - self, - X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, - is_price: bool = True, - return_decomposed_components=False, - ) -> Union[pd.DataFrame, np.ndarray, tuple]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - return_decomposed_components (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. - """ - assert ( - not return_corr or not return_decomposed_components - ), "Can only return either correlation matrix or decomposed components." - - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - if return_decomposed_components: - F, cov_b, var_u = self._predict(X, return_structured=True) - return F, cov_b, var_u - else: - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + def _predict(self, X: np.ndarray, return_decomposed_components=False) -> Union[np.ndarray, tuple]: """ covariance estimation implementation Args: X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - return_structured (bool): whether return decomposed components of the covariance matrix. 
+ return_decomposed_components (bool): whether return decomposed components of the covariance matrix. Returns: tuple or np.ndarray: decomposed covariance matrix or covariance matrix. @@ -148,7 +80,7 @@ def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, cov_b = np.cov(B.T) # num_factors x num_factors var_u = np.var(U, axis=0) # diagonal - if return_structured: + if return_decomposed_components: return F, cov_b, var_u cov_x = F @ cov_b @ F.T + np.diag(var_u) diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index 8ac1e8477cc..a3973be5ae9 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -28,7 +28,7 @@ def test_random_covariance(self): self.assertTrue(if_identical) def test_nan_option_covariance(self): - # Try to estimate the covariance from a randomly generated matrix. + # Test if nan_option is correctly passed. NUM_VARIABLE = 10 NUM_OBSERVATION = 200 EPS = 1e-6 @@ -45,6 +45,19 @@ def test_nan_option_covariance(self): self.assertTrue(if_identical) + def test_decompose_covariance(self): + # Test if return_decomposed_components is correctly passed. + NUM_VARIABLE = 10 + NUM_OBSERVATION = 200 + + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + + X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) + + F, cov_b, var_u = estimator.predict(X, is_price=False, return_decomposed_components=True) + + self.assertTrue(F is not None and cov_b is not None and var_u is not None) + def test_constructed_covariance(self): # Try to estimate the covariance from a specially crafted matrix. # There should be some significant correlation since X is specially crafted. From c6675be7929afb6c0403aa6482f4bbea50bf053d Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:51:36 +0800 Subject: [PATCH 26/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589166143 --- qlib/portfolio/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py index 139597f9cb0..b7c525821a8 100644 --- a/qlib/portfolio/__init__.py +++ b/qlib/portfolio/__init__.py @@ -1,2 +1,3 @@ - +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
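
A usage sketch for the decomposed-components API introduced in PATCH 25
(illustrative only, not part of any patch in this series; the random data
matrix mirrors the new unit tests):

    import numpy as np
    from qlib.model.riskmodel import StructuredCovEstimator

    X = np.random.rand(200, 10)  # 200 observations (rows), 10 variables (columns)
    est = StructuredCovEstimator(scale_return=False, assume_centered=True)
    F, cov_b, var_u = est.predict(X, is_price=False, return_decomposed_components=True)

    # Reassembling the components reproduces the structured covariance,
    # i.e. est.predict(X, is_price=False), up to floating-point error:
    cov_x = F @ cov_b @ F.T + np.diag(var_u)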
From fc89fec46d4c23e650ac2f9fca12c926673ab882 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 18:56:54 +0800 Subject: [PATCH 27/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589168764 --- qlib/portfolio/optimizer/enhanced_indexing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index d988c776bc6..a0d0bc05090 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -19,7 +19,6 @@ class EnhancedIndexingOptimizer(BaseOptimizer): START_FROM_W0 = "w0" START_FROM_BENCH = "benchmark" - DO_NOT_START_FROM = "no_warm_start" def __init__( self, @@ -29,7 +28,7 @@ def __init__( inds_dev: float = None, scale_alpha: bool = True, verbose: bool = False, - warm_start: str = DO_NOT_START_FROM, + warm_start: str = None, max_iters: int = 10000, ): """ @@ -58,7 +57,7 @@ def __init__( self.inds_dev = inds_dev assert warm_start in [ - self.DO_NOT_START_FROM, + None, self.START_FROM_W0, self.START_FROM_BENCH, ], "illegal warm start option" From 2f9af1af8ff44a4b60a0e9e2ca5412d1453c9755 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:02:40 +0800 Subject: [PATCH 28/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589169769 --- qlib/portfolio/optimizer/enhanced_indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index a0d0bc05090..1f7de6cb4c1 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -70,7 +70,7 @@ def __init__( def __call__( self, - u: np.ndarray, + u: Union[np.ndarray, pd.Series], F: np.ndarray, covB: np.ndarray, varU: np.ndarray, @@ -80,7 +80,7 @@ def __call__( ) -> Union[np.ndarray, pd.Series]: """ Args: - u (np.ndarray): expected returns (a.k.a., alpha) + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) F, covB, varU (np.ndarray): see StructuredCovEstimator w0 (np.ndarray): initial weights (for turnover control) w_bench (np.ndarray): benchmark weights @@ -91,6 +91,10 @@ def __call__( """ assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
+ # transform dataframe into array + if isinstance(u, pd.Series): + u = u.values + # scale alpha to match volatility if self.scale_alpha: u = u / u.std() From 7022675d003b4a603e9a04769e8a91e7232421ea Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:07:28 +0800 Subject: [PATCH 29/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589169489 --- qlib/portfolio/optimizer/enhanced_indexing.py | 40 +++++++++---------- qlib/portfolio/optimizer/optimizer.py | 25 ++++++------ 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index 1f7de6cb4c1..5fdc1014ddf 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -21,15 +21,15 @@ class EnhancedIndexingOptimizer(BaseOptimizer): START_FROM_BENCH = "benchmark" def __init__( - self, - lamb: float = 10, - delta: float = 0.4, - bench_dev: float = 0.01, - inds_dev: float = None, - scale_alpha: bool = True, - verbose: bool = False, - warm_start: str = None, - max_iters: int = 10000, + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = None, + scale_alpha: bool = True, + verbose: bool = False, + warm_start: str = None, + max_iters: int = 10000, ): """ Args: @@ -56,11 +56,7 @@ def __init__( assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." self.inds_dev = inds_dev - assert warm_start in [ - None, - self.START_FROM_W0, - self.START_FROM_BENCH, - ], "illegal warm start option" + assert warm_start in [None, self.START_FROM_W0, self.START_FROM_BENCH,], "illegal warm start option" self.start_from_w0 = warm_start == self.START_FROM_W0 self.start_from_bench = warm_start == self.START_FROM_BENCH @@ -69,14 +65,14 @@ def __init__( self.max_iters = max_iters def __call__( - self, - u: Union[np.ndarray, pd.Series], - F: np.ndarray, - covB: np.ndarray, - varU: np.ndarray, - w0: np.ndarray, - w_bench: np.ndarray, - inds_onehot: np.ndarray = None, + self, + u: Union[np.ndarray, pd.Series], + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray = None, ) -> Union[np.ndarray, pd.Series]: """ Args: diff --git a/qlib/portfolio/optimizer/optimizer.py b/qlib/portfolio/optimizer/optimizer.py index 3daa98af329..54648a46ac0 100644 --- a/qlib/portfolio/optimizer/optimizer.py +++ b/qlib/portfolio/optimizer/optimizer.py @@ -30,13 +30,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -63,10 +63,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -155,7 +155,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - 
self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -251,6 +251,7 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: + def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) From 6a305c73ae51254c9c8d7629b968720bc099ac6f Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:08:55 +0800 Subject: [PATCH 30/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589166529 --- qlib/model/riskmodel/base.py | 14 +++++++++----- qlib/model/riskmodel/poet.py | 2 +- qlib/model/riskmodel/shrink.py | 3 +-- qlib/model/riskmodel/structured.py | 16 ++++++---------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py index 89df80e8f07..bb067e3d586 100644 --- a/qlib/model/riskmodel/base.py +++ b/qlib/model/riskmodel/base.py @@ -38,8 +38,11 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, - return_decomposed_components=False, + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: @@ -53,7 +56,7 @@ def predict( pd.DataFrame or np.ndarray: estimated covariance (or correlation). """ assert ( - not return_corr or not return_decomposed_components + not return_corr or not return_decomposed_components ), "Can only return either correlation matrix or decomposed components." 
         # transform input into 2D array
@@ -84,8 +87,9 @@ def predict(
 
         # return decomposed components if needed
         if return_decomposed_components:
-            assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \
-                'This risk model does not support return decomposed components of the covariance matrix '
+            assert (
+                "return_decomposed_components" in inspect.getfullargspec(self._predict).args
+            ), "This risk model does not support returning decomposed components of the covariance matrix"
 
             F, cov_b, var_u = self._predict(X, return_decomposed_components=True)
             return F, cov_b, var_u
diff --git a/qlib/model/riskmodel/poet.py b/qlib/model/riskmodel/poet.py
index 8dbe890360e..84038455582 100644
--- a/qlib/model/riskmodel/poet.py
+++ b/qlib/model/riskmodel/poet.py
@@ -50,7 +50,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray:
         if self.num_factors > 0:
             Dd, V = np.linalg.eig(Y.T.dot(Y))
             V = V[:, np.argsort(Dd)]
-            F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n)
+            F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n)
             LamPCA = Y.dot(F) / n
             uhat = np.asarray(Y - LamPCA.dot(F.T))
             Lowrank = np.asarray(LamPCA.dot(LamPCA.T))
diff --git a/qlib/model/riskmodel/shrink.py b/qlib/model/riskmodel/shrink.py
index 1298891fb01..3cb2620d1bc 100644
--- a/qlib/model/riskmodel/shrink.py
+++ b/qlib/model/riskmodel/shrink.py
@@ -248,8 +248,7 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np
         roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
         v3 = z.T.dot(z) / t - var_mkt * S
         roff3 = (
-            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(
-                np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
+            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
         )
         roff = 2 * roff1 - roff3
         rho = rdiag + roff
diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py
index 39ff0166efa..878503401fc 100644
--- a/qlib/model/riskmodel/structured.py
+++ b/qlib/model/riskmodel/structured.py
@@ -32,23 +32,19 @@ class StructuredCovEstimator(RiskModel):
     FACTOR_MODEL_FA = "fa"
     DEFAULT_NAN_OPTION = "fill"
 
-    def __init__(
-        self,
-        factor_model: str = "pca",
-        num_factors: int = 10,
-        **kwargs
-    ):
+    def __init__(self, factor_model: str = "pca", num_factors: int = 10, **kwargs):
         """
         Args:
             factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`).
             num_factors (int): number of components to keep.
             kwargs: see `RiskModel` for more information
         """
-        if 'nan_option' in kwargs.keys():
-            assert kwargs['nan_option'] in [self.DEFAULT_NAN_OPTION], \
-                "nan_option={} is not supported".format(kwargs['nan_option'])
+        if "nan_option" in kwargs.keys():
+            assert kwargs["nan_option"] in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(
+                kwargs["nan_option"]
+            )
         else:
-            kwargs['nan_option'] = self.DEFAULT_NAN_OPTION
+            kwargs["nan_option"] = self.DEFAULT_NAN_OPTION
 
         super().__init__(**kwargs)
 
From 8b9065c16690057b3fdd3968262e875e3c520c87 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 19:32:13 +0800
Subject: [PATCH 31/32] Reformat with black.
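
This patch is formatting-only; no behavioral change is intended. The output
style is consistent with black at a 120-character line length, e.g. an
invocation along these lines (the exact command is an assumption, inferred
from the resulting style):

    black qlib tests -l 120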
--- qlib/config.py | 24 ++----- qlib/contrib/backtest/__init__.py | 18 +---- qlib/contrib/backtest/profit_attribution.py | 23 ++----- qlib/contrib/data/handler.py | 10 +-- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 +---- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++---- qlib/contrib/model/pytorch_alstm_ts.py | 17 ++--- qlib/contrib/model/pytorch_gats.py | 22 ++---- qlib/contrib/model/pytorch_gats_ts.py | 18 +---- qlib/contrib/model/pytorch_gru.py | 21 ++---- qlib/contrib/model/pytorch_gru_ts.py | 17 +---- qlib/contrib/model/pytorch_lstm.py | 21 ++---- qlib/contrib/model/pytorch_lstm_ts.py | 17 +---- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 +---- qlib/contrib/model/pytorch_tabnet.py | 14 +--- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 ++----- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 +-- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++-------------- .../analysis_position/cumulative_return.py | 36 ++-------- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 +---- .../report/analysis_position/report.py | 15 +--- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +--- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 ++----- qlib/data/client.py | 3 +- qlib/data/data.py | 69 +++---------------- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/portfolio/__init__.py | 1 - qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- tests/test_all_pipeline.py | 9 +-- tests/test_dump_data.py | 9 +-- tests/test_get_data.py | 4 +- tests/test_structured_cov_estimator.py | 4 +- 46 files changed, 123 insertions(+), 546 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 52b05568d57..344eb852777 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,12 +115,7 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": { - "field_not_found": { - "()": "qlib.log.LogFilter", - "param": [".*?WARN: data not found for.*?"], - } - }, + "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, "handlers": { "console": { "class": "logging.StreamHandler", @@ -135,10 +130,7 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, }, } @@ -200,16 +192,8 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: { - "trade_unit": 100, - "limit_threshold": 0.099, - "deal_price": "vwap", - }, - REG_US: { - "trade_unit": 1, - "limit_threshold": None, - "deal_price": "close", - }, + REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, + REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index aa24ffb0cf6..bd3494abf6a 100644 --- a/qlib/contrib/backtest/__init__.py +++ 
b/qlib/contrib/backtest/__init__.py @@ -18,13 +18,7 @@ def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, + strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, ): """get_strategy @@ -75,11 +69,7 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, + topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -172,9 +162,7 @@ def get_exchange( def get_executor( - executor=None, - trade_exchange=None, - verbose=True, + executor=None, trade_exchange=None, verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 20c6f638fcd..355f0637395 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,10 +12,7 @@ def get_benchmark_weight( - bench, - start_date=None, - end_date=None, - path=None, + bench, start_date=None, end_date=None, path=None, ): """get_benchmark_weight @@ -216,12 +213,7 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, - bench="SH000905", - group_field="industry", - group_method="category", - group_n=None, - deal_price="vwap", + positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", ): """brinson profit attribution @@ -255,17 +247,10 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), - start_time=shift_start_date, - end_time=end_date, - as_list=True, + D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, ) stock_df = D.features( - instruments, - [group_field, deal_price], - start_time=shift_start_date, - end_time=end_date, - freq="day", + instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 970b032d6b0..574287819b7 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,10 +21,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." 
pkwargs.update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -170,10 +167,7 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": { - "windows": [0], - "feature": ["OPEN", "HIGH", "LOW", "VWAP"], - }, + "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853f1..363a184582d 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,11 +35,7 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, - label: pd.Series, - date_col: str = "datetime", - quantile: float = 0.2, - dropna: bool = False, + pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4aa5b55156f..5cb1ce4eb67 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,12 +244,7 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict( - zip( - ["long", "short", "long_short"], - map(pd.Series, [long_returns, short_returns, ls_returns]), - ) - ) + return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 04ddd8db041..2d94105e482 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,12 +64,7 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, - fields, - start_time=evaluate_date, - end_time=evaluate_date, - freq="day", - disk_cache=0, + instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -87,14 +82,7 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features( - instruments, - fields, - start_time=start_date, - end_time=end_date, - freq="day", - disk_cache=0, - ) + close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index d57c32b7022..2840c2cef5a 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,9 +32,7 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 
bbbb61851b1..306e68aadf2 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -328,14 +319,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de855..612bacbec93 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -309,14 +302,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", 
nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc1a..c59dc91973f 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,17 +229,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -340,19 +334,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e418..dfc5f4ab5ed 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,11 +242,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -361,19 +357,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9fb0..d2a774b65b4 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return 
np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b854c..49f438cc379 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec87..02ca16e36b8 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 
+296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f1f..2ec36f96e34 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff53..8c1a77ec3c5 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,11 +154,7 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7bd..1f7433e053d 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,14 +30,7 @@ class SFM_Model(nn.Module): def __init__( - self, - d_feat=6, - output_dim=1, - freq_dim=10, - hidden_size=64, - dropout_W=0.0, - dropout_U=0.0, - device="cpu", + self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", ): super().__init__() @@ -362,17 +355,11 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git 
a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701ce..18e9d8eb404 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,9 +120,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -156,11 +154,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -172,9 +166,7 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index ba2e5789b85..e37725c2eb6 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,9 +29,7 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 2bd0937a032..52b86888133 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,21 +150,13 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) @@ -271,21 +263,13 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.SELL, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.BUY, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, ) 
order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index cf850b9dace..a4476709de0 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,12 +84,10 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, - self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, - self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f57858..c82deb3945c 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,9 +125,7 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -160,9 +158,7 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index 611af63e4af..fb96c87bd31 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,11 +79,7 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar( - start_time=pd.Timestamp(latest_trading_date), - end_time=pd.Timestamp(today), - future=True, - ) + dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 1cb14d26153..ef1447a12be 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,8 +53,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), - layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, 
["long-short", "long-average"]] @@ -62,12 +61,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["long-short", "long-average"], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), ).figure return group_scatter_figure, group_hist_figure @@ -102,15 +96,12 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], - names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), - end=pd.Timestamp(f"{_index.max()[:4]}1231"), - freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", ) _years = [] _month = [] @@ -142,32 +133,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ( - "ic", - dict( - row=1, - col=1, - name="", - kind="DistplotGraph", - graph_kwargs=dict(bin_size=_bin_size), - ), - ), + ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name],), sub_graph_data=_sub_graph_data, - layout=dict( - yaxis2=dict(title="Observed Quantile"), - xaxis2=dict(title=f"{dist_name} Distribution Quantile"), - ), + layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -181,8 +155,7 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, - layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -202,17 +175,11 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame( - { - "Top": top, - "Bottom": bottom, - } - ) + r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, - layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -230,11 +197,7 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, - layout=dict( - title="Information Coefficient (IC)", - xaxis=dict(type="category", tickangle=45), - ), + ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), ).figure return ic_bar_figure @@ -277,12 +240,7 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, - lag=lag, - N=N, - reverse=reverse, - rank=rank, - show_nature_day=show_nature_day, + pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index abb68ea6051..604189c94b6 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,11 +13,7 @@ def _get_cum_return_data_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ): """ @@ -29,11 +25,7 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, - report_normal=report_normal, - label_data=label_data, - start_date=start_date, - end_date=end_date, + position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -87,11 +79,7 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ 
-111,18 +99,12 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ( - "cum_{}".format(_t_name), - dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), - ), + ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ( - "{}_value".format(_t_name), - dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), - ), + ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -161,13 +143,7 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, - rows=2, - cols=2, - row_width=[1, 2], - column_width=[3, 1], - print_grid=False, - specs=specs, + vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index fe1d6113709..23f9c592c0a 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,10 +72,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict( - date=_trading_date, - code_list=_trading_day_df[_trading_day_df["status"] != -1].index, - ) + previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 72a358adcbf..9a4d834ed92 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,11 +23,7 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, - label_data, - calculate_label_rank=True, - start_date=start_date, - end_date=end_date, + position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, ) res_dict = dict() @@ -51,20 +47,14 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, - xaxis=dict(type="category", tickangle=45), - yaxis=dict(title="lable-rank-ratio: %"), + title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, - label_data: pd.DataFrame, - start_date=None, - end_date=None, - show_notebook=True, + position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index f82e654c432..8e2c05c0a38 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,9 +123,7 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, { "type": "rect", @@ -137,20 +135,13 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, - vertical_spacing=0.01, - rows=7, - cols=1, - row_width=[1, 1, 1, 3, 1, 1, 3], - print_grid=False, + shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 70e382fb165..dbbc411109d 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,11 +311,7 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, - col=col, - name=res_name, - kind=self._kind_map["kind"], - graph_kwargs=self._kind_map["kwargs"], + row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index dd90437b03f..ee3ee03ecfd 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,10 +57,7 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min( - max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), - sold_stock_weight, - ) + add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 494981ecc09..6f168b4dd52 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,14 +102,10 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, - cash=current_tradable_value, - trade_date=trade_date, + weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, - current_position=current_amount_dict, - trade_date=trade_date, + target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, ) return order_list @@ -164,8 +160,6 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, - 
current_position=current.get_stock_amount_dict(), - trade_date=trade_date, + target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 711658c9a63..409410a2ab4 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,11 +13,7 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", - "--config_path", - required=True, - type=str, - help="config path indicates where to load yaml config.", + "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 76f101671b7..57f57a6c34e 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,8 +10,5 @@ } QLibDataLabelSpace = { - "labels": hp.choice( - "labels", - [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], - ) + "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 2ce957859b2..e81d41a9ad0 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,10 +28,7 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join( - self.tuner_config["experiment"]["dir"], - self.tuner_config["experiment"]["name"], - ) + self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) self.best_params = None self.best_res = None @@ -42,10 +39,7 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, - space=self.space, - algo=tpe.suggest, - max_evals=self.max_evals, + fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -159,8 +153,7 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), - QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -173,27 +166,20 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - model_space_name, - ) + model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - strategy_space_name, - ) + strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index 5244a7e45cf..d1a68cb3857 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,8 +26,7 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", - lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 762467da35e..47cded79cec 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,14 +328,7 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=1, - **kwargs, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -414,29 +407,13 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - spans, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - None, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), ) p.close() @@ -598,12 +575,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list( - filter( - lambda x: x[0] <= x[1], - [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], - ) - ) + inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -723,14 +695,7 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, - args=( - inst, - start_time, - end_time, - freq, - column_names, - ), + LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), ) p.close() @@ -763,12 +728,7 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={ - "start_time": str(start_time), - "end_time": str(end_time), - "freq": freq, - "future": future, - 
}, + request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -832,14 +792,7 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=0, - return_uri=False, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -942,13 +895,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=None, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index feda1904463..58e2bd96811 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,10 +32,7 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, - selector: Union[pd.Timestamp, slice, str, list], - level: Union[str, int], - fetch_orig=True, + df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 70f9d32780d..811fd387f14 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,12 +341,7 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, - [self.rule_expression], - fstart, - fend, - freq=self.filter_freq, - disk_cache=0, + instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py index b7c525821a8..59e481eb93d 100644 --- a/qlib/portfolio/__init__.py +++ b/qlib/portfolio/__init__.py @@ -1,3 +1,2 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
- diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index f92e7278758..eb6f9c5edb5 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,10 +18,6 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=provider_uri, - delete_old=False, + name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index be458a24d29..0c704b89669 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,10 +193,7 @@ def generate(self): } ) objects.update( - { - "long_short_r.pkl": long_short_r, - "long_avg_r.pkl": long_avg_r, - } + {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index f6e77cba4d8..8b3819c8302 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,10 +78,7 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, @@ -176,9 +173,7 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, - "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index dfa7f8556dd..de649c37edf 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,9 +40,7 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod @@ -54,10 +52,7 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map( - pd.Timestamp, - pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, - ) + map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b910d..d5637b02595 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,9 +26,7 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index a3973be5ae9..494962cc334 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -33,7 +33,7 @@ def 
test_nan_option_covariance(self): NUM_OBSERVATION = 200 EPS = 1e-6 - estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option="fill") X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) @@ -50,7 +50,7 @@ def test_decompose_covariance(self): NUM_VARIABLE = 10 NUM_OBSERVATION = 200 - estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option="fill") X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) From 53cf89d7c22f42234a452507cca67a98662e4ad9 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:43:03 +0800 Subject: [PATCH 32/32] Reformat with black. --- qlib/config.py | 24 +++++-- qlib/contrib/backtest/__init__.py | 18 ++++- qlib/contrib/backtest/profit_attribution.py | 23 +++++-- qlib/contrib/data/handler.py | 10 ++- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 ++++- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++++-- qlib/contrib/model/pytorch_alstm_ts.py | 17 +++-- qlib/contrib/model/pytorch_gats.py | 22 ++++-- qlib/contrib/model/pytorch_gats_ts.py | 18 ++++- qlib/contrib/model/pytorch_gru.py | 21 ++++-- qlib/contrib/model/pytorch_gru_ts.py | 17 ++++- qlib/contrib/model/pytorch_lstm.py | 21 ++++-- qlib/contrib/model/pytorch_lstm_ts.py | 17 ++++- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 ++++- qlib/contrib/model/pytorch_tabnet.py | 14 +++- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 +++++-- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 ++- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++++++++++++---- .../analysis_position/cumulative_return.py | 36 ++++++++-- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 ++++- .../report/analysis_position/report.py | 15 +++- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +++- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 +++++-- qlib/data/client.py | 3 +- qlib/data/data.py | 69 ++++++++++++++++--- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/portfolio/optimizer/enhanced_indexing.py | 6 +- qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- tests/test_all_pipeline.py | 9 ++- tests/test_dump_data.py | 9 ++- tests/test_get_data.py | 4 +- 45 files changed, 548 insertions(+), 122 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 344eb852777..52b05568d57 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,7 +115,12 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, + "filters": { + "field_not_found": { + "()": "qlib.log.LogFilter", + "param": [".*?WARN: data not found for.*?"], + } + }, "handlers": { "console": { "class": "logging.StreamHandler", @@ -130,7 +135,10 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + 
str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), + "default_exp_name": "Experiment", + }, }, } @@ -192,8 +200,16 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, - REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, + REG_CN: { + "trade_unit": 100, + "limit_threshold": 0.099, + "deal_price": "vwap", + }, + REG_US: { + "trade_unit": 1, + "limit_threshold": None, + "deal_price": "close", + }, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index bd3494abf6a..aa24ffb0cf6 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,7 +18,13 @@ def get_strategy( - strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, + strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, ): """get_strategy @@ -69,7 +75,11 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -162,7 +172,9 @@ def get_exchange( def get_executor( - executor=None, trade_exchange=None, verbose=True, + executor=None, + trade_exchange=None, + verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 355f0637395..20c6f638fcd 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,7 +12,10 @@ def get_benchmark_weight( - bench, start_date=None, end_date=None, path=None, + bench, + start_date=None, + end_date=None, + path=None, ): """get_benchmark_weight @@ -213,7 +216,12 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", + positions, + bench="SH000905", + group_field="industry", + group_method="category", + group_n=None, + deal_price="vwap", ): """brinson profit attribution @@ -247,10 +255,17 @@ def brinson_pa( # suspend stock is NAN. 
So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, + D.instruments(market="all"), + start_time=shift_start_date, + end_time=end_date, + as_list=True, ) stock_df = D.features( - instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", + instruments, + [group_field, deal_price], + start_time=shift_start_date, + end_time=end_date, + freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 574287819b7..970b032d6b0 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,7 +21,10 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -167,7 +170,10 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, + "price": { + "windows": [0], + "feature": ["OPEN", "HIGH", "LOW", "VWAP"], + }, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 363a184582d..c68571853f1 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,7 +35,11 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, + pred: pd.Series, + label: pd.Series, + date_col: str = "datetime", + quantile: float = 0.2, + dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 5cb1ce4eb67..4aa5b55156f 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,7 +244,12 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) + return dict( + zip( + ["long", "short", "long_short"], + map(pd.Series, [long_returns, short_returns, ls_returns]), + ) + ) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 2d94105e482..04ddd8db041 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,7 +64,12 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, + instruments, + fields, + start_time=evaluate_date, + end_time=evaluate_date, + freq="day", + disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -82,7 +87,14 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - 
close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) + close_data_df = D.features( + instruments, + fields, + start_time=start_date, + end_time=end_date, + freq="day", + disk_cache=0, + ) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 2840c2cef5a..d57c32b7022 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,7 +32,9 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 306e68aadf2..bbbb61851b1 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -319,12 +328,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 612bacbec93..725568de855 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): 
return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -302,12 +309,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index c59dc91973f..07048e1bc1a 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,11 +229,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -334,11 +340,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index dfc5f4ab5ed..1e94f56e418 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,7 +242,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -357,11 +361,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": 
self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index d2a774b65b4..84f863b9fb0 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 49f438cc379..bb6618b854c 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 02ca16e36b8..163d500ec87 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ 
-118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index 2ec36f96e34..cf4f8fb9f1f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 8c1a77ec3c5..16fcea9ff53 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,7 +154,11 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 1f7433e053d..d5169e6c7bd 100644 --- 
a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,7 +30,14 @@ class SFM_Model(nn.Module): def __init__( - self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", + self, + d_feat=6, + output_dim=1, + freq_dim=10, + hidden_size=64, + dropout_W=0.0, + dropout_U=0.0, + device="cpu", ): super().__init__() @@ -355,11 +362,17 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 18e9d8eb404..62e32d701ce 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,7 +120,9 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -154,7 +156,11 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -166,7 +172,9 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index e37725c2eb6..ba2e5789b85 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,7 +29,9 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 52b86888133..2bd0937a032 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,13 +150,21 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) else: 
print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) @@ -263,13 +271,21 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.SELL, + factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.BUY, + factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index a4476709de0..cf850b9dace 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,10 +84,12 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, + self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, + self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c82deb3945c..c8b44f57858 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,7 +125,9 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + order_list=order_list, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -158,7 +160,9 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + trade_info=trade_info, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index fb96c87bd31..611af63e4af 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,7 +79,11 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) + dates = 
D.calendar( + start_time=pd.Timestamp(latest_trading_date), + end_time=pd.Timestamp(today), + future=True, + ) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index ef1447a12be..1cb14d26153 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,7 +53,8 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), + layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -61,7 +62,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["long-short", "long-average"], + ), ).figure return group_scatter_figure, group_hist_figure @@ -96,12 +102,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], + names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), + end=pd.Timestamp(f"{_index.max()[:4]}1231"), + freq="1M", ) _years = [] _month = [] @@ -133,15 +142,32 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), + ( + "ic", + dict( + row=1, + col=1, + name="", + kind="DistplotGraph", + graph_kwargs=dict(bin_size=_bin_size), + ), + ), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name], + ), sub_graph_data=_sub_graph_data, - layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), + layout=dict( + yaxis2=dict(title="Observed Quantile"), + xaxis2=dict(title=f"{dist_name} Distribution Quantile"), + ), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -155,7 +181,8 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, + layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -175,11 +202,17 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) + r_df = pd.DataFrame( + { + "Top": top, + "Bottom": bottom, + } + ) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, + layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -197,7 +230,11 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), + ic_df, + layout=dict( + title="Information Coefficient (IC)", + xaxis=dict(type="category", tickangle=45), + ), ).figure return ic_bar_figure @@ -240,7 +277,12 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, + pred_label=pred_label, + lag=lag, + N=N, + reverse=reverse, + rank=rank, + show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index 604189c94b6..abb68ea6051 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,7 +13,11 @@ def _get_cum_return_data_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ): """ @@ -25,7 +29,11 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, + position=position, + report_normal=report_normal, + label_data=label_data, + start_date=start_date, + end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -79,7 +87,11 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures 
@@ -99,12 +111,18 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), + ( + "cum_{}".format(_t_name), + dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), + ), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), + ( + "{}_value".format(_t_name), + dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), + ), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -143,7 +161,13 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, + vertical_spacing=0.01, + rows=2, + cols=2, + row_width=[1, 2], + column_width=[3, 1], + print_grid=False, + specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 23f9c592c0a..fe1d6113709 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,7 +72,10 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) + previous_data = dict( + date=_trading_date, + code_list=_trading_day_df[_trading_day_df["status"] != -1].index, + ) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 9a4d834ed92..72a358adcbf 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,7 +23,11 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, + position, + label_data, + calculate_label_rank=True, + start_date=start_date, + end_date=end_date, ) res_dict = dict() @@ -47,14 +51,20 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), + title=_col, + xaxis=dict(type="category", tickangle=45), + yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, + position: dict, + label_data: pd.DataFrame, + start_date=None, + end_date=None, + show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 8e2c05c0a38..f82e654c432 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,7 +123,9 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, { "type": "rect", @@ -135,13 +137,20 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index dbbc411109d..70e382fb165 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,7 +311,11 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index ee3ee03ecfd..dd90437b03f 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,7 +57,10 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) + add_weight = min( + max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), + sold_stock_weight, + ) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 6f168b4dd52..494981ecc09 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,10 +102,14 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, + weight_position=target_weight_position, + cash=current_tradable_value, + trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, + target_position=target_amount_dict, + current_position=current_amount_dict, + trade_date=trade_date, ) return order_list @@ -160,6 +164,8 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, 
current_position=current.get_stock_amount_dict(), trade_date=trade_date, + target_position=amount_dict, + current_position=current.get_stock_amount_dict(), + trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 409410a2ab4..711658c9a63 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,7 +13,11 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", + "-c", + "--config_path", + required=True, + type=str, + help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 57f57a6c34e..76f101671b7 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,5 +10,8 @@ } QLibDataLabelSpace = { - "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) + "labels": hp.choice( + "labels", + [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], + ) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index e81d41a9ad0..2ce957859b2 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,7 +28,10 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) + self.ex_dir = os.path.join( + self.tuner_config["experiment"]["dir"], + self.tuner_config["experiment"]["name"], + ) self.best_params = None self.best_res = None @@ -39,7 +42,10 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, + fn=self.objective, + space=self.space, + algo=tpe.suggest, + max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -153,7 +159,8 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), + QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -166,20 +173,27 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) + model_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + model_space_name, + ) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) + strategy_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + strategy_space_name, + ) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), + data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index d1a68cb3857..5244a7e45cf 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,7 +26,8 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", + lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 47cded79cec..762467da35e 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,7 +328,14 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=1, + **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -407,13 +414,29 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + spans, + C, + ), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + None, + C, + ), ) p.close() @@ -575,7 +598,12 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) + inst: list( + filter( + lambda x: x[0] <= x[1], + [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], + ) + ) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -695,7 +723,14 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), + LocalDatasetProvider.cache_walker, + args=( + inst, + start_time, + end_time, + freq, + column_names, + ), ) p.close() @@ -728,7 +763,12 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, + 
request_content={ + "start_time": str(start_time), + "end_time": str(end_time), + "freq": freq, + "future": future, + }, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -792,7 +832,14 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=0, + return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -895,7 +942,13 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index 58e2bd96811..feda1904463 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,7 +32,10 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, + df: pd.DataFrame, + selector: Union[pd.Timestamp, slice, str, list], + level: Union[str, int], + fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 811fd387f14..70f9d32780d 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,7 +341,12 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, + instruments, + [self.rule_expression], + fstart, + fend, + freq=self.filter_freq, + disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index 5fdc1014ddf..5a7a0804dbd 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -56,7 +56,11 @@ def __init__( assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." 
self.inds_dev = inds_dev - assert warm_start in [None, self.START_FROM_W0, self.START_FROM_BENCH,], "illegal warm start option" + assert warm_start in [ + None, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" self.start_from_w0 = warm_start == self.START_FROM_W0 self.start_from_bench = warm_start == self.START_FROM_BENCH diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index eb6f9c5edb5..f92e7278758 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,6 +18,10 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, + name="qlib_data_simple", + region="cn", + interval="1d", + target_dir=provider_uri, + delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 0c704b89669..be458a24d29 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,7 +193,10 @@ def generate(self): } ) objects.update( - {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index 8b3819c8302..f6e77cba4d8 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,7 +78,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, @@ -173,7 +176,9 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], + 0.10, + "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index de649c37edf..dfa7f8556dd 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,7 +40,9 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod @@ -52,7 +54,10 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) + map( + pd.Timestamp, + pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, + ) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index d5637b02595..c511d1b910d 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,7 +26,9 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + 
provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod
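[Editor's note] The test hunks above all share one bootstrap pattern: fetch the sample data once if it is missing, then call qlib.init with both caches disabled so the tests read the freshly dumped files directly. A self-contained sketch of that pattern under stated assumptions — the GetData import location and the local data directory are guesses for illustration; the keyword arguments mirror the calls in qlib/tests/__init__.py and tests/test_dump_data.py:

    from pathlib import Path

    import qlib
    from qlib.tests.data import GetData  # import path assumed

    # hypothetical local data directory
    provider_uri = Path("~/.qlib/qlib_data/cn_data_simple").expanduser()

    if not provider_uri.exists():
        GetData().qlib_data(
            name="qlib_data_simple",
            region="cn",
            interval="1d",
            target_dir=str(provider_uri),
            delete_old=False,
        )

    qlib.init(
        provider_uri=str(provider_uri.resolve()),
        expression_cache=None,  # disable caches, as in the reformatted tests
        dataset_cache=None,
    )
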