From 988b42e159111f52dbba3301e7b1ae1187d2dc12 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:28:42 +0800 Subject: [PATCH 01/32] Add Structured Covariance Estimator to riskmodel.py --- qlib/model/riskmodel.py | 141 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 3 deletions(-) diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 07a1e0c9f65..32984ed6a3d 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -39,7 +39,7 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True ) -> Union[pd.DataFrame, np.ndarray]: """ Args: @@ -373,7 +373,8 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( + np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 ) roff = 2 * roff1 - roff3 rho = rdiag + roff @@ -433,7 +434,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray: if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) @@ -465,3 +466,137 @@ def _predict(self, X: np.ndarray) -> np.ndarray: SigmaY = SigmaU + Lowrank return SigmaY + + +class StructuredCovEstimator(RiskModel): + """Structured Covariance Estimator + + This estimator assumes observations can be predicted by multiple factors + X = FB + U + where `F` can be specified by explicit risk factors or latent factors. + + Therefore the structured covariance can be estimated by + cov(X) = F cov(B) F.T + cov(U) + + We use latent factor models to estimate the structured covariance. + Specifically, the following latent factor models are supported: + - `pca`: Principal Component Analysis + - `fa`: Factor Analysis + + Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and + precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 + """ + + FACTOR_MODEL_PCA = "pca" + FACTOR_MODEL_FA = "fa" + + def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: str = "ignore", + assume_centered: bool = False, scale_return: bool = True): + """ + Args: + factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). + num_factors (int): number of components to keep. + nan_option (str): nan handling option (`ignore`/`fill`). + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. 
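+
+        Example:
+            A minimal usage sketch (with hypothetical data; `X` is assumed to
+            already hold returns, hence `is_price=False`):
+
+                X = np.random.randn(252, 50)  # 252 observations of 50 stocks
+                est = StructuredCovEstimator(factor_model="pca", num_factors=10)
+                cov = est.predict(X, is_price=False)  # 50 x 50 structured covariance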
+ """ + super().__init__(nan_option, assume_centered, scale_return) + + assert factor_model in [ + self.FACTOR_MODEL_PCA, + self.FACTOR_MODEL_FA, + ], 'factor_model={} is not supported'.format(factor_model) + self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis + + self.num_factors = num_factors + + def predict( + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, + return_decomposed_components=False + ) -> Union[pd.DataFrame, np.ndarray, tuple]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. + """ + assert not return_corr or not return_decomposed_components, \ + 'Can only return either correlation matrix or decomposed components.' + + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + if return_decomposed_components: + F, cov_b, var_u = self._predict(X, return_structured=True) + return F, cov_b, var_u + else: + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + """ + covariance estimation implementation + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + return_structured (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or np.ndarray: decomposed covariance matrix or covariance matrix. 
+ """ + + model = self.solver(self.num_factors, random_state=0).fit(X) + + F = model.components_.T # num_features x num_factors + B = model.transform(X) # num_samples x num_factors + U = X - B @ F.T + cov_b = np.cov(B.T) # num_factors x num_factors + var_u = np.var(U, axis=0) # diagonal + + if return_structured: + return F, cov_b, var_u + + cov_x = F @ cov_b @ F.T + np.diag(var_u) + + return cov_x From 7b01c5cae7830d2b75c5566443f5a4559b5b2f40 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:30:26 +0800 Subject: [PATCH 02/32] Add an implementation of Enhanced Indexing to optimizer.py --- qlib/.DS_Store | Bin 0 -> 6148 bytes qlib/portfolio/optimizer.py | 129 ++++++++++++++++++++++++++++++++---- 2 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 qlib/.DS_Store diff --git a/qlib/.DS_Store b/qlib/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3b196d96a164ebf17b658d6f6d8c5ef19fb26c8c GIT binary patch literal 6148 zcmeHK%Wl&^6upx=txW}FQHgGlykS>WX{it_kU}Vn?vR3D0VuU=(^|NmD0Z;g5R^S% z0N;SbckloAHxWIzRhp0nDE)t`24cQXodQJ=0 zG9@>FLbZ`lK|@^IG+JQW0;_;k;9paK=kB_ivk7HXoxf)XsXPi(8G^yf!y}~apUU77 zd2~b)F>erctY&;O;%g&f%|sM5Mod_5U)*l69f~NxdJ;5v9~--ZHYJy`PvXJrW|?Rkd0?VRkk)wz)B!- zgQ;dimFZHcy5udogvWbXZ&~oHsPb`pshPpyu3yTu1gn5m;D1wq*9RYov1f3uQ7s+F z)DZyapja7v{#oD{-(b(+Tq9~=LWcr%s4zzip~F$`8(z=gT%!&rVGbX{JXx3%icn8S z`@W)+=xMaMRlq7xS71foc6k4Pa`E}U9%S#V0#<=5rGRku2m5_|lG$4qK92WVAL$T@ qjd^p8Dg>F?j+Mh(@g|Zo)cNcHdj{tkQ3A6+0!jv(Sq1*80>1zhm={X` literal 0 HcmV?d00001 diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 0e7d2725458..e04923ed6fa 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -28,13 +28,13 @@ class PortfolioOptimizer: OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -59,10 +59,10 @@ def __init__( self.tol = tol def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -151,7 +151,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -256,3 +256,108 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda warnings.warn(f"optimization not success ({sol.status})") return sol.x + + +class EnhancedIndexingOptimizer: + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. 
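+
+    In summary, `__call__` below solves (a sketch of the program; `F`, `covB`
+    and `varU` come from a structured risk model such as StructuredCovEstimator):
+        max_w   u'w - lamb * (v' covB v + sum(varU * w^2)),  with v = F'w
+        s.t.    w >= 0, sum(w) == 1,
+                -bench_dev <= w - w_bench <= bench_dev,
+                -inds_dev <= (w - w_bench)' inds_onehot <= inds_dev,
+                sum(|w - w0|) <= delta  (only when `w0` is given)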
+ """ + + START_FROM_W0 = 'w0' + START_FROM_BENCH = 'benchmark' + DO_NOT_START_FROM = '' + + def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, + scale_alpha=True, verbose: bool = False, warm_start: str = '', max_iters: int = 10000): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float): industry deviation limit + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" + self.inds_dev = inds_dev + + assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, + self.START_FROM_BENCH], "illegal warm start option" + self.start_from_w0 = (warm_start == self.START_FROM_W0) + self.start_from_bench = (warm_start == self.START_FROM_BENCH) + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, + w_bench: np.ndarray, inds_onehot: np.ndarray + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot (np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + d_inds = d_bench @ inds_onehot + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + d_inds >= -self.inds_dev, + d_inds <= self.inds_dev + ] + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print('Warning: try warm start with w0, but w0 is `None`.') + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != 'optimal': + print('Warning: solve failed.', prob.status) + + return np.asarray(w.value) From 9c2653f125e31e754f00ef6df0c455f8e828d78a Mon Sep 17 00:00:00 2001 From: Charles Young Date: Tue, 9 Feb 2021 20:31:00 +0800 Subject: [PATCH 03/32] Add an implementation of Enhanced Indexing to optimizer.py --- qlib/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 qlib/.DS_Store diff --git a/qlib/.DS_Store b/qlib/.DS_Store deleted file mode 100644 index 
3b196d96a164ebf17b658d6f6d8c5ef19fb26c8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%Wl&^6upx=txW}FQHgGlykS>WX{it_kU}Vn?vR3D0VuU=(^|NmD0Z;g5R^S% z0N;SbckloAHxWIzRhp0nDE)t`24cQXodQJ=0 zG9@>FLbZ`lK|@^IG+JQW0;_;k;9paK=kB_ivk7HXoxf)XsXPi(8G^yf!y}~apUU77 zd2~b)F>erctY&;O;%g&f%|sM5Mod_5U)*l69f~NxdJ;5v9~--ZHYJy`PvXJrW|?Rkd0?VRkk)wz)B!- zgQ;dimFZHcy5udogvWbXZ&~oHsPb`pshPpyu3yTu1gn5m;D1wq*9RYov1f3uQ7s+F z)DZyapja7v{#oD{-(b(+Tq9~=LWcr%s4zzip~F$`8(z=gT%!&rVGbX{JXx3%icn8S z`@W)+=xMaMRlq7xS71foc6k4Pa`E}U9%S#V0#<=5rGRku2m5_|lG$4qK92WVAL$T@ qjd^p8Dg>F?j+Mh(@g|Zo)cNcHdj{tkQ3A6+0!jv(Sq1*80>1zhm={X` From 4000518698f0d5f929a2a493bdb9bd207a313d17 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 08:41:35 +0800 Subject: [PATCH 04/32] Separate specific implementation of Portfolio Optimizer to folder. --- .../portfolio_optimizer/enhanced_indexing.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 qlib/contrib/portfolio_optimizer/enhanced_indexing.py diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py new file mode 100644 index 00000000000..0c40a617ef7 --- /dev/null +++ b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py @@ -0,0 +1,112 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +import cvxpy as cp +from typing import Union + + +class EnhancedIndexingOptimizer: + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. + """ + + START_FROM_W0 = 'w0' + START_FROM_BENCH = 'benchmark' + DO_NOT_START_FROM = 'no_warm_start' + + def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, + scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float): industry deviation limit + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" + self.inds_dev = inds_dev + + assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, + self.START_FROM_BENCH], "illegal warm start option" + self.start_from_w0 = (warm_start == self.START_FROM_W0) + self.start_from_bench = (warm_start == self.START_FROM_BENCH) + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, + w_bench: np.ndarray, inds_onehot: np.ndarray + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot 
(np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + d_inds = d_bench @ inds_onehot + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + d_inds >= -self.inds_dev, + d_inds <= self.inds_dev + ] + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print('Warning: try warm start with w0, but w0 is `None`.') + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != 'optimal': + print('Warning: solve failed.', prob.status) + + return np.asarray(w.value) From b2e2142594d38c0afa4e31d560a17cfad05e2705 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 09:00:12 +0800 Subject: [PATCH 05/32] Applied slight modification to follow PEP 8. --- qlib/portfolio/optimizer.py | 114 +++--------------------------------- 1 file changed, 7 insertions(+), 107 deletions(-) diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index e04923ed6fa..104e2c441dc 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -42,6 +42,7 @@ def __init__( lamb (float): risk aversion parameter (larger `lamb` means more focus on return) delta (float): turnover rate limit alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix tol (float): tolerance for optimization termination """ assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" @@ -57,6 +58,7 @@ def __init__( self.alpha = alpha self.tol = tol + self.scale_alpha = scale_alpha def __call__( self, @@ -94,7 +96,7 @@ def __call__( w0 = w0.values # scale alpha to match volatility - if u is not None: + if u is not None and self.scale_alpha: u = u / u.std() u *= np.mean(np.diag(S)) ** 0.5 @@ -247,7 +249,10 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: - wrapped_obj = lambda x: obj(x) + self.alpha * np.sum(np.square(x)) + def opt_obj(x): + return obj(x) + self.alpha * np.sum(np.square(x)) + + wrapped_obj = opt_obj # solve x0 = np.ones(n) / n # init results @@ -256,108 +261,3 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda warnings.warn(f"optimization not success ({sol.status})") return sol.x - - -class EnhancedIndexingOptimizer: - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - This optimizer always assumes full investment and no-shorting. 
- """ - - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = '' - - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = '', max_iters: int = 10000): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float): industry deviation limit - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - d_inds = d_bench @ inds_onehot - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev - ] - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) - - return np.asarray(w.value) From 2cc057e438cb412ff60a39d6e83df6724f29b4fe Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 09:09:03 +0800 Subject: [PATCH 06/32] Fix minor mismatches of type hints. 
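
Why `Index.equals` rather than `all(...)`: comparing two pandas indexes of
different lengths with `==` raises a ValueError, whereas `Index.equals` simply
returns False. A minimal illustration (hypothetical indexes, not part of the
diff below):

    import pandas as pd

    a = pd.Index(["A", "B", "C"])
    b = pd.Index(["A", "B"])
    print(a.equals(b))   # False
    # all(a == b)        # would raise ValueError: lengths must match

The `Callable` return annotations below are corrected for a similar reason:
the objective builders return closures, not arrays.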
---
 qlib/portfolio/optimizer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 104e2c441dc..87a8b7416f9 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -85,14 +85,14 @@ def __call__(
         if u is not None:
             assert len(u) == len(S), "`u` has mismatched shape"
             if isinstance(u, pd.Series):
-                assert all(u.index == index), "`u` has mismatched index"
+                assert u.index.equals(index), "`u` has mismatched index"
                 u = u.values
 
         # transform initial weights
         if w0 is not None:
             assert len(w0) == len(S), "`w0` has mismatched shape"
             if isinstance(w0, pd.Series):
-                assert all(w0.index == index), "`w0` has mismatched index"
+                assert w0.index.equals(index), "`w0` has mismatched index"
                 w0 = w0.values
 
         # scale alpha to match volatility
@@ -175,7 +175,7 @@ def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nda
         """
         return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
 
-    def _get_objective_gmv(self, S: np.ndarray) -> np.ndarray:
+    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
         """global minimum variance optimization objective
 
         Optimization objective
@@ -187,7 +187,7 @@ def func(x):
 
         return func
 
-    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> np.ndarray:
+    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable:
         """mean-variance optimization objective
 
         Optimization objective
@@ -201,7 +201,7 @@ def func(x):
 
         return func
 
-    def _get_objective_rp(self, S: np.ndarray) -> np.ndarray:
+    def _get_objective_rp(self, S: np.ndarray) -> Callable:
         """risk-parity optimization objective
 
         Optimization objective

From 9448a6e2c79a344516e17abba7060d6e62231582 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:23:48 +0800
Subject: [PATCH 07/32] Add an abstract class as the base class for all optimization-related portfolio constructions.
---
 .../portfolio_optimizer/enhanced_indexing.py |   4 +++-
 .../portfolio_optimizer/mean_variance.py     | 264 ++++++++++++++++++
 qlib/portfolio/optimizer.py                  | 264 +-----------------
 3 files changed, 274 insertions(+), 258 deletions(-)
 create mode 100644 qlib/contrib/portfolio_optimizer/mean_variance.py

diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
index 0c40a617ef7..323e3154b67 100644
--- a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
+++ b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py
@@ -6,8 +6,10 @@
 import cvxpy as cp
 from typing import Union
 
+from ...portfolio.optimizer import BaseOptimizer
+
 
-class EnhancedIndexingOptimizer:
+class EnhancedIndexingOptimizer(BaseOptimizer):
     """
     Portfolio Optimizer with Enhanced Indexing
 
diff --git a/qlib/contrib/portfolio_optimizer/mean_variance.py b/qlib/contrib/portfolio_optimizer/mean_variance.py
new file mode 100644
index 00000000000..c3c4f7a3d01
--- /dev/null
+++ b/qlib/contrib/portfolio_optimizer/mean_variance.py
@@ -0,0 +1,264 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+ +import warnings +import numpy as np +import pandas as pd +import scipy.optimize as so +from typing import Optional, Union, Callable, List + +from ...portfolio.optimizer import BaseOptimizer + + +class PortfolioOptimizer(BaseOptimizer): + """Portfolio Optimizer + + The following optimization algorithms are supported: + - `gmv`: Global Minimum Variance Portfolio + - `mvo`: Mean Variance Optimized Portfolio + - `rp`: Risk Parity + - `inv`: Inverse Volatility + + Note: + This optimizer always assumes full investment and no-shorting. + """ + + OPT_GMV = "gmv" + OPT_MVO = "mvo" + OPT_RP = "rp" + OPT_INV = "inv" + + def __init__( + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, + ): + """ + Args: + method (str): portfolio optimization method + lamb (float): risk aversion parameter (larger `lamb` means more focus on return) + delta (float): turnover rate limit + alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + tol (float): tolerance for optimization termination + """ + assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" + self.method = method + + assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, f"turnover limit `delta` should be positive" + self.delta = delta + + assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" + self.alpha = alpha + + self.tol = tol + self.scale_alpha = scale_alpha + + def __call__( + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + S (np.ndarray or pd.DataFrame): covariance matrix + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) + w0 (np.ndarray or pd.Series): initial weights (for turnover control) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # transform dataframe into array + index = None + if isinstance(S, pd.DataFrame): + index = S.index + S = S.values + + # transform alpha + if u is not None: + assert len(u) == len(S), "`u` has mismatched shape" + if isinstance(u, pd.Series): + assert u.index.equals(index), "`u` has mismatched index" + u = u.values + + # transform initial weights + if w0 is not None: + assert len(w0) == len(S), "`w0` has mismatched shape" + if isinstance(w0, pd.Series): + assert w0.index.equals(index), "`w0` has mismatched index" + w0 = w0.values + + # scale alpha to match volatility + if u is not None and self.scale_alpha: + u = u / u.std() + u *= np.mean(np.diag(S)) ** 0.5 + + # optimize + w = self._optimize(S, u, w0) + + # restore index if needed + if index is not None: + w = pd.Series(w, index=index) + + return w + + def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: + + # inverse volatility + if self.method == self.OPT_INV: + if u is not None: + warnings.warn("`u` is set but will not be used for `inv` portfolio") + if w0 is not None: + warnings.warn("`w0` is set but will not be used for `inv` portfolio") + return self._optimize_inv(S) + + # global minimum variance + if self.method == self.OPT_GMV: + if u is not None: + warnings.warn("`u` is set but will not be used for `gmv` portfolio") + return self._optimize_gmv(S, w0) + + # mean-variance + if self.method == self.OPT_MVO: + return 
self._optimize_mvo(S, u, w0) + + # risk parity + if self.method == self.OPT_RP: + if u is not None: + warnings.warn("`u` is set but will not be used for `rp` portfolio") + return self._optimize_rp(S, w0) + + def _optimize_inv(self, S: np.ndarray) -> np.ndarray: + """Inverse volatility""" + vola = np.diag(S) ** 0.5 + w = 1 / vola + w /= w.sum() + return w + + def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize global minimum variance portfolio + + This method solves the following optimization problem + min_w w' S w + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix. + """ + return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) + + def _optimize_mvo( + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + ) -> np.ndarray: + """optimize mean-variance portfolio + + This method solves the following optimization problem + min_w - w' u + lamb * w' S w + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix, `u` is the expected returns, + and `lamb` is the risk aversion parameter. + """ + return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) + + def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize risk parity portfolio + + This method solves the following optimization problem + min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 + s.t. w >= 0, sum(w) == 1 + where `S` is the covariance matrix and `N` is the number of stocks. + """ + return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0)) + + def _get_objective_gmv(self, S: np.ndarray) -> Callable: + """global minimum variance optimization objective + + Optimization objective + min_w w' S w + """ + + def func(x): + return x @ S @ x + + return func + + def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable: + """mean-variance optimization objective + + Optimization objective + min_w - w' u + lamb * w' S w + """ + + def func(x): + risk = x @ S @ x + ret = x @ u + return -ret + self.lamb * risk + + return func + + def _get_objective_rp(self, S: np.ndarray) -> Callable: + """risk-parity optimization objective + + Optimization objective + min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 + """ + + def func(x): + N = len(x) + Sx = S @ x + xSx = x @ Sx + return np.sum((x - xSx / Sx / N) ** 2) + + return func + + def _get_constrains(self, w0: Optional[np.ndarray] = None): + """optimization constraints + + Defines the following constraints: + - no shorting and leverage: 0 <= w <= 1 + - full investment: sum(w) == 1 + - turnover constraint: |w - w0| <= delta + """ + + # no shorting and leverage + bounds = so.Bounds(0.0, 1.0) + + # full investment constraint + cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0 + + # turnover constraint + if w0 is not None: + cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0 + + return bounds, cons + + def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray: + """solve optimization + + Args: + n (int): number of parameters + obj (callable): optimization objective + bounds (Bounds): bounds of parameters + cons (list): optimization constraints + """ + # add l2 regularization + wrapped_obj = obj + if self.alpha > 0: + def opt_obj(x): + return obj(x) + self.alpha * np.sum(np.square(x)) + + wrapped_obj = opt_obj + + # solve + x0 = np.ones(n) / n # init results + sol = so.minimize(wrapped_obj, x0, 
bounds=bounds, constraints=cons, tol=self.tol) + if not sol.success: + warnings.warn(f"optimization not success ({sol.status})") + + return sol.x diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 87a8b7416f9..c63d936564d 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -1,263 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import warnings -import numpy as np -import pandas as pd -import scipy.optimize as so +import abc -from typing import Optional, Union, Callable, List +class BaseOptimizer(abc.ABC): + """Modeling things""" -class PortfolioOptimizer: - """Portfolio Optimizer - - The following optimization algorithms are supported: - - `gmv`: Global Minimum Variance Portfolio - - `mvo`: Mean Variance Optimized Portfolio - - `rp`: Risk Parity - - `inv`: Inverse Volatility - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - OPT_GMV = "gmv" - OPT_MVO = "mvo" - OPT_RP = "rp" - OPT_INV = "inv" - - def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, - ): - """ - Args: - method (str): portfolio optimization method - lamb (float): risk aversion parameter (larger `lamb` means more focus on return) - delta (float): turnover rate limit - alpha (float): l2 norm regularizer - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - tol (float): tolerance for optimization termination - """ - assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" - self.method = method - - assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, f"turnover limit `delta` should be positive" - self.delta = delta - - assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" - self.alpha = alpha - - self.tol = tol - self.scale_alpha = scale_alpha - - def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - S (np.ndarray or pd.DataFrame): covariance matrix - u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) - w0 (np.ndarray or pd.Series): initial weights (for turnover control) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # transform dataframe into array - index = None - if isinstance(S, pd.DataFrame): - index = S.index - S = S.values - - # transform alpha - if u is not None: - assert len(u) == len(S), "`u` has mismatched shape" - if isinstance(u, pd.Series): - assert u.index.equals(index), "`u` has mismatched index" - u = u.values - - # transform initial weights - if w0 is not None: - assert len(w0) == len(S), "`w0` has mismatched shape" - if isinstance(w0, pd.Series): - assert w0.index.equals(index), "`w0` has mismatched index" - w0 = w0.values - - # scale alpha to match volatility - if u is not None and self.scale_alpha: - u = u / u.std() - u *= np.mean(np.diag(S)) ** 0.5 - - # optimize - w = self._optimize(S, u, w0) - - # restore index if needed - if index is not None: - w = pd.Series(w, index=index) - - return w - - def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: - - # inverse volatility - if self.method == self.OPT_INV: - if u is not None: - warnings.warn("`u` is set but will 
not be used for `inv` portfolio") - if w0 is not None: - warnings.warn("`w0` is set but will not be used for `inv` portfolio") - return self._optimize_inv(S) - - # global minimum variance - if self.method == self.OPT_GMV: - if u is not None: - warnings.warn("`u` is set but will not be used for `gmv` portfolio") - return self._optimize_gmv(S, w0) - - # mean-variance - if self.method == self.OPT_MVO: - return self._optimize_mvo(S, u, w0) - - # risk parity - if self.method == self.OPT_RP: - if u is not None: - warnings.warn("`u` is set but will not be used for `rp` portfolio") - return self._optimize_rp(S, w0) - - def _optimize_inv(self, S: np.ndarray) -> np.ndarray: - """Inverse volatility""" - vola = np.diag(S) ** 0.5 - w = 1 / vola - w /= w.sum() - return w - - def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize global minimum variance portfolio - - This method solves the following optimization problem - min_w w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix. - """ - return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) - - def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None - ) -> np.ndarray: - """optimize mean-variance portfolio - - This method solves the following optimization problem - min_w - w' u + lamb * w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix, `u` is the expected returns, - and `lamb` is the risk aversion parameter. - """ - return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) - - def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize risk parity portfolio - - This method solves the following optimization problem - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix and `N` is the number of stocks. 
-        """
-        return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
-
-    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
-        """global minimum variance optimization objective
-
-        Optimization objective
-            min_w w' S w
-        """
-
-        def func(x):
-            return x @ S @ x
-
-        return func
-
-    def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable:
-        """mean-variance optimization objective
-
-        Optimization objective
-            min_w - w' u + lamb * w' S w
-        """
-
-        def func(x):
-            risk = x @ S @ x
-            ret = x @ u
-            return -ret + self.lamb * risk
-
-        return func
-
-    def _get_objective_rp(self, S: np.ndarray) -> Callable:
-        """risk-parity optimization objective
-
-        Optimization objective
-            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
-        """
-
-        def func(x):
-            N = len(x)
-            Sx = S @ x
-            xSx = x @ Sx
-            return np.sum((x - xSx / Sx / N) ** 2)
-
-        return func
-
-    def _get_constrains(self, w0: Optional[np.ndarray] = None):
-        """optimization constraints
-
-        Defines the following constraints:
-        - no shorting and leverage: 0 <= w <= 1
-        - full investment: sum(w) == 1
-        - turnover constraint: |w - w0| <= delta
-        """
-
-        # no shorting and leverage
-        bounds = so.Bounds(0.0, 1.0)
-
-        # full investment constraint
-        cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]  # == 0
-
-        # turnover constraint
-        if w0 is not None:
-            cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))})  # >= 0
-
-        return bounds, cons
-
-    def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray:
-        """solve optimization
-
-        Args:
-            n (int): number of parameters
-            obj (callable): optimization objective
-            bounds (Bounds): bounds of parameters
-            cons (list): optimization constraints
-        """
-        # add l2 regularization
-        wrapped_obj = obj
-        if self.alpha > 0:
-            def opt_obj(x):
-                return obj(x) + self.alpha * np.sum(np.square(x))
-
-            wrapped_obj = opt_obj
-
-        # solve
-        x0 = np.ones(n) / n  # init results
-        sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
-        if not sol.success:
-            warnings.warn(f"optimization not success ({sol.status})")
-
-        return sol.x
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs) -> object:
+        """ Generate a optimized portfolio allocation """
+        pass

From 42f882504e09d36f20c29f3eaafa11f0249144ed Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:25:48 +0800
Subject: [PATCH 08/32] Reformat code to follow PEP 8.
---
 qlib/model/base.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qlib/model/base.py b/qlib/model/base.py
index 5a295787f76..a7001f0a67b 100644
--- a/qlib/model/base.py
+++ b/qlib/model/base.py
@@ -43,8 +43,9 @@ def fit(self, dataset: Dataset):
 
         # get weights
         try:
-            wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L)
-            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
+            wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"],
+                                                   data_key=DataHandlerLP.DK_L)
+            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
         except KeyError as e:
             w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
             w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index)

From f7d3e56561d4059bc85dc7922017706bfa322750 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 09:57:41 +0800
Subject: [PATCH 09/32] Merge optimization-related portfolio construction back to portfolio/optimizer.
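
With both optimizers now behind the `BaseOptimizer` interface, any allocation
strategy that implements `__call__` can be swapped in. A minimal sketch of a
custom optimizer against this interface (the equal-weight logic is purely
illustrative and not part of this patch):

    import numpy as np

    from qlib.portfolio.optimizer import BaseOptimizer

    class EqualWeightOptimizer(BaseOptimizer):
        """Naive 1/N allocation, just to demonstrate the interface."""

        def __call__(self, n_assets: int) -> np.ndarray:
            return np.ones(n_assets) / n_assets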
--- .../portfolio_optimizer/enhanced_indexing.py | 114 ------ .../portfolio_optimizer/mean_variance.py | 264 ------------- qlib/portfolio/optimizer.py | 367 +++++++++++++++++- 3 files changed, 366 insertions(+), 379 deletions(-) delete mode 100644 qlib/contrib/portfolio_optimizer/enhanced_indexing.py delete mode 100644 qlib/contrib/portfolio_optimizer/mean_variance.py diff --git a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py b/qlib/contrib/portfolio_optimizer/enhanced_indexing.py deleted file mode 100644 index 323e3154b67..00000000000 --- a/qlib/contrib/portfolio_optimizer/enhanced_indexing.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import numpy as np -import pandas as pd -import cvxpy as cp -from typing import Union - -from ...portfolio.optimizer import BaseOptimizer - - -class EnhancedIndexingOptimizer(BaseOptimizer): - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = 'no_warm_start' - - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float): industry deviation limit - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - d_inds = d_bench @ inds_onehot - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= 
self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev - ] - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) - - return np.asarray(w.value) diff --git a/qlib/contrib/portfolio_optimizer/mean_variance.py b/qlib/contrib/portfolio_optimizer/mean_variance.py deleted file mode 100644 index c3c4f7a3d01..00000000000 --- a/qlib/contrib/portfolio_optimizer/mean_variance.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import warnings -import numpy as np -import pandas as pd -import scipy.optimize as so -from typing import Optional, Union, Callable, List - -from ...portfolio.optimizer import BaseOptimizer - - -class PortfolioOptimizer(BaseOptimizer): - """Portfolio Optimizer - - The following optimization algorithms are supported: - - `gmv`: Global Minimum Variance Portfolio - - `mvo`: Mean Variance Optimized Portfolio - - `rp`: Risk Parity - - `inv`: Inverse Volatility - - Note: - This optimizer always assumes full investment and no-shorting. - """ - - OPT_GMV = "gmv" - OPT_MVO = "mvo" - OPT_RP = "rp" - OPT_INV = "inv" - - def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, - ): - """ - Args: - method (str): portfolio optimization method - lamb (float): risk aversion parameter (larger `lamb` means more focus on return) - delta (float): turnover rate limit - alpha (float): l2 norm regularizer - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - tol (float): tolerance for optimization termination - """ - assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" - self.method = method - - assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, f"turnover limit `delta` should be positive" - self.delta = delta - - assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" - self.alpha = alpha - - self.tol = tol - self.scale_alpha = scale_alpha - - def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - S (np.ndarray or pd.DataFrame): covariance matrix - u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) - w0 (np.ndarray or pd.Series): initial weights (for turnover control) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - # transform dataframe into array - index = None - if isinstance(S, pd.DataFrame): - index = S.index - S = S.values - - # transform alpha - if u is not None: - assert len(u) == len(S), "`u` has mismatched shape" - if isinstance(u, pd.Series): - assert u.index.equals(index), "`u` has mismatched index" - u = u.values - - # transform initial weights - if w0 is not None: - assert len(w0) == len(S), "`w0` has mismatched shape" - if isinstance(w0, pd.Series): - assert 
w0.index.equals(index), "`w0` has mismatched index" - w0 = w0.values - - # scale alpha to match volatility - if u is not None and self.scale_alpha: - u = u / u.std() - u *= np.mean(np.diag(S)) ** 0.5 - - # optimize - w = self._optimize(S, u, w0) - - # restore index if needed - if index is not None: - w = pd.Series(w, index=index) - - return w - - def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: - - # inverse volatility - if self.method == self.OPT_INV: - if u is not None: - warnings.warn("`u` is set but will not be used for `inv` portfolio") - if w0 is not None: - warnings.warn("`w0` is set but will not be used for `inv` portfolio") - return self._optimize_inv(S) - - # global minimum variance - if self.method == self.OPT_GMV: - if u is not None: - warnings.warn("`u` is set but will not be used for `gmv` portfolio") - return self._optimize_gmv(S, w0) - - # mean-variance - if self.method == self.OPT_MVO: - return self._optimize_mvo(S, u, w0) - - # risk parity - if self.method == self.OPT_RP: - if u is not None: - warnings.warn("`u` is set but will not be used for `rp` portfolio") - return self._optimize_rp(S, w0) - - def _optimize_inv(self, S: np.ndarray) -> np.ndarray: - """Inverse volatility""" - vola = np.diag(S) ** 0.5 - w = 1 / vola - w /= w.sum() - return w - - def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize global minimum variance portfolio - - This method solves the following optimization problem - min_w w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix. - """ - return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) - - def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None - ) -> np.ndarray: - """optimize mean-variance portfolio - - This method solves the following optimization problem - min_w - w' u + lamb * w' S w - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix, `u` is the expected returns, - and `lamb` is the risk aversion parameter. - """ - return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) - - def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: - """optimize risk parity portfolio - - This method solves the following optimization problem - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - s.t. w >= 0, sum(w) == 1 - where `S` is the covariance matrix and `N` is the number of stocks. 
- """ - return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0)) - - def _get_objective_gmv(self, S: np.ndarray) -> Callable: - """global minimum variance optimization objective - - Optimization objective - min_w w' S w - """ - - def func(x): - return x @ S @ x - - return func - - def _get_objective_mvo(self, S: np.ndarray, u: np.ndarray = None) -> Callable: - """mean-variance optimization objective - - Optimization objective - min_w - w' u + lamb * w' S w - """ - - def func(x): - risk = x @ S @ x - ret = x @ u - return -ret + self.lamb * risk - - return func - - def _get_objective_rp(self, S: np.ndarray) -> Callable: - """risk-parity optimization objective - - Optimization objective - min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2 - """ - - def func(x): - N = len(x) - Sx = S @ x - xSx = x @ Sx - return np.sum((x - xSx / Sx / N) ** 2) - - return func - - def _get_constrains(self, w0: Optional[np.ndarray] = None): - """optimization constraints - - Defines the following constraints: - - no shorting and leverage: 0 <= w <= 1 - - full investment: sum(w) == 1 - - turnover constraint: |w - w0| <= delta - """ - - # no shorting and leverage - bounds = so.Bounds(0.0, 1.0) - - # full investment constraint - cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0 - - # turnover constraint - if w0 is not None: - cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0 - - return bounds, cons - - def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray: - """solve optimization - - Args: - n (int): number of parameters - obj (callable): optimization objective - bounds (Bounds): bounds of parameters - cons (list): optimization constraints - """ - # add l2 regularization - wrapped_obj = obj - if self.alpha > 0: - def opt_obj(x): - return obj(x) + self.alpha * np.sum(np.square(x)) - - wrapped_obj = opt_obj - - # solve - x0 = np.ones(n) / n # init results - sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol) - if not sol.success: - warnings.warn(f"optimization not success ({sol.status})") - - return sol.x diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index c63d936564d..728a04ea9db 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -2,12 +2,377 @@ # Licensed under the MIT License. import abc +import warnings +import numpy as np +import cvxpy as cp +import pandas as pd +import scipy.optimize as so +from typing import Optional, Union, Callable, List class BaseOptimizer(abc.ABC): - """Modeling things""" + """ Construct portfolio with a optimization related method """ @abc.abstractmethod def __call__(self, *args, **kwargs) -> object: """ Generate a optimized portfolio allocation """ pass + + +class PortfolioOptimizer(BaseOptimizer): + """Portfolio Optimizer + + The following optimization algorithms are supported: + - `gmv`: Global Minimum Variance Portfolio + - `mvo`: Mean Variance Optimized Portfolio + - `rp`: Risk Parity + - `inv`: Inverse Volatility + + Note: + This optimizer always assumes full investment and no-shorting. 
+ """ + + OPT_GMV = "gmv" + OPT_MVO = "mvo" + OPT_RP = "rp" + OPT_INV = "inv" + + def __init__( + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, + ): + """ + Args: + method (str): portfolio optimization method + lamb (float): risk aversion parameter (larger `lamb` means more focus on return) + delta (float): turnover rate limit + alpha (float): l2 norm regularizer + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + tol (float): tolerance for optimization termination + """ + assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" + self.method = method + + assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, f"turnover limit `delta` should be positive" + self.delta = delta + + assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" + self.alpha = alpha + + self.tol = tol + self.scale_alpha = scale_alpha + + def __call__( + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + S (np.ndarray or pd.DataFrame): covariance matrix + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) + w0 (np.ndarray or pd.Series): initial weights (for turnover control) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + # transform dataframe into array + index = None + if isinstance(S, pd.DataFrame): + index = S.index + S = S.values + + # transform alpha + if u is not None: + assert len(u) == len(S), "`u` has mismatched shape" + if isinstance(u, pd.Series): + assert u.index.equals(index), "`u` has mismatched index" + u = u.values + + # transform initial weights + if w0 is not None: + assert len(w0) == len(S), "`w0` has mismatched shape" + if isinstance(w0, pd.Series): + assert w0.index.equals(index), "`w0` has mismatched index" + w0 = w0.values + + # scale alpha to match volatility + if u is not None and self.scale_alpha: + u = u / u.std() + u *= np.mean(np.diag(S)) ** 0.5 + + # optimize + w = self._optimize(S, u, w0) + + # restore index if needed + if index is not None: + w = pd.Series(w, index=index) + + return w + + def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: + + # inverse volatility + if self.method == self.OPT_INV: + if u is not None: + warnings.warn("`u` is set but will not be used for `inv` portfolio") + if w0 is not None: + warnings.warn("`w0` is set but will not be used for `inv` portfolio") + return self._optimize_inv(S) + + # global minimum variance + if self.method == self.OPT_GMV: + if u is not None: + warnings.warn("`u` is set but will not be used for `gmv` portfolio") + return self._optimize_gmv(S, w0) + + # mean-variance + if self.method == self.OPT_MVO: + return self._optimize_mvo(S, u, w0) + + # risk parity + if self.method == self.OPT_RP: + if u is not None: + warnings.warn("`u` is set but will not be used for `rp` portfolio") + return self._optimize_rp(S, w0) + + def _optimize_inv(self, S: np.ndarray) -> np.ndarray: + """Inverse volatility""" + vola = np.diag(S) ** 0.5 + w = 1 / vola + w /= w.sum() + return w + + def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: + """optimize global minimum variance portfolio + + This method solves the following 
+            min_w w' S w
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix.
+        """
+        return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0))
+
+    def _optimize_mvo(
+        self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """optimize mean-variance portfolio
+
+        This method solves the following optimization problem
+            min_w - w' u + lamb * w' S w
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix, `u` is the expected returns,
+        and `lamb` is the risk aversion parameter.
+        """
+        return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0))
+
+    def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray:
+        """optimize risk parity portfolio
+
+        This method solves the following optimization problem
+            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
+            s.t. w >= 0, sum(w) == 1
+        where `S` is the covariance matrix and `N` is the number of stocks.
+        """
+        return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
+
+    def _get_objective_gmv(self, S: np.ndarray) -> Callable:
+        """global minimum variance optimization objective
+
+        Optimization objective
+            min_w w' S w
+        """
+
+        def func(x):
+            return x @ S @ x
+
+        return func
+
+    def _get_objective_mvo(self, S: np.ndarray, u: Optional[np.ndarray] = None) -> Callable:
+        """mean-variance optimization objective
+
+        Optimization objective
+            min_w - w' u + lamb * w' S w
+        """
+
+        def func(x):
+            risk = x @ S @ x
+            ret = x @ u
+            return -ret + self.lamb * risk
+
+        return func
+
+    def _get_objective_rp(self, S: np.ndarray) -> Callable:
+        """risk-parity optimization objective
+
+        Optimization objective
+            min_w sum_i [w_i - (w' S w) / ((S w)_i * N)]**2
+        """
+
+        def func(x):
+            N = len(x)
+            Sx = S @ x
+            xSx = x @ Sx
+            return np.sum((x - xSx / Sx / N) ** 2)
+
+        return func
+
+    def _get_constrains(self, w0: Optional[np.ndarray] = None):
+        """optimization constraints
+
+        Defines the following constraints:
+        - no shorting and no leverage: 0 <= w <= 1
+        - full investment: sum(w) == 1
+        - turnover constraint: |w - w0| <= delta
+        """
+
+        # no shorting and no leverage
+        bounds = so.Bounds(0.0, 1.0)
+
+        # full investment constraint
+        cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]  # == 0
+
+        # turnover constraint
+        if w0 is not None:
+            cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))})  # >= 0
+
+        return bounds, cons
+
+    def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.ndarray:
+        """solve optimization
+
+        Args:
+            n (int): number of parameters
+            obj (callable): optimization objective
+            bounds (Bounds): bounds of parameters
+            cons (list): optimization constraints
+        """
+        # add l2 regularization
+        wrapped_obj = obj
+        if self.alpha > 0:
+
+            def opt_obj(x):
+                return obj(x) + self.alpha * np.sum(np.square(x))
+
+            wrapped_obj = opt_obj
+
+        # solve
+        x0 = np.ones(n) / n  # initial guess: equal weights
+        sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
+        if not sol.success:
+            warnings.warn(f"optimization not successful ({sol.status})")
+
+        return sol.x
+
+
+class EnhancedIndexingOptimizer(BaseOptimizer):
+    """
+    Portfolio Optimizer with Enhanced Indexing
+
+    Note:
+        This optimizer always assumes full investment and no shorting.
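+
+    Example:
+        A minimal illustrative sketch with random placeholder inputs; in
+        practice `F`, `covB` and `varU` would be the decomposed components
+        returned by `StructuredCovEstimator.predict` with
+        `return_decomposed_components=True`:
+
+            import numpy as np
+
+            rng = np.random.default_rng(0)
+            n, k = 50, 5                                   # 50 stocks, 5 factors
+            u = rng.normal(size=n)                         # expected returns
+            F = rng.normal(size=(n, k))                    # factor exposures
+            covB = np.diag(rng.uniform(0.5, 1.0, size=k))  # factor covariance
+            varU = rng.uniform(0.01, 0.05, size=n)         # idiosyncratic variances
+            w_bench = np.full(n, 1 / n)                    # equal-weight benchmark
+            inds_onehot = np.eye(3)[rng.integers(0, 3, size=n)]  # industry one-hot
+
+            optimizer = EnhancedIndexingOptimizer(lamb=10, delta=0.4)
+            w = optimizer(u, F, covB, varU, w0=None, w_bench=w_bench, inds_onehot=inds_onehot)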
+    """
+
+    START_FROM_W0 = 'w0'
+    START_FROM_BENCH = 'benchmark'
+    DO_NOT_START_FROM = 'no_warm_start'
+
+    def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01,
+                 scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000):
+        """
+        Args:
+            lamb (float): risk aversion parameter (larger `lamb` means less focus on return)
+            delta (float): turnover rate limit
+            bench_dev (float): benchmark deviation limit
+            inds_dev (float): industry deviation limit
+            scale_alpha (bool): whether to scale alpha to match the overall volatility implied by the risk model
+            verbose (bool): whether to print detailed solver information
+            warm_start (str): warm start option (`w0`/`benchmark`/`no_warm_start`)
+                (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start)
+            max_iters (int): maximum number of solver iterations
+        """
+
+        assert lamb >= 0, "risk aversion parameter `lamb` should be non-negative"
+        self.lamb = lamb
+
+        assert delta >= 0, "turnover limit `delta` should be non-negative"
+        self.delta = delta
+
+        assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be non-negative"
+        self.bench_dev = bench_dev
+
+        assert inds_dev >= 0, "industry deviation limit `inds_dev` should be non-negative"
+        self.inds_dev = inds_dev
+
+        assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0,
+                              self.START_FROM_BENCH], "illegal warm start option"
+        self.start_from_w0 = (warm_start == self.START_FROM_W0)
+        self.start_from_bench = (warm_start == self.START_FROM_BENCH)
+
+        self.scale_alpha = scale_alpha
+        self.verbose = verbose
+        self.max_iters = max_iters
+
+    def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray,
+                 w_bench: np.ndarray, inds_onehot: np.ndarray
+                 ) -> Union[np.ndarray, pd.Series]:
+        """
+        Args:
+            u (np.ndarray): expected returns (a.k.a., alpha)
+            F, covB, varU (np.ndarray): see StructuredCovEstimator
+            w0 (np.ndarray): initial weights (for turnover control)
+            w_bench (np.ndarray): benchmark weights
+            inds_onehot (np.ndarray): industry membership (one-hot encoded)
+
+        Returns:
+            np.ndarray or pd.Series: optimized portfolio allocation
+        """
+        # scale alpha to match volatility
+        if self.scale_alpha:
+            u = u / u.std()
+            x_variance = np.mean(np.diag(F @ covB @ F.T) + varU)
+            u *= x_variance ** 0.5
+
+        w = cp.Variable(len(u))  # num_assets
+        v = w @ F  # num_factors
+        ret = w @ u
+        risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2))
+        obj = cp.Maximize(ret - self.lamb * risk)
+        d_bench = w - w_bench
+        d_inds = d_bench @ inds_onehot
+        cons = [
+            w >= 0,
+            cp.sum(w) == 1,
+            d_bench >= -self.bench_dev,
+            d_bench <= self.bench_dev,
+            d_inds >= -self.inds_dev,
+            d_inds <= self.inds_dev
+        ]
+        if w0 is not None:
+            turnover = cp.sum(cp.abs(w - w0))
+            cons.append(turnover <= self.delta)
+
+        warm_start = False
+        if self.start_from_w0:
+            if w0 is None:
+                print('Warning: warm start from `w0` was requested, but `w0` is None.')
+            else:
+                w.value = w0
+                warm_start = True
+        elif self.start_from_bench:
+            w.value = w_bench
+            warm_start = True
+
+        prob = cp.Problem(obj, cons)
+        prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters)
+
+        if prob.status != 'optimal':
+            print('Warning: the solver did not reach an optimal solution:', prob.status)
+
+        return np.asarray(w.value)

From 58f74cfd84b1f7e94de4f47b5c09d063ca8ed507 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 10:07:03 +0800
Subject: [PATCH 10/32] Reformat code to follow PEP 8.
--- qlib/model/riskmodel.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 32984ed6a3d..8eec73e00ae 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -1,11 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import warnings import numpy as np import pandas as pd - from typing import Union +from sklearn.decomposition import PCA, FactorAnalysis from qlib.model.base import BaseModel @@ -124,7 +123,7 @@ def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: X = np.nan_to_num(X) elif self.nan_option == self.MASK_NAN: X = np.ma.masked_invalid(X) - # centerize + # centralize if not self.assume_centered: X = X - np.nanmean(X, axis=0) return X @@ -162,8 +161,9 @@ class ShrinkCovEstimator(RiskModel): [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns with an application to portfolio selection. Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0 - [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance estimation. - IEEE Transactions on Signal Processing, 58(10), 5016–5029. https://doi.org/10.1109/TSP.2010.2053029 + [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance + estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029. + https://doi.org/10.1109/TSP.2010.2053029 [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip From 164687d54bfc3ea454eb72e060de16c6dc4a43c1 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 10:13:08 +0800 Subject: [PATCH 11/32] Add scikit-learn to dependencies. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f759945fd58..6582054b9c6 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ "tornado", "joblib>=0.17.0", "ruamel.yaml>=0.16.12", + "scikit-learn>=0.22" ] # Numpy include From b8647c13c78842d8ceb20ffb3788cc034cba6041 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 10:20:51 +0800 Subject: [PATCH 12/32] Reformat code to follow PEP 8. 
---
 qlib/contrib/strategy/strategy.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/qlib/contrib/strategy/strategy.py b/qlib/contrib/strategy/strategy.py
index 74df39f3e31..550ff649db8 100644
--- a/qlib/contrib/strategy/strategy.py
+++ b/qlib/contrib/strategy/strategy.py
@@ -7,7 +7,6 @@
 import pandas as pd
 
 from ..backtest.order import Order
-from ...utils import get_pre_trading_date
 from .order_generator import OrderGenWInteract
 
 
@@ -390,11 +389,11 @@ def filter_stock(l):
 
         current_stock_list = current_temp.get_stock_list()
         value = cash * self.risk_degree / len(buy) if len(buy) > 0 else 0
-        # open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not consider it
-        # as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
+        # open_cost should be considered in a real trading environment, but the backtest in evaluate.py does not
+        # consider it; since this demo aims to reproduce the same strategy as evaluate.py, the line below stays commented out
         # value = value / (1+trade_exchange.open_cost) # set open_cost limit
         for code in buy:
-            # check is stock supended
+            # check whether the stock is suspended
            if not trade_exchange.is_stock_tradable(stock_id=code, trade_date=trade_date):
                 continue
             # buy order

From 2f9d45e03ac429d56ab2356e104089c8544316a3 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 10:29:29 +0800
Subject: [PATCH 13/32] Reformat code with black.

---
 docs/conf.py | 10 +--
 examples/benchmarks/TFT/libs/tft_model.py | 12 +--
 examples/highfreq/highfreq_handler.py | 33 ++------
 examples/highfreq/highfreq_processor.py | 4 +-
 examples/highfreq/workflow.py | 35 ++-------
 examples/run_all_model.py | 5 +-
 examples/workflow_by_code.py | 5 +-
 qlib/config.py | 24 +-----
 qlib/contrib/backtest/__init__.py | 18 +----
 qlib/contrib/backtest/profit_attribution.py | 23 +-----
 qlib/contrib/data/handler.py | 10 +--
 qlib/contrib/eva/alpha.py | 6 +-
 qlib/contrib/evaluate.py | 7 +-
 qlib/contrib/evaluate_portfolio.py | 16 +---
 qlib/contrib/model/catboost_model.py | 4 +-
 qlib/contrib/model/pytorch_alstm.py | 21 ++---
 qlib/contrib/model/pytorch_alstm_ts.py | 17 +----
 qlib/contrib/model/pytorch_gats.py | 22 +-----
 qlib/contrib/model/pytorch_gats_ts.py | 18 +----
 qlib/contrib/model/pytorch_gru.py | 21 +----
 qlib/contrib/model/pytorch_gru_ts.py | 17 +----
 qlib/contrib/model/pytorch_lstm.py | 21 +----
 qlib/contrib/model/pytorch_lstm_ts.py | 17 +----
 qlib/contrib/model/pytorch_nn.py | 6 +-
 qlib/contrib/model/pytorch_sfm.py | 19 +----
 qlib/contrib/model/pytorch_tabnet.py | 14 +---
 qlib/contrib/model/xgboost.py | 4 +-
 qlib/contrib/online/executor.py | 24 +-----
 qlib/contrib/online/manager.py | 6 +-
 qlib/contrib/online/operator.py | 8 +-
 qlib/contrib/online/utils.py | 6 +-
 .../analysis_model_performance.py | 66 +++------------
 .../analysis_position/cumulative_return.py | 36 ++-------
 .../analysis_position/parse_position.py | 5 +-
 .../report/analysis_position/rank_label.py | 16 +---
 .../report/analysis_position/report.py | 15 +---
 qlib/contrib/report/graph.py | 6 +-
 qlib/contrib/strategy/cost_control.py | 5 +-
 qlib/contrib/strategy/order_generator.py | 12 +--
 qlib/contrib/tuner/launcher.py | 6 +-
 qlib/contrib/tuner/space.py | 5 +-
 qlib/contrib/tuner/tuner.py | 26 ++-----
 qlib/data/client.py | 3 +-
 qlib/data/data.py | 69 ++---------------
 qlib/data/dataset/utils.py | 5 +-
 qlib/data/filter.py | 7 +-
 qlib/model/riskmodel.py | 31 +++++---
 qlib/portfolio/optimizer.py | 76 ++++++++++++-------
 qlib/tests/__init__.py | 6 +-
qlib/workflow/record_temp.py | 5 +- scripts/data_collector/yahoo/collector.py | 27 ++----- scripts/dump_bin.py | 13 +--- setup.py | 16 +--- tests/test_all_pipeline.py | 9 +-- tests/test_dump_data.py | 9 +-- tests/test_get_data.py | 4 +- 56 files changed, 218 insertions(+), 713 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6e52b0e34a4..61fe784e7a9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,15 +191,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ( - master_doc, - "QLib", - u"QLib Documentation", - author, - "QLib", - "One line description of project.", - "Miscellaneous", - ), + (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",), ] diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index b39f1782553..f40a1aece33 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -721,12 +721,7 @@ def _build_base_graph(self): encoder_steps = self.num_encoder_steps # Inputs. - all_inputs = tf.keras.layers.Input( - shape=( - time_steps, - combined_input_size, - ) - ) + all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,)) unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs) @@ -866,10 +861,7 @@ def get_lstm(return_state): """Returns LSTM cell initialized with default parameters.""" if self.use_cudnn: lstm = tf.keras.layers.CuDNNLSTM( - self.hidden_layer_size, - return_sequences=True, - return_state=return_state, - stateful=False, + self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False, ) else: lstm = tf.keras.layers.LSTM( diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index d3565051446..2fc411ab660 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -20,10 +20,7 @@ def check_transform_proc(proc_l): new_l = [] for p in proc_l: p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append(p) return new_l @@ -33,11 +30,7 @@ def check_transform_proc(proc_l): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( instruments=instruments, @@ -68,8 +61,7 @@ def get_normalized_price_feature(price_field, shift=0): feature_ops = template_norm.format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(price_field), + template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field), ), template_fillnan.format(template_paused.format("$close")), ) @@ -119,24 +111,14 @@ def get_normalized_price_feature(price_field, shift=0): class HighFreqBacktestHandler(DataHandler): def __init__( - self, - instruments="csi300", - start_time=None, - end_time=None, + self, instruments="csi300", start_time=None, end_time=None, ): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( - instruments=instruments, - 
start_time=start_time, - end_time=end_time, - data_loader=data_loader, + instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader, ) def get_feature_config(self): @@ -155,8 +137,7 @@ def get_feature_config(self): fields += [ "Cut({0}, 240, None)".format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(simpson_vwap), + template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap), ) ) ] diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index f0ab0dec2b1..73510ef0689 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -65,8 +65,6 @@ def __call__(self, df_features): feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), - index=idx, - columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() return df_new_features diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 01de59c0e77..0bfd0c2a09c 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -63,13 +63,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG0, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, "dataset_backtest": { @@ -81,13 +75,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG1, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, } @@ -152,24 +140,11 @@ def dump_and_load_dataset(self): "start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00", }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) dataset_backtest.init( - handler_kwargs={ - "start_time": "2021-01-19 00:00:00", - "end_time": "2021-01-25 16:00:00", - }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",}, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) ##=============get data============= diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d587eff1559..d356b41285e 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -34,10 +34,7 @@ exp_manager = { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + exp_path, - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",}, } if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index d5dab891789..6f5c11dc020 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -81,10 +81,7 @@ "strategy": { "class": 
"TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, diff --git a/qlib/config.py b/qlib/config.py index 52b05568d57..344eb852777 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,12 +115,7 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": { - "field_not_found": { - "()": "qlib.log.LogFilter", - "param": [".*?WARN: data not found for.*?"], - } - }, + "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, "handlers": { "console": { "class": "logging.StreamHandler", @@ -135,10 +130,7 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, }, } @@ -200,16 +192,8 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: { - "trade_unit": 100, - "limit_threshold": 0.099, - "deal_price": "vwap", - }, - REG_US: { - "trade_unit": 1, - "limit_threshold": None, - "deal_price": "close", - }, + REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, + REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index aa24ffb0cf6..bd3494abf6a 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,13 +18,7 @@ def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, + strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, ): """get_strategy @@ -75,11 +69,7 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, + topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -172,9 +162,7 @@ def get_exchange( def get_executor( - executor=None, - trade_exchange=None, - verbose=True, + executor=None, trade_exchange=None, verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 20c6f638fcd..355f0637395 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,10 +12,7 @@ def get_benchmark_weight( - bench, - start_date=None, - end_date=None, - path=None, + bench, start_date=None, end_date=None, path=None, ): """get_benchmark_weight @@ -216,12 +213,7 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, - bench="SH000905", - group_field="industry", - group_method="category", - group_n=None, - deal_price="vwap", + positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", ): """brinson profit attribution @@ -255,17 +247,10 @@ def brinson_pa( # suspend stock is 
NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), - start_time=shift_start_date, - end_time=end_date, - as_list=True, + D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, ) stock_df = D.features( - instruments, - [group_field, deal_price], - start_time=shift_start_date, - end_time=end_date, - freq="day", + instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 970b032d6b0..574287819b7 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,10 +21,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -170,10 +167,7 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": { - "windows": [0], - "feature": ["OPEN", "HIGH", "LOW", "VWAP"], - }, + "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853f1..363a184582d 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,11 +35,7 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, - label: pd.Series, - date_col: str = "datetime", - quantile: float = 0.2, - dropna: bool = False, + pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4aa5b55156f..5cb1ce4eb67 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,12 +244,7 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict( - zip( - ["long", "short", "long_short"], - map(pd.Series, [long_returns, short_returns, ls_returns]), - ) - ) + return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 04ddd8db041..2d94105e482 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,12 +64,7 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, - fields, - start_time=evaluate_date, - end_time=evaluate_date, - freq="day", - disk_cache=0, + instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -87,14 +82,7 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] 
- close_data_df = D.features( - instruments, - fields, - start_time=start_date, - end_time=end_date, - freq="day", - disk_cache=0, - ) + close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index d57c32b7022..2840c2cef5a 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,9 +32,7 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index bbbb61851b1..306e68aadf2 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -328,14 +319,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de855..612bacbec93 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): 
return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -309,14 +302,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc1a..c59dc91973f 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,17 +229,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -340,19 +334,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e418..dfc5f4ab5ed 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,11 +242,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -361,19 +357,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": 
self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9fb0..d2a774b65b4 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b854c..49f438cc379 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec87..02ca16e36b8 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ 
-118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f1f..2ec36f96e34 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff53..8c1a77ec3c5 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,11 +154,7 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7bd..1f7433e053d 100644 --- 
a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,14 +30,7 @@ class SFM_Model(nn.Module): def __init__( - self, - d_feat=6, - output_dim=1, - freq_dim=10, - hidden_size=64, - dropout_W=0.0, - dropout_U=0.0, - device="cpu", + self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", ): super().__init__() @@ -362,17 +355,11 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701ce..18e9d8eb404 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,9 +120,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -156,11 +154,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -172,9 +166,7 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index ba2e5789b85..e37725c2eb6 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,9 +29,7 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 2bd0937a032..52b86888133 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,21 +150,13 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) else: 
print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) @@ -271,21 +263,13 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.SELL, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.BUY, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index cf850b9dace..a4476709de0 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,12 +84,10 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, - self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, - self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f57858..c82deb3945c 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,9 +125,7 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -160,9 +158,7 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index 611af63e4af..fb96c87bd31 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,11 +79,7 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar( - start_time=pd.Timestamp(latest_trading_date), - end_time=pd.Timestamp(today), - future=True, - ) + 
dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 1cb14d26153..ef1447a12be 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,8 +53,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), - layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -62,12 +61,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["long-short", "long-average"], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), ).figure return group_scatter_figure, group_hist_figure @@ -102,15 +96,12 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], - names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), - end=pd.Timestamp(f"{_index.max()[:4]}1231"), - freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", ) _years = [] _month = [] @@ -142,32 +133,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ( - "ic", - dict( - row=1, - col=1, - name="", - kind="DistplotGraph", - graph_kwargs=dict(bin_size=_bin_size), - ), - ), + ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name],), sub_graph_data=_sub_graph_data, - layout=dict( - yaxis2=dict(title="Observed Quantile"), - xaxis2=dict(title=f"{dist_name} Distribution Quantile"), - ), + layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -181,8 +155,7 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, - layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -202,17 +175,11 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame( - { - "Top": top, - "Bottom": bottom, - } - ) + r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, - layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -230,11 +197,7 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, - layout=dict( - title="Information Coefficient (IC)", - xaxis=dict(type="category", tickangle=45), - ), + ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), ).figure return ic_bar_figure @@ -277,12 +240,7 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, - lag=lag, - N=N, - reverse=reverse, - rank=rank, - show_nature_day=show_nature_day, + pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index abb68ea6051..604189c94b6 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,11 +13,7 @@ def _get_cum_return_data_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ): """ @@ -29,11 +25,7 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, - report_normal=report_normal, - label_data=label_data, - start_date=start_date, - end_date=end_date, + position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -87,11 +79,7 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ 
-111,18 +99,12 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ( - "cum_{}".format(_t_name), - dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), - ), + ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ( - "{}_value".format(_t_name), - dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), - ), + ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -161,13 +143,7 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, - rows=2, - cols=2, - row_width=[1, 2], - column_width=[3, 1], - print_grid=False, - specs=specs, + vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index fe1d6113709..23f9c592c0a 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,10 +72,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict( - date=_trading_date, - code_list=_trading_day_df[_trading_day_df["status"] != -1].index, - ) + previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 72a358adcbf..9a4d834ed92 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,11 +23,7 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, - label_data, - calculate_label_rank=True, - start_date=start_date, - end_date=end_date, + position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, ) res_dict = dict() @@ -51,20 +47,14 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, - xaxis=dict(type="category", tickangle=45), - yaxis=dict(title="lable-rank-ratio: %"), + title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, - label_data: pd.DataFrame, - start_date=None, - end_date=None, - show_notebook=True, + position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index f82e654c432..8e2c05c0a38 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,9 +123,7 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, { "type": "rect", @@ -137,20 +135,13 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, - vertical_spacing=0.01, - rows=7, - cols=1, - row_width=[1, 1, 1, 3, 1, 1, 3], - print_grid=False, + shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 70e382fb165..dbbc411109d 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,11 +311,7 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, - col=col, - name=res_name, - kind=self._kind_map["kind"], - graph_kwargs=self._kind_map["kwargs"], + row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index dd90437b03f..ee3ee03ecfd 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,10 +57,7 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min( - max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), - sold_stock_weight, - ) + add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 494981ecc09..6f168b4dd52 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,14 +102,10 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, - cash=current_tradable_value, - trade_date=trade_date, + weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, - current_position=current_amount_dict, - trade_date=trade_date, + target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, ) return order_list @@ -164,8 +160,6 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, - 
current_position=current.get_stock_amount_dict(), - trade_date=trade_date, + target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 711658c9a63..409410a2ab4 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,11 +13,7 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", - "--config_path", - required=True, - type=str, - help="config path indicates where to load yaml config.", + "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 76f101671b7..57f57a6c34e 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,8 +10,5 @@ } QLibDataLabelSpace = { - "labels": hp.choice( - "labels", - [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], - ) + "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 2ce957859b2..e81d41a9ad0 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,10 +28,7 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join( - self.tuner_config["experiment"]["dir"], - self.tuner_config["experiment"]["name"], - ) + self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) self.best_params = None self.best_res = None @@ -42,10 +39,7 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, - space=self.space, - algo=tpe.suggest, - max_evals=self.max_evals, + fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -159,8 +153,7 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), - QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -173,27 +166,20 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - model_space_name, - ) + model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - strategy_space_name, - ) + strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index 5244a7e45cf..d1a68cb3857 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,8 +26,7 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", - lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 762467da35e..47cded79cec 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,14 +328,7 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=1, - **kwargs, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -414,29 +407,13 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - spans, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - None, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), ) p.close() @@ -598,12 +575,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list( - filter( - lambda x: x[0] <= x[1], - [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], - ) - ) + inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -723,14 +695,7 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, - args=( - inst, - start_time, - end_time, - freq, - column_names, - ), + LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), ) p.close() @@ -763,12 +728,7 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={ - "start_time": str(start_time), - "end_time": str(end_time), - "freq": freq, - "future": future, - 
}, + request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -832,14 +792,7 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=0, - return_uri=False, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -942,13 +895,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=None, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index feda1904463..58e2bd96811 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,10 +32,7 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, - selector: Union[pd.Timestamp, slice, str, list], - level: Union[str, int], - fetch_orig=True, + df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 70f9d32780d..811fd387f14 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,12 +341,7 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, - [self.rule_expression], - fstart, - fend, - freq=self.filter_freq, - disk_cache=0, + instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 8eec73e00ae..f19c60fc9be 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -38,7 +38,7 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True ) -> Union[pd.DataFrame, np.ndarray]: """ Args: @@ -373,8 +373,7 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( - np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 ) roff = 2 * roff1 - roff3 rho = rdiag + roff @@ -434,7 +433,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray: if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = 
np.asarray(LamPCA.dot(LamPCA.T)) @@ -490,8 +489,14 @@ class StructuredCovEstimator(RiskModel): FACTOR_MODEL_PCA = "pca" FACTOR_MODEL_FA = "fa" - def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: str = "ignore", - assume_centered: bool = False, scale_return: bool = True): + def __init__( + self, + factor_model: str = "pca", + num_factors: int = 10, + nan_option: str = "ignore", + assume_centered: bool = False, + scale_return: bool = True, + ): """ Args: factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). @@ -505,14 +510,17 @@ def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: assert factor_model in [ self.FACTOR_MODEL_PCA, self.FACTOR_MODEL_FA, - ], 'factor_model={} is not supported'.format(factor_model) + ], "factor_model={} is not supported".format(factor_model) self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis self.num_factors = num_factors def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, - return_decomposed_components=False + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: @@ -525,8 +533,9 @@ def predict( Returns: tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. """ - assert not return_corr or not return_decomposed_components, \ - 'Can only return either correlation matrix or decomposed components.' + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." 
# transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 728a04ea9db..3912421277c 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -38,13 +38,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -71,10 +71,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -163,7 +163,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -259,6 +259,7 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: + def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) @@ -281,12 +282,21 @@ class EnhancedIndexingOptimizer(BaseOptimizer): This optimizer always assumes full investment and no-shorting. 
""" - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = 'no_warm_start' + START_FROM_W0 = "w0" + START_FROM_BENCH = "benchmark" + DO_NOT_START_FROM = "no_warm_start" - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): + def __init__( + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = 0.01, + scale_alpha=True, + verbose: bool = False, + warm_start: str = DO_NOT_START_FROM, + max_iters: int = 10000, + ): """ Args: lamb (float): risk aversion parameter (larger `lamb` means less focus on return) @@ -310,18 +320,28 @@ def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01 assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" self.inds_dev = inds_dev - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) + assert warm_start in [ + self.DO_NOT_START_FROM, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" + self.start_from_w0 = warm_start == self.START_FROM_W0 + self.start_from_bench = warm_start == self.START_FROM_BENCH self.scale_alpha = scale_alpha self.verbose = verbose self.max_iters = max_iters - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: + def __call__( + self, + u: np.ndarray, + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray, + ) -> Union[np.ndarray, pd.Series]: """ Args: u (np.ndarray): expected returns (a.k.a., alpha) @@ -352,7 +372,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar d_bench >= -self.bench_dev, d_bench <= self.bench_dev, d_inds >= -self.inds_dev, - d_inds <= self.inds_dev + d_inds <= self.inds_dev, ] if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) @@ -361,7 +381,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar warm_start = False if self.start_from_w0: if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') + print("Warning: try warm start with w0, but w0 is `None`.") else: w.value = w0 warm_start = True @@ -372,7 +392,7 @@ def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndar prob = cp.Problem(obj, cons) prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) + if prob.status != "optimal": + print("Warning: solve failed.", prob.status) return np.asarray(w.value) diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index f92e7278758..eb6f9c5edb5 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,10 +18,6 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=provider_uri, - delete_old=False, + name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git 
a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index be458a24d29..0c704b89669 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,10 +193,7 @@ def generate(self): } ) objects.update( - { - "long_short_r.pkl": long_short_r, - "long_avg_r.pkl": long_avg_r, - } + {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 743f89462d0..24526e3328b 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -39,13 +39,7 @@ class YahooData: INTERVAL_1d = "1d" def __init__( - self, - timezone: str = None, - start=None, - end=None, - interval="1d", - delay=0, - show_1min_logging: bool = False, + self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False, ): """ @@ -125,11 +119,7 @@ def _get_simple(start_, end_): self._sleep() _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval return self.get_data_from_remote( - symbol, - interval=_remote_interval, - start=start_, - end=end_, - show_1min_logging=self._show_1min_logging, + symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging, ) _result = None @@ -438,9 +428,7 @@ class YahooNormalize: DAILY_FORMAT = "%Y-%m-%d" def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ @@ -458,10 +446,7 @@ def __init__( @staticmethod def normalize_yahoo( - df: pd.DataFrame, - calendar_list: list = None, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol", ): if df.empty: return df @@ -566,9 +551,7 @@ class YahooNormalize1min(YahooNormalize, ABC): CONSISTENT_1d = False def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index 4811fd48612..ab24fa9cacf 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -153,22 +153,13 @@ def get_dump_fields(self, df_columns: Iterable[str]) -> Iterable[str]: @staticmethod def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]: - return sorted( - map( - pd.Timestamp, - pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(), - ) - ) + return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),)) def _read_instruments(self, instrument_path: Path) -> pd.DataFrame: df = pd.read_csv( instrument_path, sep=self.INSTRUMENTS_SEP, - names=[ - self.symbol_field_name, - self.INSTRUMENTS_START_FIELD, - self.INSTRUMENTS_END_FIELD, - ], + names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,], ) return df diff --git a/setup.py b/setup.py index 6582054b9c6..d8a9d9efa6b 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ "tornado", "joblib>=0.17.0", "ruamel.yaml>=0.16.12", - "scikit-learn>=0.22" + "scikit-learn>=0.22", ] # Numpy include @@ -70,16 +70,10 @@ # Cython Extensions extensions = [ Extension( - "qlib.data._libs.rolling", - ["qlib/data/_libs/rolling.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + 
"qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), Extension( - "qlib.data._libs.expanding", - ["qlib/data/_libs/expanding.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), ] @@ -98,9 +92,7 @@ # py_modules=['qlib'], entry_points={ # 'console_scripts': ['mycli=mymodule:cli'], - "console_scripts": [ - "qrun=qlib.workflow.cli:run", - ], + "console_scripts": ["qrun=qlib.workflow.cli:run",], }, ext_modules=extensions, install_requires=REQUIRED, diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index f6e77cba4d8..8b3819c8302 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,10 +78,7 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, @@ -176,9 +173,7 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, - "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index dfa7f8556dd..de649c37edf 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,9 +40,7 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod @@ -54,10 +52,7 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map( - pd.Timestamp, - pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, - ) + map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b910d..d5637b02595 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,9 +26,7 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod From 37871389b98fe34da35cdd2e996469ac3c7434ff Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 11:25:42 +0800 Subject: [PATCH 14/32] Format code with the latest version of black. 
--- qlib/config.py | 24 +++++-- qlib/contrib/backtest/__init__.py | 18 ++++- qlib/contrib/backtest/profit_attribution.py | 23 +++++-- qlib/contrib/data/handler.py | 10 ++- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 ++++- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++++-- qlib/contrib/model/pytorch_alstm_ts.py | 17 +++-- qlib/contrib/model/pytorch_gats.py | 22 ++++-- qlib/contrib/model/pytorch_gats_ts.py | 18 ++++- qlib/contrib/model/pytorch_gru.py | 21 ++++-- qlib/contrib/model/pytorch_gru_ts.py | 17 ++++- qlib/contrib/model/pytorch_lstm.py | 21 ++++-- qlib/contrib/model/pytorch_lstm_ts.py | 17 ++++- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 ++++- qlib/contrib/model/pytorch_tabnet.py | 14 +++- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 +++++-- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 ++- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++++++++++++---- .../analysis_position/cumulative_return.py | 36 ++++++++-- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 ++++- .../report/analysis_position/report.py | 15 +++- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +++- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 +++++-- qlib/data/client.py | 3 +- qlib/data/data.py | 69 ++++++++++++++++--- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- 41 files changed, 526 insertions(+), 116 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 344eb852777..52b05568d57 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,7 +115,12 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, + "filters": { + "field_not_found": { + "()": "qlib.log.LogFilter", + "param": [".*?WARN: data not found for.*?"], + } + }, "handlers": { "console": { "class": "logging.StreamHandler", @@ -130,7 +135,10 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), + "default_exp_name": "Experiment", + }, }, } @@ -192,8 +200,16 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, - REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, + REG_CN: { + "trade_unit": 100, + "limit_threshold": 0.099, + "deal_price": "vwap", + }, + REG_US: { + "trade_unit": 1, + "limit_threshold": None, + "deal_price": "close", + }, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index bd3494abf6a..aa24ffb0cf6 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,7 +18,13 @@ def get_strategy( - strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, + 
strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, ): """get_strategy @@ -69,7 +75,11 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -162,7 +172,9 @@ def get_exchange( def get_executor( - executor=None, trade_exchange=None, verbose=True, + executor=None, + trade_exchange=None, + verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 355f0637395..20c6f638fcd 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,7 +12,10 @@ def get_benchmark_weight( - bench, start_date=None, end_date=None, path=None, + bench, + start_date=None, + end_date=None, + path=None, ): """get_benchmark_weight @@ -213,7 +216,12 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", + positions, + bench="SH000905", + group_field="industry", + group_method="category", + group_n=None, + deal_price="vwap", ): """brinson profit attribution @@ -247,10 +255,17 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, + D.instruments(market="all"), + start_time=shift_start_date, + end_time=end_date, + as_list=True, ) stock_df = D.features( - instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", + instruments, + [group_field, deal_price], + start_time=shift_start_date, + end_time=end_date, + freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 574287819b7..970b032d6b0 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,7 +21,10 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." 
pkwargs.update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -167,7 +170,10 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, + "price": { + "windows": [0], + "feature": ["OPEN", "HIGH", "LOW", "VWAP"], + }, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 363a184582d..c68571853f1 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,7 +35,11 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, + pred: pd.Series, + label: pd.Series, + date_col: str = "datetime", + quantile: float = 0.2, + dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 5cb1ce4eb67..4aa5b55156f 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,7 +244,12 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) + return dict( + zip( + ["long", "short", "long_short"], + map(pd.Series, [long_returns, short_returns, ls_returns]), + ) + ) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 2d94105e482..04ddd8db041 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,7 +64,12 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, + instruments, + fields, + start_time=evaluate_date, + end_time=evaluate_date, + freq="day", + disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -82,7 +87,14 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) + close_data_df = D.features( + instruments, + fields, + start_time=start_date, + end_time=end_date, + freq="day", + disk_cache=0, + ) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 2840c2cef5a..d57c32b7022 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,7 +32,9 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 
306e68aadf2..bbbb61851b1 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -319,12 +328,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 612bacbec93..725568de855 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -302,12 +309,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + 
nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index c59dc91973f..07048e1bc1a 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,11 +229,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -334,11 +340,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index dfc5f4ab5ed..1e94f56e418 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,7 +242,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -357,11 +361,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index d2a774b65b4..84f863b9fb0 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return 
np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 49f438cc379..bb6618b854c 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 02ca16e36b8..163d500ec87 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 
+305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index 2ec36f96e34..cf4f8fb9f1f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 8c1a77ec3c5..16fcea9ff53 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,7 +154,11 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 1f7433e053d..d5169e6c7bd 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,7 +30,14 @@ class SFM_Model(nn.Module): def __init__( - self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", + self, + d_feat=6, + output_dim=1, + freq_dim=10, + hidden_size=64, + dropout_W=0.0, + dropout_U=0.0, + device="cpu", ): super().__init__() @@ -355,11 +362,17 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git 
a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 18e9d8eb404..62e32d701ce 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,7 +120,9 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -154,7 +156,11 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -166,7 +172,9 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index e37725c2eb6..ba2e5789b85 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,7 +29,9 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 52b86888133..2bd0937a032 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,13 +150,21 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) @@ -263,13 +271,21 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.SELL, + factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.BUY, + factor=factor, ) 
order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index a4476709de0..cf850b9dace 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,10 +84,12 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, + self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, + self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c82deb3945c..c8b44f57858 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,7 +125,9 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + order_list=order_list, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -158,7 +160,9 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + trade_info=trade_info, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index fb96c87bd31..611af63e4af 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,7 +79,11 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) + dates = D.calendar( + start_time=pd.Timestamp(latest_trading_date), + end_time=pd.Timestamp(today), + future=True, + ) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index ef1447a12be..1cb14d26153 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,7 +53,8 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), + layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, 
["long-short", "long-average"]] @@ -61,7 +62,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["long-short", "long-average"], + ), ).figure return group_scatter_figure, group_hist_figure @@ -96,12 +102,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], + names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), + end=pd.Timestamp(f"{_index.max()[:4]}1231"), + freq="1M", ) _years = [] _month = [] @@ -133,15 +142,32 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), + ( + "ic", + dict( + row=1, + col=1, + name="", + kind="DistplotGraph", + graph_kwargs=dict(bin_size=_bin_size), + ), + ), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name], + ), sub_graph_data=_sub_graph_data, - layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), + layout=dict( + yaxis2=dict(title="Observed Quantile"), + xaxis2=dict(title=f"{dist_name} Distribution Quantile"), + ), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -155,7 +181,8 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, + layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -175,11 +202,17 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) + r_df = pd.DataFrame( + { + "Top": top, + "Bottom": bottom, + } + ) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, + layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -197,7 +230,11 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), + ic_df, + layout=dict( + title="Information Coefficient (IC)", + xaxis=dict(type="category", tickangle=45), + ), ).figure return ic_bar_figure @@ -240,7 +277,12 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, + pred_label=pred_label, + lag=lag, + N=N, + reverse=reverse, + rank=rank, + show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index 604189c94b6..abb68ea6051 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,7 +13,11 @@ def _get_cum_return_data_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ): """ @@ -25,7 +29,11 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, + position=position, + report_normal=report_normal, + label_data=label_data, + start_date=start_date, + end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -79,7 +87,11 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures 
@@ -99,12 +111,18 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), + ( + "cum_{}".format(_t_name), + dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), + ), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), + ( + "{}_value".format(_t_name), + dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), + ), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -143,7 +161,13 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, + vertical_spacing=0.01, + rows=2, + cols=2, + row_width=[1, 2], + column_width=[3, 1], + print_grid=False, + specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 23f9c592c0a..fe1d6113709 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,7 +72,10 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) + previous_data = dict( + date=_trading_date, + code_list=_trading_day_df[_trading_day_df["status"] != -1].index, + ) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 9a4d834ed92..72a358adcbf 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,7 +23,11 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, + position, + label_data, + calculate_label_rank=True, + start_date=start_date, + end_date=end_date, ) res_dict = dict() @@ -47,14 +51,20 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), + title=_col, + xaxis=dict(type="category", tickangle=45), + yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, + position: dict, + label_data: pd.DataFrame, + start_date=None, + end_date=None, + show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 8e2c05c0a38..f82e654c432 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,7 +123,9 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, { "type": "rect", @@ -135,13 +137,20 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index dbbc411109d..70e382fb165 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,7 +311,11 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index ee3ee03ecfd..dd90437b03f 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,7 +57,10 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) + add_weight = min( + max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), + sold_stock_weight, + ) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 6f168b4dd52..494981ecc09 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,10 +102,14 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, + weight_position=target_weight_position, + cash=current_tradable_value, + trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, + target_position=target_amount_dict, + current_position=current_amount_dict, + trade_date=trade_date, ) return order_list @@ -160,6 +164,8 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, 
current_position=current.get_stock_amount_dict(), trade_date=trade_date, + target_position=amount_dict, + current_position=current.get_stock_amount_dict(), + trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 409410a2ab4..711658c9a63 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,7 +13,11 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", + "-c", + "--config_path", + required=True, + type=str, + help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 57f57a6c34e..76f101671b7 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,5 +10,8 @@ } QLibDataLabelSpace = { - "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) + "labels": hp.choice( + "labels", + [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], + ) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index e81d41a9ad0..2ce957859b2 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,7 +28,10 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) + self.ex_dir = os.path.join( + self.tuner_config["experiment"]["dir"], + self.tuner_config["experiment"]["name"], + ) self.best_params = None self.best_res = None @@ -39,7 +42,10 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, + fn=self.objective, + space=self.space, + algo=tpe.suggest, + max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -153,7 +159,8 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), + QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -166,20 +173,27 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) + model_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + model_space_name, + ) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) + strategy_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + strategy_space_name, + ) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), + data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index d1a68cb3857..5244a7e45cf 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,7 +26,8 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", + lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 47cded79cec..762467da35e 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,7 +328,14 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=1, + **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -407,13 +414,29 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + spans, + C, + ), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + None, + C, + ), ) p.close() @@ -575,7 +598,12 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) + inst: list( + filter( + lambda x: x[0] <= x[1], + [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], + ) + ) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -695,7 +723,14 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), + LocalDatasetProvider.cache_walker, + args=( + inst, + start_time, + end_time, + freq, + column_names, + ), ) p.close() @@ -728,7 +763,12 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, + 
request_content={ + "start_time": str(start_time), + "end_time": str(end_time), + "freq": freq, + "future": future, + }, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -792,7 +832,14 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=0, + return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -895,7 +942,13 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index 58e2bd96811..feda1904463 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,7 +32,10 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, + df: pd.DataFrame, + selector: Union[pd.Timestamp, slice, str, list], + level: Union[str, int], + fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 811fd387f14..70f9d32780d 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,7 +341,12 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, + instruments, + [self.rule_expression], + fstart, + fend, + freq=self.filter_freq, + disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index eb6f9c5edb5..f92e7278758 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,6 +18,10 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, + name="qlib_data_simple", + region="cn", + interval="1d", + target_dir=provider_uri, + delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 0c704b89669..be458a24d29 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,7 +193,10 @@ def generate(self): } ) objects.update( - {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) From dc4aa675034724a9d2815763fd575b3ec56e76e2 Mon Sep 17 00:00:00 2001 From: Jactus Date: Mon, 22 Feb 2021 11:42:36 +0800 Subject: [PATCH 15/32] Black format --- docs/conf.py | 10 ++++++- examples/benchmarks/TFT/libs/tft_model.py | 12 ++++++-- examples/highfreq/highfreq_handler.py | 33 ++++++++++++++++----- examples/highfreq/highfreq_processor.py | 4 ++- 
examples/highfreq/workflow.py | 35 +++++++++++++++++++---- examples/run_all_model.py | 5 +++- examples/workflow_by_code.py | 5 +++- scripts/data_collector/yahoo/collector.py | 27 +++++++++++++---- scripts/dump_bin.py | 13 +++++++-- setup.py | 14 +++++++-- tests/test_all_pipeline.py | 9 ++++-- tests/test_dump_data.py | 9 ++++-- tests/test_get_data.py | 4 ++- 13 files changed, 147 insertions(+), 33 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 61fe784e7a9..6e52b0e34a4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,7 +191,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",), + ( + master_doc, + "QLib", + u"QLib Documentation", + author, + "QLib", + "One line description of project.", + "Miscellaneous", + ), ] diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index f40a1aece33..b39f1782553 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -721,7 +721,12 @@ def _build_base_graph(self): encoder_steps = self.num_encoder_steps # Inputs. - all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,)) + all_inputs = tf.keras.layers.Input( + shape=( + time_steps, + combined_input_size, + ) + ) unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs) @@ -861,7 +866,10 @@ def get_lstm(return_state): """Returns LSTM cell initialized with default parameters.""" if self.use_cudnn: lstm = tf.keras.layers.CuDNNLSTM( - self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False, + self.hidden_layer_size, + return_sequences=True, + return_state=return_state, + stateful=False, ) else: lstm = tf.keras.layers.LSTM( diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index 2fc411ab660..d3565051446 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -20,7 +20,10 @@ def check_transform_proc(proc_l): new_l = [] for p in proc_l: p["kwargs"].update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append(p) return new_l @@ -30,7 +33,11 @@ def check_transform_proc(proc_l): data_loader = { "class": "QlibDataLoader", - "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, + "kwargs": { + "config": self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, } super().__init__( instruments=instruments, @@ -61,7 +68,8 @@ def get_normalized_price_feature(price_field, shift=0): feature_ops = template_norm.format( template_if.format( - template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field), + template_fillnan.format(template_paused.format("$close")), + template_paused.format(price_field), ), template_fillnan.format(template_paused.format("$close")), ) @@ -111,14 +119,24 @@ def get_normalized_price_feature(price_field, shift=0): class HighFreqBacktestHandler(DataHandler): def __init__( - self, instruments="csi300", start_time=None, end_time=None, + self, + instruments="csi300", + start_time=None, + end_time=None, ): data_loader = { "class": "QlibDataLoader", - "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, + "kwargs": { + "config": 
self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, } super().__init__( - instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader, + instruments=instruments, + start_time=start_time, + end_time=end_time, + data_loader=data_loader, ) def get_feature_config(self): @@ -137,7 +155,8 @@ def get_feature_config(self): fields += [ "Cut({0}, 240, None)".format( template_if.format( - template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap), + template_fillnan.format(template_paused.format("$close")), + template_paused.format(simpson_vwap), ) ) ] diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index 73510ef0689..f0ab0dec2b1 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -65,6 +65,8 @@ def __call__(self, df_features): feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), + index=idx, + columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() return df_new_features diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 0bfd0c2a09c..01de59c0e77 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -63,7 +63,13 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG0, }, - "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, + "segments": { + "train": (start_time, train_end_time), + "test": ( + test_start_time, + end_time, + ), + }, }, }, "dataset_backtest": { @@ -75,7 +81,13 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG1, }, - "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, + "segments": { + "train": (start_time, train_end_time), + "test": ( + test_start_time, + end_time, + ), + }, }, }, } @@ -140,11 +152,24 @@ def dump_and_load_dataset(self): "start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00", }, - segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, + segment_kwargs={ + "test": ( + "2021-01-19 00:00:00", + "2021-01-25 16:00:00", + ), + }, ) dataset_backtest.init( - handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",}, - segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, + handler_kwargs={ + "start_time": "2021-01-19 00:00:00", + "end_time": "2021-01-25 16:00:00", + }, + segment_kwargs={ + "test": ( + "2021-01-19 00:00:00", + "2021-01-25 16:00:00", + ), + }, ) ##=============get data============= diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d356b41285e..d587eff1559 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -34,7 +34,10 @@ exp_manager = { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + exp_path, + "default_exp_name": "Experiment", + }, } if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index 6f5c11dc020..d5dab891789 
100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -81,7 +81,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 24526e3328b..743f89462d0 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -39,7 +39,13 @@ class YahooData: INTERVAL_1d = "1d" def __init__( - self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False, + self, + timezone: str = None, + start=None, + end=None, + interval="1d", + delay=0, + show_1min_logging: bool = False, ): """ @@ -119,7 +125,11 @@ def _get_simple(start_, end_): self._sleep() _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval return self.get_data_from_remote( - symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging, + symbol, + interval=_remote_interval, + start=start_, + end=end_, + show_1min_logging=self._show_1min_logging, ) _result = None @@ -428,7 +438,9 @@ class YahooNormalize: DAILY_FORMAT = "%Y-%m-%d" def __init__( - self, date_field_name: str = "date", symbol_field_name: str = "symbol", + self, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): """ @@ -446,7 +458,10 @@ def __init__( @staticmethod def normalize_yahoo( - df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol", + df: pd.DataFrame, + calendar_list: list = None, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): if df.empty: return df @@ -551,7 +566,9 @@ class YahooNormalize1min(YahooNormalize, ABC): CONSISTENT_1d = False def __init__( - self, date_field_name: str = "date", symbol_field_name: str = "symbol", + self, + date_field_name: str = "date", + symbol_field_name: str = "symbol", ): """ diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index ab24fa9cacf..4811fd48612 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -153,13 +153,22 @@ def get_dump_fields(self, df_columns: Iterable[str]) -> Iterable[str]: @staticmethod def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]: - return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),)) + return sorted( + map( + pd.Timestamp, + pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(), + ) + ) def _read_instruments(self, instrument_path: Path) -> pd.DataFrame: df = pd.read_csv( instrument_path, sep=self.INSTRUMENTS_SEP, - names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,], + names=[ + self.symbol_field_name, + self.INSTRUMENTS_START_FIELD, + self.INSTRUMENTS_END_FIELD, + ], ) return df diff --git a/setup.py b/setup.py index d8a9d9efa6b..83cf6e1b602 100644 --- a/setup.py +++ b/setup.py @@ -70,10 +70,16 @@ # Cython Extensions extensions = [ Extension( - "qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.rolling", + ["qlib/data/_libs/rolling.pyx"], + language="c++", + include_dirs=[NUMPY_INCLUDE], ), Extension( - "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.expanding", + ["qlib/data/_libs/expanding.pyx"], + 
language="c++", + include_dirs=[NUMPY_INCLUDE], ), ] @@ -92,7 +98,9 @@ # py_modules=['qlib'], entry_points={ # 'console_scripts': ['mycli=mymodule:cli'], - "console_scripts": ["qrun=qlib.workflow.cli:run",], + "console_scripts": [ + "qrun=qlib.workflow.cli:run", + ], }, ext_modules=extensions, install_requires=REQUIRED, diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index 8b3819c8302..f6e77cba4d8 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,7 +78,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, @@ -173,7 +176,9 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], + 0.10, + "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index de649c37edf..dfa7f8556dd 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,7 +40,9 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod @@ -52,7 +54,10 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) + map( + pd.Timestamp, + pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, + ) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index d5637b02595..c511d1b910d 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,7 +26,9 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod From f947a2fdef294ec927466ec7a287da83604c0bc8 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 22 Feb 2021 15:15:51 +0800 Subject: [PATCH 16/32] Correct two mistakes in annotation. 
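
Two annotation fixes:

1. `qlib/model/base.py`: the weight-fetching call in `fit` and the unpacking
   of the returned frames had been fused into one malformed line. A minimal
   sketch of the intended flow (names taken from the hunk below):

       wdf_train, wdf_valid = dataset.prepare(
           ["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L
       )
       w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]

2. `qlib/portfolio/optimizer.py`: add the missing `bool` annotation for
   `scale_alpha` and document it in the `EnhancedIndexingOptimizer` docstring.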
---
 qlib/model/base.py          | 3 ++-
 qlib/portfolio/optimizer.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/qlib/model/base.py b/qlib/model/base.py
index a7001f0a67b..3708298d5ce 100644
--- a/qlib/model/base.py
+++ b/qlib/model/base.py
@@ -44,7 +44,8 @@ def fit(self, dataset: Dataset):
         # get weights
         try:
             wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"],
-                                                   data_key=DataHandlerLP.DK_L, w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
+                                                   data_key=DataHandlerLP.DK_L)
+            w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
         except KeyError as e:
             w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
             w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index)
diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 3912421277c..75c6c51f52e 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -292,7 +292,7 @@ def __init__(
         delta: float = 0.4,
         bench_dev: float = 0.01,
         inds_dev: float = 0.01,
-        scale_alpha=True,
+        scale_alpha: bool = True,
         verbose: bool = False,
         warm_start: str = DO_NOT_START_FROM,
         max_iters: int = 10000,
@@ -303,6 +303,7 @@ def __init__(
             delta (float): turnover rate limit
             bench_dev (float): benchmark deviation limit
             inds_dev (float): industry deviation limit
+            scale_alpha (bool): whether to scale alpha to match the volatility of the covariance matrix
             verbose (bool): if print detailed information about the solver
             warm_start (str): whether try to warm start (`w0`/`benchmark`/``)
                 (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start)

From d3caea60eed1caf7e8cce7ec89f9f4db938109a5 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 17:32:03 +0800
Subject: [PATCH 17/32] Add unittest for TestStructuredCovEstimator.

---
 tests/test_structured_cov_estimator.py | 80 ++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 tests/test_structured_cov_estimator.py

diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py
new file mode 100644
index 00000000000..6aeae3d8979
--- /dev/null
+++ b/tests/test_structured_cov_estimator.py
@@ -0,0 +1,80 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import unittest
+import numpy as np
+from scipy.linalg import sqrtm
+
+from qlib.model.riskmodel import StructuredCovEstimator
+
+
+class TestStructuredCovEstimator(unittest.TestCase):
+    def test_random_covariance(self):
+        # Try to estimate the covariance from a randomly generated matrix.
+        NUM_VARIABLE = 10
+        NUM_OBSERVATION = 200
+        EPS = 1e-6
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True)
+
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+    def test_constructed_covariance(self):
+        # Try to estimate the covariance from a specially crafted matrix.
+        # There should be some significant correlation since X is specially crafted.
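+        # (Sketch of the construction below: i.i.d. uniform draws are mixed
+        # through sqrtm(cov), which correlates the columns of X; a complex
+        # square root is rejected and re-drawn, and EPS is loose because only
+        # NUM_OBSERVATION samples are available.)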
+        NUM_VARIABLE = 7
+        NUM_OBSERVATION = 500
+        EPS = 0.1
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_VARIABLE - 1)
+
+        sqrt_cov = None
+        while sqrt_cov is None or (np.iscomplex(sqrt_cov)).any():
+            cov = np.random.rand(NUM_VARIABLE, NUM_VARIABLE)
+            for i in range(NUM_VARIABLE):
+                cov[i][i] = 1
+            sqrt_cov = sqrtm(cov)
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) @ sqrt_cov
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+    def test_decomposition(self):
+        # Try to estimate the covariance from a specially crafted matrix.
+        # The matrix is generated under the assumption that observations can be predicted by multiple factors.
+        NUM_VARIABLE = 30
+        NUM_OBSERVATION = 100
+        NUM_FACTOR = 10
+        EPS = 0.1
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_FACTOR)
+
+        F = np.random.rand(NUM_VARIABLE, NUM_FACTOR)
+        B = np.random.rand(NUM_FACTOR, NUM_OBSERVATION)
+        U = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+        X = (F @ B).T + U
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, whereas qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 527718a44015a9cac3f13bd71dfcfb583f2d268f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 22 Feb 2021 19:04:31 +0800
Subject: [PATCH 18/32] Allow enhanced indexing to generate portfolio without
 industry-related restrictions.

---
 qlib/portfolio/optimizer.py     |  18 ++-
 tests/test_enhanced_indexing.py | 194 ++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_enhanced_indexing.py

diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py
index 75c6c51f52e..6ee396a513b 100644
--- a/qlib/portfolio/optimizer.py
+++ b/qlib/portfolio/optimizer.py
@@ -291,7 +291,7 @@ def __init__(
         lamb: float = 10,
         delta: float = 0.4,
         bench_dev: float = 0.01,
-        inds_dev: float = 0.01,
+        inds_dev: float = None,
         scale_alpha: bool = True,
         verbose: bool = False,
         warm_start: str = DO_NOT_START_FROM,
@@ -302,7 +302,8 @@ def __init__(
             lamb (float): risk aversion parameter (larger `lamb` means less focus on return)
             delta (float): turnover rate limit
             bench_dev (float): benchmark deviation limit
-            inds_dev (float): industry deviation limit
+            inds_dev (float/None): industry deviation limit; set `inds_dev` to None to disable the
+                industry-specific restriction
             scale_alpha (bool): whether to scale alpha to match the volatility of the covariance matrix
             verbose (bool): if print detailed information about the solver
             warm_start (str): whether try to warm start (`w0`/`benchmark`/``)
@@ -341,7 +342,7 @@ def __call__(
         varU: np.ndarray,
         w0: np.ndarray,
         w_bench: np.ndarray,
-        inds_onehot: np.ndarray,
+        inds_onehot: np.ndarray = None,
     ) -> Union[np.ndarray, pd.Series]:
         """
         Args:
@@ -354,6 +355,8 @@ def __call__(
         Returns:
             np.ndarray or pd.Series: optimized portfolio allocation
         """
+        assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required."
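+        # NOTE: `inds_onehot` is only required when an industry deviation limit
+        # is set; with the new default `inds_dev=None`, the industry constraints
+        # are simply not added to the constraint list below.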
+ # scale alpha to match volatility if self.scale_alpha: u = u / u.std() @@ -366,15 +369,18 @@ def __call__( risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) obj = cp.Maximize(ret - self.lamb * risk) d_bench = w - w_bench - d_inds = d_bench @ inds_onehot cons = [ w >= 0, cp.sum(w) == 1, d_bench >= -self.bench_dev, d_bench <= self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev, ] + + if self.inds_dev is not None: + d_inds = d_bench @ inds_onehot + cons.append(d_inds >= -self.inds_dev) + cons.append(d_inds <= self.inds_dev) + if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) cons.append(turnover <= self.delta) diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py new file mode 100644 index 00000000000..f6e77cba4d8 --- /dev/null +++ b/tests/test_enhanced_indexing.py @@ -0,0 +1,194 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import sys +import shutil +import unittest +from pathlib import Path + +import numpy as np +import pandas as pd + +import qlib +from qlib.config import REG_CN, C +from qlib.utils import drop_nan_by_y_index +from qlib.contrib.model.gbdt import LGBModel +from qlib.contrib.data.handler import Alpha158 +from qlib.contrib.strategy.strategy import TopkDropoutStrategy +from qlib.contrib.evaluate import ( + backtest as normal_backtest, + risk_analysis, +) +from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.workflow import R +from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord +from qlib.tests.data import GetData +from qlib.tests import TestAutoData + + +market = "csi300" +benchmark = "SH000300" + +################################### +# train model +################################### +data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, +} + +task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": 0.8879, + "learning_rate": 0.0421, + "subsample": 0.8789, + "lambda_l1": 205.6999, + "lambda_l2": 580.9768, + "max_depth": 8, + "num_leaves": 210, + "num_threads": 20, + }, + }, + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + }, +} + +port_analysis_config = { + "strategy": { + "class": "TopkDropoutStrategy", + "module_path": "qlib.contrib.strategy.strategy", + "kwargs": { + "topk": 50, + "n_drop": 5, + }, + }, + "backtest": { + "verbose": False, + "limit_threshold": 0.095, + "account": 100000000, + "benchmark": benchmark, + "deal_price": "close", + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5, + }, +} + + +# train +def train(): + """train model + + Returns + ------- + pred_score: pandas.DataFrame + predict scores + performance: dict + model performance + """ + + # model initiaiton + model = init_instance_by_config(task["model"]) + dataset = init_instance_by_config(task["dataset"]) + + # start exp + with R.start(experiment_name="workflow"): + R.log_params(**flatten_dict(task)) + model.fit(dataset) + + # prediction + recorder = R.get_recorder() + rid = recorder.id + sr = SignalRecord(model, dataset, 
recorder)
+        sr.generate()
+        pred_score = sr.load()
+
+        # calculate ic and ric
+        sar = SigAnaRecord(recorder)
+        sar.generate()
+        ic = sar.load(sar.get_path("ic.pkl"))
+        ric = sar.load(sar.get_path("ric.pkl"))
+
+    return pred_score, {"ic": ic, "ric": ric}, rid
+
+
+def backtest_analysis(pred, rid):
+    """backtest and analysis
+
+    Parameters
+    ----------
+    pred : pandas.DataFrame
+        predict scores
+    rid : str
+        the id of the recorder to be used in this function
+
+    Returns
+    -------
+    analysis : pandas.DataFrame
+        the analysis result
+
+    """
+    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
+    # backtest
+    par = PortAnaRecord(recorder, port_analysis_config)
+    par.generate()
+    analysis_df = par.load(par.get_path("port_analysis.pkl"))
+    print(analysis_df)
+    return analysis_df
+
+
+class TestAllFlow(TestAutoData):
+    PRED_SCORE = None
+    REPORT_NORMAL = None
+    POSITIONS = None
+    RID = None
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve()))
+
+    def test_0_train(self):
+        TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train()
+        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
+        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")
+
+    def test_1_backtest(self):
+        analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
+        self.assertGreaterEqual(
+            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
+            0.10,
+            "backtest failed",
+        )
+
+
+def suite():
+    _suite = unittest.TestSuite()
+    _suite.addTest(TestAllFlow("test_0_train"))
+    _suite.addTest(TestAllFlow("test_1_backtest"))
+    return _suite
+
+
+if __name__ == "__main__":
+    runner = unittest.TextTestRunner()
+    runner.run(suite())

From 2bff6eb78120b8f3fc7aac12267d1e37c847ae0f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Thu, 4 Mar 2021 22:08:11 +0800
Subject: [PATCH 19/32] Split classes in riskmodel.py & optimizer.py into
 separate files.

---
 qlib/model/riskmodel_poet.py        |   0
 qlib/model/riskmodel_shrink.py      |   0
 qlib/model/riskmodel_structured.py  |   0
 qlib/portfolio/enhanced_indexing.py |   0
 tests/test_enhanced_indexing.py     | 212 ++++++++++++++++++++--------
 5 files changed, 150 insertions(+), 62 deletions(-)
 create mode 100644 qlib/model/riskmodel_poet.py
 create mode 100644 qlib/model/riskmodel_shrink.py
 create mode 100644 qlib/model/riskmodel_structured.py
 create mode 100644 qlib/portfolio/enhanced_indexing.py

diff --git a/qlib/model/riskmodel_poet.py b/qlib/model/riskmodel_poet.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/model/riskmodel_shrink.py b/qlib/model/riskmodel_shrink.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/model/riskmodel_structured.py b/qlib/model/riskmodel_structured.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/qlib/portfolio/enhanced_indexing.py b/qlib/portfolio/enhanced_indexing.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py
index f6e77cba4d8..f21d51984a6 100644
--- a/tests/test_enhanced_indexing.py
+++ b/tests/test_enhanced_indexing.py
@@ -2,32 +2,39 @@
 # Licensed under the MIT License.
import sys +import math import shutil import unittest -from pathlib import Path - import numpy as np import pandas as pd +from tqdm import tqdm +from pathlib import Path import qlib -from qlib.config import REG_CN, C -from qlib.utils import drop_nan_by_y_index -from qlib.contrib.model.gbdt import LGBModel -from qlib.contrib.data.handler import Alpha158 -from qlib.contrib.strategy.strategy import TopkDropoutStrategy -from qlib.contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) -from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.config import C +from qlib.utils import init_instance_by_config, flatten_dict from qlib.workflow import R -from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord -from qlib.tests.data import GetData +from qlib.config import REG_CN +from qlib.workflow.record_temp import SignalRecord, SigAnaRecord from qlib.tests import TestAutoData +from qlib.portfolio.optimizer import EnhancedIndexingOptimizer +from qlib.model.riskmodel import StructuredCovEstimator +from qlib.data.dataset.loader import QlibDataLoader +from qlib.data.dataset.handler import DataHandler +from qlib.data import D +from qlib.utils import exists_qlib_data, init_instance_by_config +market = "all" +trade_gap = 21 +label_config = "Ref($close, -{}) / Ref($close, -1) - 1".format(trade_gap) # reconstruct portfolio once a month -market = "csi300" -benchmark = "SH000300" +provider_uri = "~/.qlib_ei/qlib_data/cn_data" # target_dir +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(Path.cwd().parent.joinpath("scripts"))) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region=REG_CN) +qlib.init(provider_uri=provider_uri, region=REG_CN) ################################### # train model @@ -36,8 +43,9 @@ "start_time": "2008-01-01", "end_time": "2020-08-01", "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", + "fit_end_time": "2014-11-30", "instruments": market, + "label": [label_config] } task = { @@ -53,7 +61,7 @@ "lambda_l2": 580.9768, "max_depth": 8, "num_leaves": 210, - "num_threads": 20, + "num_threads": 32, }, }, "dataset": { @@ -66,37 +74,104 @@ "kwargs": data_handler_config, }, "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), + "train": ("2008-01-01", "2014-11-30"), + "valid": ("2015-01-01", "2016-11-30"), + "test": ("2017-01-01", "2018-01-01"), }, }, }, } -port_analysis_config = { - "strategy": { - "class": "TopkDropoutStrategy", - "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, - }, - "backtest": { - "verbose": False, - "limit_threshold": 0.095, - "account": 100000000, - "benchmark": benchmark, - "deal_price": "close", - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5, - }, -} + +class CSI300: + """Simulate CSI300 as the Benchmark for Enhanced Indexing to Track""" + + def __init__(self): + # provider_uri = '/nfs_data/qlib_data/ycz_daily/qlib' + # qlib.init(provider_uri=provider_uri, region=REG_CN, dataset_cache=None, expression_cache=None) + self.csi_weight = D.features(D.instruments('csi300'), ['$csi300_weight']) + + def __call__(self, pd_index, trade_date): + weights = np.zeros(len(pd_index)) + + for idx, instrument in enumerate(pd_index): + if (instrument, trade_date) in self.csi_weight.index: + weight = self.csi_weight.loc[(instrument, trade_date)].values[0] + if 
not math.isnan(weight):
+                    weights[idx] = weight
+
+        assert weights.sum() > 0, "Failed to fetch CSI weights!"
+        weights = weights / weights.sum()
+
+        return weights
+
+
+class EnhancedIndexingStrategy:
+    """Enhanced Indexing Strategy"""
+
+    def __init__(self):
+        self.benchmark = CSI300()
+
+        provider_uri = "~/.qlib_ei/qlib_data/cn_data"
+        qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+        self.data_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader(["$close"]))
+        self.label_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader([label_config]))
+        self.cov_estimator = StructuredCovEstimator()
+        self.optimizer = EnhancedIndexingOptimizer(lamb=0.1, delta=0.4, bench_dev=0.03, max_iters=50000)
+
+    def update(self, score_series, current, pred_date):
+        """
+        Parameters
+        -----------
+        score_series : pd.Series
+            stock_id, score.
+        current : pd.DataFrame
+            current portfolio weights.
+        pred_date : pd.Timestamp
+            prediction date.
+        """
+        print(score_series)
+        score_series = score_series.dropna()
+
+        # portfolio init weight
+        init_weight = current.reindex(score_series.index, fill_value=0).values.squeeze()
+        init_weight_sum = init_weight.sum()
+        if init_weight_sum > 0:
+            init_weight /= init_weight_sum
+
+        # covariance estimation
+        selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index)
+        price = self.data_handler.fetch(selector, level=None, squeeze=True)
+        F, cov_b, var_u = self.cov_estimator.predict(price, return_decomposed_components=True)
+
+        # optimize target portfolio
+        w_bench = self.benchmark(score_series.index, pred_date)
+        passed_init_weight = init_weight if init_weight_sum > 0 else None
+        # print(F)
+        # print(cov_b)
+        # print(var_u)
+        # print(passed_init_weight)
+        # print(w_bench)
+        target_weight = self.optimizer(score_series.values, F, cov_b, var_u, passed_init_weight, w_bench)
+        # print(target_weight)
+        target = pd.DataFrame(data=target_weight, index=score_series.index)
+
+        active_weights = target_weight - w_bench
+        selector = (self.label_handler.get_range_selector(pred_date, 1), score_series.index)
+        label = self.label_handler.fetch(selector, level=None, squeeze=True)
+        alpha = 0
+        for instrument, weight in zip(score_series.index, active_weights):
+            delta = label.loc[(pred_date, instrument)]
+            alpha += weight * (0 if math.isnan(delta) else delta)
+
+        print(alpha)
+
+        return alpha, target


-# train
 def train():
     """train model
@@ -108,7 +183,7 @@ def train():
         model performance
     """

-    # model initiaiton
+    # model initiation
     model = init_instance_by_config(task["model"])
     dataset = init_instance_by_config(task["dataset"])

@@ -133,29 +208,42 @@ def train():
     return pred_score, {"ic": ic, "ric": ric}, rid


-def backtest_analysis(pred, rid):
-    """backtest and analysis
+def backtest_analysis(scores):
+    """backtest enhanced indexing

     Parameters
     ----------
-    pred : pandas.DataFrame
-        predict scores
-    rid : str
-        the id of the recorder to be used in this function
+    scores: pandas.DataFrame
+        predict scores

     Returns
     -------
-    analysis : pandas.DataFrame
-        the analysis result
-
+    sharpe_ratio: floating-point
+        sharpe ratio of the enhanced indexing portfolio
     """
-    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
-    # backtest
-    par = PortAnaRecord(recorder, port_analysis_config)
-    par.generate()
-    analysis_df = par.load(par.get_path("port_analysis.pkl"))
-    print(analysis_df)
-    return analysis_df
+
+    # backtest and analysis
+    with R.start(experiment_name="backtest_analysis"):
+        strategy = 
EnhancedIndexingStrategy() + dates = scores.index.get_level_values(0).unique() + + alphas = [] + current = pd.DataFrame() + gap_between_next_trade = 0 + for date in tqdm(dates): + if gap_between_next_trade == 0: + score_series = scores.loc[date] + alpha, current = strategy.update(score_series, current, date) + alphas.append(alpha) + gap_between_next_trade = trade_gap + else: + gap_between_next_trade -= 1 + + alphas = np.array(alphas) + sharpe_ratio = alphas.mean() / np.std(alphas) + print('Sharpe:', sharpe_ratio) + + return sharpe_ratio class TestAllFlow(TestAutoData): @@ -174,10 +262,10 @@ def test_0_train(self): self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") def test_1_backtest(self): - analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) + sharpe_ratio = backtest_analysis(TestAllFlow.PRED_SCORE) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, + sharpe_ratio, + 0.90, "backtest failed", ) From 83c6e747835656d0c5d5f90fb3c903a239689158 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Thu, 4 Mar 2021 22:30:38 +0800 Subject: [PATCH 20/32] Reindex files. --- qlib/model/riskmodel.py | 611 ------------------ .../__init__.py} | 0 qlib/model/riskmodel/base.py | 141 ++++ qlib/model/riskmodel/poet.py | 84 +++ qlib/model/riskmodel/shrink.py | 262 ++++++++ qlib/model/riskmodel/structured.py | 152 +++++ qlib/portfolio/enhanced_indexing.py | 0 .../optimizer/__init__.py} | 0 .../optimizer/base.py} | 0 qlib/portfolio/optimizer/enhanced_indexing.py | 140 ++++ qlib/portfolio/{ => optimizer}/optimizer.py | 159 +---- tests/test_enhanced_indexing.py | 282 -------- 12 files changed, 793 insertions(+), 1038 deletions(-) delete mode 100644 qlib/model/riskmodel.py rename qlib/model/{riskmodel_poet.py => riskmodel/__init__.py} (100%) create mode 100644 qlib/model/riskmodel/base.py create mode 100644 qlib/model/riskmodel/poet.py create mode 100644 qlib/model/riskmodel/shrink.py create mode 100644 qlib/model/riskmodel/structured.py delete mode 100644 qlib/portfolio/enhanced_indexing.py rename qlib/{model/riskmodel_shrink.py => portfolio/optimizer/__init__.py} (100%) rename qlib/{model/riskmodel_structured.py => portfolio/optimizer/base.py} (100%) create mode 100644 qlib/portfolio/optimizer/enhanced_indexing.py rename qlib/portfolio/{ => optimizer}/optimizer.py (62%) delete mode 100644 tests/test_enhanced_indexing.py diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py deleted file mode 100644 index f19c60fc9be..00000000000 --- a/qlib/model/riskmodel.py +++ /dev/null @@ -1,611 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import numpy as np -import pandas as pd -from typing import Union -from sklearn.decomposition import PCA, FactorAnalysis - -from qlib.model.base import BaseModel - - -class RiskModel(BaseModel): - """Risk Model - - A risk model is used to estimate the covariance matrix of stock returns. - """ - - MASK_NAN = "mask" - FILL_NAN = "fill" - IGNORE_NAN = "ignore" - - def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True): - """ - Args: - nan_option (str): nan handling option (`ignore`/`mask`/`fill`). - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. 
- """ - # nan - assert nan_option in [ - self.MASK_NAN, - self.FILL_NAN, - self.IGNORE_NAN, - ], f"`nan_option={nan_option}` is not supported" - self.nan_option = nan_option - - self.assume_centered = assume_centered - self.scale_return = scale_return - - def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True - ) -> Union[pd.DataFrame, np.ndarray]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - - Returns: - pd.DataFrame or np.ndarray: estimated covariance (or correlation). - """ - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray) -> np.ndarray: - """covariance estimation implementation - - This method should be overridden by child classes. - - By default, this method implements the empirical covariance estimation. - - Args: - X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - - Returns: - np.ndarray: covariance matrix. - """ - xTx = np.asarray(X.T.dot(X)) - N = len(X) - if isinstance(X, np.ma.MaskedArray): - M = 1 - X.mask - N = M.T.dot(M) # each pair has distinct number of samples - return xTx / N - - def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: - """handle nan and centerize data - - Note: - if `nan_option='mask'` then the returned array will be `np.ma.MaskedArray`. - """ - # handle nan - if self.nan_option == self.FILL_NAN: - X = np.nan_to_num(X) - elif self.nan_option == self.MASK_NAN: - X = np.ma.masked_invalid(X) - # centralize - if not self.assume_centered: - X = X - np.nanmean(X, axis=0) - return X - - -class ShrinkCovEstimator(RiskModel): - """Shrinkage Covariance Estimator - - This estimator will shrink the sample covariance matrix towards - an identify matrix: - S_hat = (1 - alpha) * S + alpha * F - where `alpha` is the shrink parameter and `F` is the shrinking target. - - The following shrinking parameters (`alpha`) are supported: - - `lw` [1][2][3]: use Ledoit-Wolf shrinking parameter. - - `oas` [4]: use Oracle Approximating Shrinkage shrinking parameter. - - float: directly specify the shrink parameter, should be between [0, 1]. - - The following shrinking targets (`F`) are supported: - - `const_var` [1][4][5]: assume stocks have the same constant variance and zero correlation. 
- - `const_corr` [2][6]: assume stocks have different variance but equal correlation. - - `single_factor` [3][7]: assume single factor model as the shrinking target. - - np.ndarray: provide the shrinking targets directly. - - Note: - - The optimal shrinking parameter depends on the selection of the shrinking target. - Currently, `oas` is not supported for `const_corr` and `single_factor`. - - Remember to set `nan_option` to `fill` or `mask` if your data has missing values. - - References: - [1] Ledoit, O., & Wolf, M. (2004). A well-conditioned estimator for large-dimensional covariance matrices. - Journal of Multivariate Analysis, 88(2), 365–411. https://doi.org/10.1016/S0047-259X(03)00096-4 - [2] Ledoit, O., & Wolf, M. (2004). Honey, I shrunk the sample covariance matrix. - Journal of Portfolio Management, 30(4), 1–22. https://doi.org/10.3905/jpm.2004.110 - [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns - with an application to portfolio selection. - Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0 - [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance - estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029. - https://doi.org/10.1109/TSP.2010.2053029 - [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip - [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip - [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip - """ - - SHR_LW = "lw" - SHR_OAS = "oas" - - TGT_CONST_VAR = "const_var" - TGT_CONST_CORR = "const_corr" - TGT_SINGLE_FACTOR = "single_factor" - - def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs): - """ - Args: - alpha (str or float): shrinking parameter or estimator (`lw`/`oas`) - target (str or np.ndarray): shrinking target (`const_var`/`const_corr`/`single_factor`) - kwargs: see `RiskModel` for more information - """ - super().__init__(**kwargs) - - # alpha - if isinstance(alpha, str): - assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported" - elif isinstance(alpha, (float, np.floating)): - assert 0 <= alpha <= 1, "alpha should be between [0, 1]" - else: - raise TypeError("invalid argument type for `alpha`") - self.alpha = alpha - - # target - if isinstance(target, str): - assert target in [ - self.TGT_CONST_VAR, - self.TGT_CONST_CORR, - self.TGT_SINGLE_FACTOR, - ], f"shrinking target `{target} is not supported" - elif isinstance(target, np.ndarray): - pass - else: - raise TypeError("invalid argument type for `target`") - if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR: - raise NotImplementedError("currently `oas` can only support `const_var` as target") - self.target = target - - def _predict(self, X: np.ndarray) -> np.ndarray: - # sample covariance - S = super()._predict(X) - - # shrinking target - F = self._get_shrink_target(X, S) - - # get shrinking parameter - alpha = self._get_shrink_param(X, S, F) - - # shrink covariance - if alpha > 0: - S *= 1 - alpha - F *= alpha - S += F - - return S - - def _get_shrink_target(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target `F`""" - if self.target == self.TGT_CONST_VAR: - return self._get_shrink_target_const_var(X, S) - if self.target == self.TGT_CONST_CORR: - return self._get_shrink_target_const_corr(X, S) - if self.target == 
self.TGT_SINGLE_FACTOR: - return self._get_shrink_target_single_factor(X, S) - return self.target - - def _get_shrink_target_const_var(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with constant variance - - This target assumes zero pair-wise correlation and constant variance. - The constant variance is estimated by averaging all sample's variances. - """ - n = len(S) - F = np.eye(n) - np.fill_diagonal(F, np.mean(np.diag(S))) - return F - - def _get_shrink_target_const_corr(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with constant correlation - - This target assumes constant pair-wise correlation but keep the sample variance. - The constant correlation is estimated by averaging all pairwise correlations. - """ - n = len(S) - var = np.diag(S) - sqrt_var = np.sqrt(var) - covar = np.outer(sqrt_var, sqrt_var) - r_bar = (np.sum(S / covar) - n) / (n * (n - 1)) - F = r_bar * covar - np.fill_diagonal(F, var) - return F - - def _get_shrink_target_single_factor(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: - """get shrinking target with single factor model""" - X_mkt = np.nanmean(X, axis=1) - cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) - var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) - F = np.outer(cov_mkt, cov_mkt) / var_mkt - np.fill_diagonal(F, np.diag(S)) - return F - - def _get_shrink_param(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """get shrinking parameter `alpha` - - Note: - The Ledoit-Wolf shrinking parameter estimator consists of three different methods. - """ - if self.alpha == self.SHR_OAS: - return self._get_shrink_param_oas(X, S, F) - elif self.alpha == self.SHR_LW: - if self.target == self.TGT_CONST_VAR: - return self._get_shrink_param_lw_const_var(X, S, F) - if self.target == self.TGT_CONST_CORR: - return self._get_shrink_param_lw_const_corr(X, S, F) - if self.target == self.TGT_SINGLE_FACTOR: - return self._get_shrink_param_lw_single_factor(X, S, F) - return self.alpha - - def _get_shrink_param_oas(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Oracle Approximating Shrinkage Estimator - - This method uses the following formula to estimate the `alpha` - parameter for the shrink covariance estimator: - A = (1 - 2 / p) * trace(S^2) + trace^2(S) - B = (n + 1 - 2 / p) * (trace(S^2) - trace^2(S) / p) - alpha = A / B - where `n`, `p` are the dim of observations and variables respectively. - """ - trS2 = np.sum(S ** 2) - tr2S = np.trace(S) ** 2 - - n, p = X.shape - - A = (1 - 2 / p) * (trS2 + tr2S) - B = (n + 1 - 2 / p) * (trS2 + tr2S / p) - alpha = A / B - - return alpha - - def _get_shrink_param_lw_const_var(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Constant Variance) - - This method shrinks the covariance matrix towards the constand variance target. - """ - t, n = X.shape - - y = X ** 2 - phi = np.sum(y.T.dot(y) / t - S ** 2) - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = phi / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - def _get_shrink_param_lw_const_corr(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Constant Correlation) - - This method shrinks the covariance matrix towards the constand correlation target. 
- """ - t, n = X.shape - - var = np.diag(S) - sqrt_var = np.sqrt(var) - r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1)) - - y = X ** 2 - phi_mat = y.T.dot(y) / t - S ** 2 - phi = np.sum(phi_mat) - - theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S - np.fill_diagonal(theta_mat, 0) - rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat) - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = (phi - rho) / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: - """Ledoit-Wolf Shrinkage Estimator (Single Factor Model) - - This method shrinks the covariance matrix towards the single factor model target. - """ - t, n = X.shape - - X_mkt = np.nanmean(X, axis=1) - cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) - var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) - - y = X ** 2 - phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2) - - rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2) - z = X * X_mkt[:, None] - v1 = y.T.dot(z) / t - cov_mkt[:, None] * S - roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt - v3 = z.T.dot(z) / t - var_mkt * S - roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 - ) - roff = 2 * roff1 - roff3 - rho = rdiag + roff - - gamma = np.linalg.norm(S - F, "fro") ** 2 - - kappa = (phi - rho) / gamma - alpha = max(0, min(1, kappa / t)) - - return alpha - - -class POETCovEstimator(RiskModel): - """Principal Orthogonal Complement Thresholding Estimator (POET) - - Reference: - [1] Fan, J., Liao, Y., & Mincheva, M. (2013). Large covariance estimation by thresholding principal orthogonal complements. - Journal of the Royal Statistical Society. Series B: Statistical Methodology, 75(4), 603–680. https://doi.org/10.1111/rssb.12016 - [2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m - """ - - THRESH_SOFT = "soft" - THRESH_HARD = "hard" - THRESH_SCAD = "scad" - - def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs): - """ - Args: - num_factors (int): number of factors (if set to zero, no factor model will be used). - thresh (float): the positive constant for thresholding. - thresh_method (str): thresholding method, which can be - - 'soft': soft thresholding. - - 'hard': hard thresholding. - - 'scad': scad thresholding. - kwargs: see `RiskModel` for more information. 
- """ - super().__init__(**kwargs) - - assert num_factors >= 0, "`num_factors` requires a positive integer" - self.num_factors = num_factors - - assert thresh >= 0, "`thresh` requires a positive float number" - self.thresh = thresh - - assert thresh_method in [ - self.THRESH_HARD, - self.THRESH_SOFT, - self.THRESH_SCAD, - ], "`thresh_method` should be `soft`/`hard`/`scad`" - self.thresh_method = thresh_method - - def _predict(self, X: np.ndarray) -> np.ndarray: - - Y = X.T # NOTE: to match POET's implementation - p, n = Y.shape - - if self.num_factors > 0: - Dd, V = np.linalg.eig(Y.T.dot(Y)) - V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) - LamPCA = Y.dot(F) / n - uhat = np.asarray(Y - LamPCA.dot(F.T)) - Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) - rate = 1 / np.sqrt(p) + np.sqrt(np.log(p) / n) - else: - uhat = np.asarray(Y) - rate = np.sqrt(np.log(p) / n) - Lowrank = 0 - - lamb = rate * self.thresh - SuPCA = uhat.dot(uhat.T) / n - SuDiag = np.diag(np.diag(SuPCA)) - R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5)) - - if self.thresh_method == self.THRESH_HARD: - M = R * (np.abs(R) > lamb) - elif self.thresh_method == self.THRESH_SOFT: - res = np.abs(R) - lamb - res = (res + np.abs(res)) / 2 - M = np.sign(R) * res - else: - M1 = (np.abs(R) < 2 * lamb) * np.sign(R) * (np.abs(R) - lamb) * (np.abs(R) > lamb) - M2 = (np.abs(R) < 3.7 * lamb) * (np.abs(R) >= 2 * lamb) * (2.7 * R - 3.7 * np.sign(R) * lamb) / 1.7 - M3 = (np.abs(R) >= 3.7 * lamb) * R - M = M1 + M2 + M3 - - Rthresh = M - np.diag(np.diag(M)) + np.eye(p) - SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5) - SigmaY = SigmaU + Lowrank - - return SigmaY - - -class StructuredCovEstimator(RiskModel): - """Structured Covariance Estimator - - This estimator assumes observations can be predicted by multiple factors - X = FB + U - where `F` can be specified by explicit risk factors or latent factors. - - Therefore the structured covariance can be estimated by - cov(X) = F cov(B) F.T + cov(U) - - We use latent factor models to estimate the structured covariance. - Specifically, the following latent factor models are supported: - - `pca`: Principal Component Analysis - - `fa`: Factor Analysis - - Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and - precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 - """ - - FACTOR_MODEL_PCA = "pca" - FACTOR_MODEL_FA = "fa" - - def __init__( - self, - factor_model: str = "pca", - num_factors: int = 10, - nan_option: str = "ignore", - assume_centered: bool = False, - scale_return: bool = True, - ): - """ - Args: - factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). - num_factors (int): number of components to keep. - nan_option (str): nan handling option (`ignore`/`fill`). - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. 
- """ - super().__init__(nan_option, assume_centered, scale_return) - - assert factor_model in [ - self.FACTOR_MODEL_PCA, - self.FACTOR_MODEL_FA, - ], "factor_model={} is not supported".format(factor_model) - self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis - - self.num_factors = num_factors - - def predict( - self, - X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, - is_price: bool = True, - return_decomposed_components=False, - ) -> Union[pd.DataFrame, np.ndarray, tuple]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - return_decomposed_components (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. - """ - assert ( - not return_corr or not return_decomposed_components - ), "Can only return either correlation matrix or decomposed components." - - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - if return_decomposed_components: - F, cov_b, var_u = self._predict(X, return_structured=True) - return F, cov_b, var_u - else: - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: - """ - covariance estimation implementation - - Args: - X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - return_structured (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or np.ndarray: decomposed covariance matrix or covariance matrix. 
- """ - - model = self.solver(self.num_factors, random_state=0).fit(X) - - F = model.components_.T # num_features x num_factors - B = model.transform(X) # num_samples x num_factors - U = X - B @ F.T - cov_b = np.cov(B.T) # num_factors x num_factors - var_u = np.var(U, axis=0) # diagonal - - if return_structured: - return F, cov_b, var_u - - cov_x = F @ cov_b @ F.T + np.diag(var_u) - - return cov_x diff --git a/qlib/model/riskmodel_poet.py b/qlib/model/riskmodel/__init__.py similarity index 100% rename from qlib/model/riskmodel_poet.py rename to qlib/model/riskmodel/__init__.py diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py new file mode 100644 index 00000000000..d5b009cccca --- /dev/null +++ b/qlib/model/riskmodel/base.py @@ -0,0 +1,141 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +from typing import Union + +from qlib.model.base import BaseModel + +from qlib.model.riskmodel_poet import POETCovEstimator +from qlib.model.riskmodel_shrink import ShrinkCovEstimator +from qlib.model.riskmodel_structured import StructuredCovEstimator + + +class RiskModel(BaseModel): + """Risk Model + + A risk model is used to estimate the covariance matrix of stock returns. + """ + + MASK_NAN = "mask" + FILL_NAN = "fill" + IGNORE_NAN = "ignore" + + def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True): + """ + Args: + nan_option (str): nan handling option (`ignore`/`mask`/`fill`). + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. + """ + # nan + assert nan_option in [ + self.MASK_NAN, + self.FILL_NAN, + self.IGNORE_NAN, + ], f"`nan_option={nan_option}` is not supported" + self.nan_option = nan_option + + self.assume_centered = assume_centered + self.scale_return = scale_return + + def predict( + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + ) -> Union[pd.DataFrame, np.ndarray]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + + Returns: + pd.DataFrame or np.ndarray: estimated covariance (or correlation). 
+ """ + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray) -> np.ndarray: + """covariance estimation implementation + + This method should be overridden by child classes. + + By default, this method implements the empirical covariance estimation. + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + + Returns: + np.ndarray: covariance matrix. + """ + xTx = np.asarray(X.T.dot(X)) + N = len(X) + if isinstance(X, np.ma.MaskedArray): + M = 1 - X.mask + N = M.T.dot(M) # each pair has distinct number of samples + return xTx / N + + def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: + """handle nan and centerize data + + Note: + if `nan_option='mask'` then the returned array will be `np.ma.MaskedArray`. + """ + # handle nan + if self.nan_option == self.FILL_NAN: + X = np.nan_to_num(X) + elif self.nan_option == self.MASK_NAN: + X = np.ma.masked_invalid(X) + # centralize + if not self.assume_centered: + X = X - np.nanmean(X, axis=0) + return X + + + + + + + + + diff --git a/qlib/model/riskmodel/poet.py b/qlib/model/riskmodel/poet.py new file mode 100644 index 00000000000..8dbe890360e --- /dev/null +++ b/qlib/model/riskmodel/poet.py @@ -0,0 +1,84 @@ +import numpy as np + +from qlib.model.riskmodel import RiskModel + + +class POETCovEstimator(RiskModel): + """Principal Orthogonal Complement Thresholding Estimator (POET) + + Reference: + [1] Fan, J., Liao, Y., & Mincheva, M. (2013). Large covariance estimation by thresholding principal orthogonal complements. + Journal of the Royal Statistical Society. Series B: Statistical Methodology, 75(4), 603–680. https://doi.org/10.1111/rssb.12016 + [2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m + """ + + THRESH_SOFT = "soft" + THRESH_HARD = "hard" + THRESH_SCAD = "scad" + + def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs): + """ + Args: + num_factors (int): number of factors (if set to zero, no factor model will be used). + thresh (float): the positive constant for thresholding. + thresh_method (str): thresholding method, which can be + - 'soft': soft thresholding. + - 'hard': hard thresholding. + - 'scad': scad thresholding. + kwargs: see `RiskModel` for more information. 
+ """ + super().__init__(**kwargs) + + assert num_factors >= 0, "`num_factors` requires a positive integer" + self.num_factors = num_factors + + assert thresh >= 0, "`thresh` requires a positive float number" + self.thresh = thresh + + assert thresh_method in [ + self.THRESH_HARD, + self.THRESH_SOFT, + self.THRESH_SCAD, + ], "`thresh_method` should be `soft`/`hard`/`scad`" + self.thresh_method = thresh_method + + def _predict(self, X: np.ndarray) -> np.ndarray: + + Y = X.T # NOTE: to match POET's implementation + p, n = Y.shape + + if self.num_factors > 0: + Dd, V = np.linalg.eig(Y.T.dot(Y)) + V = V[:, np.argsort(Dd)] + F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + LamPCA = Y.dot(F) / n + uhat = np.asarray(Y - LamPCA.dot(F.T)) + Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) + rate = 1 / np.sqrt(p) + np.sqrt(np.log(p) / n) + else: + uhat = np.asarray(Y) + rate = np.sqrt(np.log(p) / n) + Lowrank = 0 + + lamb = rate * self.thresh + SuPCA = uhat.dot(uhat.T) / n + SuDiag = np.diag(np.diag(SuPCA)) + R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5)) + + if self.thresh_method == self.THRESH_HARD: + M = R * (np.abs(R) > lamb) + elif self.thresh_method == self.THRESH_SOFT: + res = np.abs(R) - lamb + res = (res + np.abs(res)) / 2 + M = np.sign(R) * res + else: + M1 = (np.abs(R) < 2 * lamb) * np.sign(R) * (np.abs(R) - lamb) * (np.abs(R) > lamb) + M2 = (np.abs(R) < 3.7 * lamb) * (np.abs(R) >= 2 * lamb) * (2.7 * R - 3.7 * np.sign(R) * lamb) / 1.7 + M3 = (np.abs(R) >= 3.7 * lamb) * R + M = M1 + M2 + M3 + + Rthresh = M - np.diag(np.diag(M)) + np.eye(p) + SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5) + SigmaY = SigmaU + Lowrank + + return SigmaY diff --git a/qlib/model/riskmodel/shrink.py b/qlib/model/riskmodel/shrink.py new file mode 100644 index 00000000000..1298891fb01 --- /dev/null +++ b/qlib/model/riskmodel/shrink.py @@ -0,0 +1,262 @@ +import numpy as np +from typing import Union + +from qlib.model.riskmodel import RiskModel + + +class ShrinkCovEstimator(RiskModel): + """Shrinkage Covariance Estimator + + This estimator will shrink the sample covariance matrix towards + an identify matrix: + S_hat = (1 - alpha) * S + alpha * F + where `alpha` is the shrink parameter and `F` is the shrinking target. + + The following shrinking parameters (`alpha`) are supported: + - `lw` [1][2][3]: use Ledoit-Wolf shrinking parameter. + - `oas` [4]: use Oracle Approximating Shrinkage shrinking parameter. + - float: directly specify the shrink parameter, should be between [0, 1]. + + The following shrinking targets (`F`) are supported: + - `const_var` [1][4][5]: assume stocks have the same constant variance and zero correlation. + - `const_corr` [2][6]: assume stocks have different variance but equal correlation. + - `single_factor` [3][7]: assume single factor model as the shrinking target. + - np.ndarray: provide the shrinking targets directly. + + Note: + - The optimal shrinking parameter depends on the selection of the shrinking target. + Currently, `oas` is not supported for `const_corr` and `single_factor`. + - Remember to set `nan_option` to `fill` or `mask` if your data has missing values. + + References: + [1] Ledoit, O., & Wolf, M. (2004). A well-conditioned estimator for large-dimensional covariance matrices. + Journal of Multivariate Analysis, 88(2), 365–411. https://doi.org/10.1016/S0047-259X(03)00096-4 + [2] Ledoit, O., & Wolf, M. (2004). Honey, I shrunk the sample covariance matrix. + Journal of Portfolio Management, 30(4), 1–22. 
+        [3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns
+            with an application to portfolio selection.
+            Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0
+        [4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance
+            estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029.
+            https://doi.org/10.1109/TSP.2010.2053029
+        [5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip
+        [6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip
+        [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip
+    """
+
+    SHR_LW = "lw"
+    SHR_OAS = "oas"
+
+    TGT_CONST_VAR = "const_var"
+    TGT_CONST_CORR = "const_corr"
+    TGT_SINGLE_FACTOR = "single_factor"
+
+    def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs):
+        """
+        Args:
+            alpha (str or float): shrinking parameter or estimator (`lw`/`oas`)
+            target (str or np.ndarray): shrinking target (`const_var`/`const_corr`/`single_factor`)
+            kwargs: see `RiskModel` for more information
+        """
+        super().__init__(**kwargs)
+
+        # alpha
+        if isinstance(alpha, str):
+            assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported"
+        elif isinstance(alpha, (float, np.floating)):
+            assert 0 <= alpha <= 1, "alpha should be between [0, 1]"
+        else:
+            raise TypeError("invalid argument type for `alpha`")
+        self.alpha = alpha
+
+        # target
+        if isinstance(target, str):
+            assert target in [
+                self.TGT_CONST_VAR,
+                self.TGT_CONST_CORR,
+                self.TGT_SINGLE_FACTOR,
+            ], f"shrinking target `{target}` is not supported"
+        elif isinstance(target, np.ndarray):
+            pass
+        else:
+            raise TypeError("invalid argument type for `target`")
+        if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR:
+            raise NotImplementedError("currently `oas` can only support `const_var` as target")
+        self.target = target
+
+    def _predict(self, X: np.ndarray) -> np.ndarray:
+        # sample covariance
+        S = super()._predict(X)
+
+        # shrinking target
+        F = self._get_shrink_target(X, S)
+
+        # get shrinking parameter
+        alpha = self._get_shrink_param(X, S, F)
+
+        # shrink covariance
+        if alpha > 0:
+            S *= 1 - alpha
+            F *= alpha
+            S += F
+
+        return S
+
+    def _get_shrink_target(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target `F`"""
+        if self.target == self.TGT_CONST_VAR:
+            return self._get_shrink_target_const_var(X, S)
+        if self.target == self.TGT_CONST_CORR:
+            return self._get_shrink_target_const_corr(X, S)
+        if self.target == self.TGT_SINGLE_FACTOR:
+            return self._get_shrink_target_single_factor(X, S)
+        return self.target
+
+    def _get_shrink_target_const_var(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target with constant variance
+
+        This target assumes zero pair-wise correlation and constant variance.
+        The constant variance is estimated by averaging the sample variances of all variables.
+        """
+        n = len(S)
+        F = np.eye(n)
+        np.fill_diagonal(F, np.mean(np.diag(S)))
+        return F
+
+    def _get_shrink_target_const_corr(self, X: np.ndarray, S: np.ndarray) -> np.ndarray:
+        """get shrinking target with constant correlation
+
+        This target assumes constant pair-wise correlation but keeps the sample variances.
+        The constant correlation is estimated by averaging all pairwise correlations.
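+
+        In matrix form (mirroring the code below): F_ij = r_bar * sqrt(var_i * var_j)
+        for i != j and F_ii = var_i, where `r_bar` is the average of all pairwise
+        sample correlations.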
+ """ + n = len(S) + var = np.diag(S) + sqrt_var = np.sqrt(var) + covar = np.outer(sqrt_var, sqrt_var) + r_bar = (np.sum(S / covar) - n) / (n * (n - 1)) + F = r_bar * covar + np.fill_diagonal(F, var) + return F + + def _get_shrink_target_single_factor(self, X: np.ndarray, S: np.ndarray) -> np.ndarray: + """get shrinking target with single factor model""" + X_mkt = np.nanmean(X, axis=1) + cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) + var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) + F = np.outer(cov_mkt, cov_mkt) / var_mkt + np.fill_diagonal(F, np.diag(S)) + return F + + def _get_shrink_param(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """get shrinking parameter `alpha` + + Note: + The Ledoit-Wolf shrinking parameter estimator consists of three different methods. + """ + if self.alpha == self.SHR_OAS: + return self._get_shrink_param_oas(X, S, F) + elif self.alpha == self.SHR_LW: + if self.target == self.TGT_CONST_VAR: + return self._get_shrink_param_lw_const_var(X, S, F) + if self.target == self.TGT_CONST_CORR: + return self._get_shrink_param_lw_const_corr(X, S, F) + if self.target == self.TGT_SINGLE_FACTOR: + return self._get_shrink_param_lw_single_factor(X, S, F) + return self.alpha + + def _get_shrink_param_oas(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Oracle Approximating Shrinkage Estimator + + This method uses the following formula to estimate the `alpha` + parameter for the shrink covariance estimator: + A = (1 - 2 / p) * trace(S^2) + trace^2(S) + B = (n + 1 - 2 / p) * (trace(S^2) - trace^2(S) / p) + alpha = A / B + where `n`, `p` are the dim of observations and variables respectively. + """ + trS2 = np.sum(S ** 2) + tr2S = np.trace(S) ** 2 + + n, p = X.shape + + A = (1 - 2 / p) * (trS2 + tr2S) + B = (n + 1 - 2 / p) * (trS2 + tr2S / p) + alpha = A / B + + return alpha + + def _get_shrink_param_lw_const_var(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Constant Variance) + + This method shrinks the covariance matrix towards the constand variance target. + """ + t, n = X.shape + + y = X ** 2 + phi = np.sum(y.T.dot(y) / t - S ** 2) + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = phi / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha + + def _get_shrink_param_lw_const_corr(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Constant Correlation) + + This method shrinks the covariance matrix towards the constand correlation target. + """ + t, n = X.shape + + var = np.diag(S) + sqrt_var = np.sqrt(var) + r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1)) + + y = X ** 2 + phi_mat = y.T.dot(y) / t - S ** 2 + phi = np.sum(phi_mat) + + theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S + np.fill_diagonal(theta_mat, 0) + rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat) + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = (phi - rho) / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha + + def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np.ndarray) -> float: + """Ledoit-Wolf Shrinkage Estimator (Single Factor Model) + + This method shrinks the covariance matrix towards the single factor model target. 
+ """ + t, n = X.shape + + X_mkt = np.nanmean(X, axis=1) + cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) + var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) + + y = X ** 2 + phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2) + + rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2) + z = X * X_mkt[:, None] + v1 = y.T.dot(z) / t - cov_mkt[:, None] * S + roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt + v3 = z.T.dot(z) / t - var_mkt * S + roff3 = ( + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( + np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + ) + roff = 2 * roff1 - roff3 + rho = rdiag + roff + + gamma = np.linalg.norm(S - F, "fro") ** 2 + + kappa = (phi - rho) / gamma + alpha = max(0, min(1, kappa / t)) + + return alpha diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py new file mode 100644 index 00000000000..e778c2faa2a --- /dev/null +++ b/qlib/model/riskmodel/structured.py @@ -0,0 +1,152 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +from typing import Union +from sklearn.decomposition import PCA, FactorAnalysis + +from qlib.model.riskmodel import RiskModel + + +class StructuredCovEstimator(RiskModel): + """Structured Covariance Estimator + + This estimator assumes observations can be predicted by multiple factors + X = FB + U + where `F` can be specified by explicit risk factors or latent factors. + + Therefore the structured covariance can be estimated by + cov(X) = F cov(B) F.T + cov(U) + + We use latent factor models to estimate the structured covariance. + Specifically, the following latent factor models are supported: + - `pca`: Principal Component Analysis + - `fa`: Factor Analysis + + Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and + precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061 + """ + + FACTOR_MODEL_PCA = "pca" + FACTOR_MODEL_FA = "fa" + NAN_OPTION = "fill" + + def __init__( + self, + factor_model: str = "pca", + num_factors: int = 10, + assume_centered: bool = False, + scale_return: bool = True, + ): + """ + Args: + factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). + num_factors (int): number of components to keep. + assume_centered (bool): whether the data is assumed to be centered. + scale_return (bool): whether scale returns as percentage. + """ + super().__init__(self.NAN_OPTION, assume_centered, scale_return) + + assert factor_model in [ + self.FACTOR_MODEL_PCA, + self.FACTOR_MODEL_FA, + ], "factor_model={} is not supported".format(factor_model) + self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis + + self.num_factors = num_factors + + def predict( + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, + ) -> Union[pd.DataFrame, np.ndarray, tuple]: + """ + Args: + X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, + with variables as columns and observations as rows. + return_corr (bool): whether return the correlation matrix. + is_price (bool): whether `X` contains price (if not assume stock returns). + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. 
+ + Returns: + tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. + """ + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." + + # transform input into 2D array + if not isinstance(X, (pd.Series, pd.DataFrame)): + columns = None + else: + if isinstance(X.index, pd.MultiIndex): + if isinstance(X, pd.DataFrame): + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column + else: + X = X.unstack(level="instrument") + else: + # X is 2D DataFrame + pass + columns = X.columns # will be used to restore dataframe + X = X.values + + # calculate pct_change + if is_price: + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + + # scale return + if self.scale_return: + X *= 100 + + # handle nan and centered + X = self._preprocess(X) + + if return_decomposed_components: + F, cov_b, var_u = self._predict(X, return_structured=True) + return F, cov_b, var_u + else: + # estimate covariance + S = self._predict(X) + + # return correlation if needed + if return_corr: + vola = np.sqrt(np.diag(S)) + corr = S / np.outer(vola, vola) + if columns is None: + return corr + return pd.DataFrame(corr, index=columns, columns=columns) + + # return covariance + if columns is None: + return S + return pd.DataFrame(S, index=columns, columns=columns) + + def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + """ + covariance estimation implementation + + Args: + X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). + return_structured (bool): whether return decomposed components of the covariance matrix. + + Returns: + tuple or np.ndarray: decomposed covariance matrix or covariance matrix. + """ + + model = self.solver(self.num_factors, random_state=0).fit(X) + + F = model.components_.T # num_features x num_factors + B = model.transform(X) # num_samples x num_factors + U = X - B @ F.T + cov_b = np.cov(B.T) # num_factors x num_factors + var_u = np.var(U, axis=0) # diagonal + + if return_structured: + return F, cov_b, var_u + + cov_x = F @ cov_b @ F.T + np.diag(var_u) + + return cov_x diff --git a/qlib/portfolio/enhanced_indexing.py b/qlib/portfolio/enhanced_indexing.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/qlib/model/riskmodel_shrink.py b/qlib/portfolio/optimizer/__init__.py similarity index 100% rename from qlib/model/riskmodel_shrink.py rename to qlib/portfolio/optimizer/__init__.py diff --git a/qlib/model/riskmodel_structured.py b/qlib/portfolio/optimizer/base.py similarity index 100% rename from qlib/model/riskmodel_structured.py rename to qlib/portfolio/optimizer/base.py diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py new file mode 100644 index 00000000000..d988c776bc6 --- /dev/null +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -0,0 +1,140 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import cvxpy as cp +import pandas as pd +from typing import Union + +from qlib.portfolio.optimizer import BaseOptimizer + + +class EnhancedIndexingOptimizer(BaseOptimizer): + """ + Portfolio Optimizer with Enhanced Indexing + + Note: + This optimizer always assumes full investment and no-shorting. 
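+
+    A minimal usage sketch (names and shapes here are illustrative, not part of the API):
+    given expected returns `u` and the `(F, cov_b, var_u)` decomposition returned by
+    `StructuredCovEstimator.predict(..., return_decomposed_components=True)`:
+
+        opt = EnhancedIndexingOptimizer(lamb=10, bench_dev=0.01)
+        w = opt(u, F, cov_b, var_u, w0=None, w_bench=w_bench)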
+ """ + + START_FROM_W0 = "w0" + START_FROM_BENCH = "benchmark" + DO_NOT_START_FROM = "no_warm_start" + + def __init__( + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = None, + scale_alpha: bool = True, + verbose: bool = False, + warm_start: str = DO_NOT_START_FROM, + max_iters: int = 10000, + ): + """ + Args: + lamb (float): risk aversion parameter (larger `lamb` means less focus on return) + delta (float): turnover rate limit + bench_dev (float): benchmark deviation limit + inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific + restriction + scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix + verbose (bool): if print detailed information about the solver + warm_start (str): whether try to warm start (`w0`/`benchmark`/``) + (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) + """ + + assert lamb >= 0, "risk aversion parameter `lamb` should be positive" + self.lamb = lamb + + assert delta >= 0, "turnover limit `delta` should be positive" + self.delta = delta + + assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" + self.bench_dev = bench_dev + + assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." + self.inds_dev = inds_dev + + assert warm_start in [ + self.DO_NOT_START_FROM, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" + self.start_from_w0 = warm_start == self.START_FROM_W0 + self.start_from_bench = warm_start == self.START_FROM_BENCH + + self.scale_alpha = scale_alpha + self.verbose = verbose + self.max_iters = max_iters + + def __call__( + self, + u: np.ndarray, + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray = None, + ) -> Union[np.ndarray, pd.Series]: + """ + Args: + u (np.ndarray): expected returns (a.k.a., alpha) + F, covB, varU (np.ndarray): see StructuredCovEstimator + w0 (np.ndarray): initial weights (for turnover control) + w_bench (np.ndarray): benchmark weights + inds_onehot (np.ndarray): industry (onehot) + + Returns: + np.ndarray or pd.Series: optimized portfolio allocation + """ + assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
+ + # scale alpha to match volatility + if self.scale_alpha: + u = u / u.std() + x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) + u *= x_variance ** 0.5 + + w = cp.Variable(len(u)) # num_assets + v = w @ F # num_factors + ret = w @ u + risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) + obj = cp.Maximize(ret - self.lamb * risk) + d_bench = w - w_bench + cons = [ + w >= 0, + cp.sum(w) == 1, + d_bench >= -self.bench_dev, + d_bench <= self.bench_dev, + ] + + if self.inds_dev is not None: + d_inds = d_bench @ inds_onehot + cons.append(d_inds >= -self.inds_dev) + cons.append(d_inds <= self.inds_dev) + + if w0 is not None: + turnover = cp.sum(cp.abs(w - w0)) + cons.append(turnover <= self.delta) + + warm_start = False + if self.start_from_w0: + if w0 is None: + print("Warning: try warm start with w0, but w0 is `None`.") + else: + w.value = w0 + warm_start = True + elif self.start_from_bench: + w.value = w_bench + warm_start = True + + prob = cp.Problem(obj, cons) + prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) + + if prob.status != "optimal": + print("Warning: solve failed.", prob.status) + + return np.asarray(w.value) diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer/optimizer.py similarity index 62% rename from qlib/portfolio/optimizer.py rename to qlib/portfolio/optimizer/optimizer.py index 6ee396a513b..17a7fc30a66 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer/optimizer.py @@ -4,11 +4,12 @@ import abc import warnings import numpy as np -import cvxpy as cp import pandas as pd import scipy.optimize as so from typing import Optional, Union, Callable, List +from qlib.portfolio.enhanced_indexing import EnhancedIndexingOptimizer + class BaseOptimizer(abc.ABC): """ Construct portfolio with a optimization related method """ @@ -38,13 +39,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -71,10 +72,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -163,7 +164,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -259,7 +260,6 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: - def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) @@ -272,134 +272,3 @@ def opt_obj(x): warnings.warn(f"optimization not success ({sol.status})") return sol.x - - -class EnhancedIndexingOptimizer(BaseOptimizer): - """ - Portfolio Optimizer with Enhanced Indexing - - Note: - 
This optimizer always assumes full investment and no-shorting. - """ - - START_FROM_W0 = "w0" - START_FROM_BENCH = "benchmark" - DO_NOT_START_FROM = "no_warm_start" - - def __init__( - self, - lamb: float = 10, - delta: float = 0.4, - bench_dev: float = 0.01, - inds_dev: float = None, - scale_alpha: bool = True, - verbose: bool = False, - warm_start: str = DO_NOT_START_FROM, - max_iters: int = 10000, - ): - """ - Args: - lamb (float): risk aversion parameter (larger `lamb` means less focus on return) - delta (float): turnover rate limit - bench_dev (float): benchmark deviation limit - inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific - restriction - scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix - verbose (bool): if print detailed information about the solver - warm_start (str): whether try to warm start (`w0`/`benchmark`/``) - (https://www.cvxpy.org/tutorial/advanced/index.html#warm-start) - """ - - assert lamb >= 0, "risk aversion parameter `lamb` should be positive" - self.lamb = lamb - - assert delta >= 0, "turnover limit `delta` should be positive" - self.delta = delta - - assert bench_dev >= 0, "benchmark deviation limit `bench_dev` should be positive" - self.bench_dev = bench_dev - - assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" - self.inds_dev = inds_dev - - assert warm_start in [ - self.DO_NOT_START_FROM, - self.START_FROM_W0, - self.START_FROM_BENCH, - ], "illegal warm start option" - self.start_from_w0 = warm_start == self.START_FROM_W0 - self.start_from_bench = warm_start == self.START_FROM_BENCH - - self.scale_alpha = scale_alpha - self.verbose = verbose - self.max_iters = max_iters - - def __call__( - self, - u: np.ndarray, - F: np.ndarray, - covB: np.ndarray, - varU: np.ndarray, - w0: np.ndarray, - w_bench: np.ndarray, - inds_onehot: np.ndarray = None, - ) -> Union[np.ndarray, pd.Series]: - """ - Args: - u (np.ndarray): expected returns (a.k.a., alpha) - F, covB, varU (np.ndarray): see StructuredCovEstimator - w0 (np.ndarray): initial weights (for turnover control) - w_bench (np.ndarray): benchmark weights - inds_onehot (np.ndarray): industry (onehot) - - Returns: - np.ndarray or pd.Series: optimized portfolio allocation - """ - assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
- - # scale alpha to match volatility - if self.scale_alpha: - u = u / u.std() - x_variance = np.mean(np.diag(F @ covB @ F.T) + varU) - u *= x_variance ** 0.5 - - w = cp.Variable(len(u)) # num_assets - v = w @ F # num_factors - ret = w @ u - risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) - obj = cp.Maximize(ret - self.lamb * risk) - d_bench = w - w_bench - cons = [ - w >= 0, - cp.sum(w) == 1, - d_bench >= -self.bench_dev, - d_bench <= self.bench_dev, - ] - - if self.inds_dev is not None: - d_inds = d_bench @ inds_onehot - cons.append(d_inds >= -self.inds_dev) - cons.append(d_inds <= self.inds_dev) - - if w0 is not None: - turnover = cp.sum(cp.abs(w - w0)) - cons.append(turnover <= self.delta) - - warm_start = False - if self.start_from_w0: - if w0 is None: - print("Warning: try warm start with w0, but w0 is `None`.") - else: - w.value = w0 - warm_start = True - elif self.start_from_bench: - w.value = w_bench - warm_start = True - - prob = cp.Problem(obj, cons) - prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - - if prob.status != "optimal": - print("Warning: solve failed.", prob.status) - - return np.asarray(w.value) diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py deleted file mode 100644 index f21d51984a6..00000000000 --- a/tests/test_enhanced_indexing.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import sys -import math -import shutil -import unittest -import numpy as np -import pandas as pd -from tqdm import tqdm -from pathlib import Path - -import qlib -from qlib.config import C -from qlib.utils import init_instance_by_config, flatten_dict -from qlib.workflow import R -from qlib.config import REG_CN -from qlib.workflow.record_temp import SignalRecord, SigAnaRecord -from qlib.tests import TestAutoData -from qlib.portfolio.optimizer import EnhancedIndexingOptimizer -from qlib.model.riskmodel import StructuredCovEstimator -from qlib.data.dataset.loader import QlibDataLoader -from qlib.data.dataset.handler import DataHandler -from qlib.data import D -from qlib.utils import exists_qlib_data, init_instance_by_config - -market = "all" -trade_gap = 21 -label_config = "Ref($close, -{}) / Ref($close, -1) - 1".format(trade_gap) # reconstruct portfolio once a month - -provider_uri = "~/.qlib_ei/qlib_data/cn_data" # target_dir -if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(Path.cwd().parent.joinpath("scripts"))) - from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) -qlib.init(provider_uri=provider_uri, region=REG_CN) - -################################### -# train model -################################### -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-11-30", - "instruments": market, - "label": [label_config] -} - -task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 32, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, 
- "segments": { - "train": ("2008-01-01", "2014-11-30"), - "valid": ("2015-01-01", "2016-11-30"), - "test": ("2017-01-01", "2018-01-01"), - }, - }, - }, -} - - -class CSI300: - """Simulate CSI300 as the Benchmark for Enhanced Indexing to Track""" - - def __init__(self): - # provider_uri = '/nfs_data/qlib_data/ycz_daily/qlib' - # qlib.init(provider_uri=provider_uri, region=REG_CN, dataset_cache=None, expression_cache=None) - self.csi_weight = D.features(D.instruments('csi300'), ['$csi300_weight']) - - def __call__(self, pd_index, trade_date): - weights = np.zeros(len(pd_index)) - - for idx, instrument in enumerate(pd_index): - if (instrument, trade_date) in self.csi_weight.index: - weight = self.csi_weight.loc[(instrument, trade_date)].values[0] - if not math.isnan(weight): - weights[idx] = weight - - assert weights.sum() > 0, ' Fetch CSI Weights Error!' - weights = weights / weights.sum() - - return weights - - -class EnhancedIndexingStrategy: - """Enhanced Indexing Strategy""" - - def __init__(self): - self.benchmark = CSI300() - - provider_uri = "~/.qlib_ei/qlib_data/cn_data" - qlib.init(provider_uri=provider_uri, region=REG_CN) - - self.data_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader(["$close"])) - self.label_handler = DataHandler(market, "2015-01-01", "2019-01-01", QlibDataLoader([label_config])) - self.cov_estimator = StructuredCovEstimator() - self.optimizer = EnhancedIndexingOptimizer(lamb=0.1, delta=0.4, bench_dev=0.03, max_iters=50000) - - def update(self, score_series, current, pred_date): - """ - Parameters - ----------- - score_series : pd.Series - stock_id , score. - current : Position() - current of account. - trade_exchange : Exchange() - exchange. - trade_date : pd.Timestamp - date. - """ - print(score_series) - score_series = score_series.dropna() - - # portfolio init weight - init_weight = current.reindex(score_series.index, fill_value=0).values.squeeze() - init_weight_sum = init_weight.sum() - if init_weight_sum > 0: - init_weight /= init_weight_sum - - # covariance estimation - selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index) - price = self.data_handler.fetch(selector, level=None, squeeze=True) - F, cov_b, var_u = self.cov_estimator.predict(price, return_decomposed_components=True) - - # optimize target portfolio - w_bench = self.benchmark(score_series.index, pred_date) - passed_init_weight = init_weight if init_weight_sum > 0 else None - # print(F) - # print(cov_b) - # print(var_u) - # print(passed_init_weight) - # print(w_bench) - target_weight = self.optimizer(score_series.values, F, cov_b, var_u, passed_init_weight, w_bench) - # print(target_weight) - target = pd.DataFrame(data=target_weight, index=score_series.index) - - active_weights = target_weight - w_bench - selector = (self.label_handler.get_range_selector(pred_date, 1), score_series.index) - label = self.label_handler.fetch(selector, level=None, squeeze=True) - alpha = 0 - for instrument, weight in zip(score_series.index, active_weights): - delta = label.loc[(pred_date, instrument)] - alpha += weight * (0 if math.isnan(delta) else delta) - - print(alpha) - - return alpha, target - - -def train(): - """train model - - Returns - ------- - pred_score: pandas.DataFrame - predict scores - performance: dict - model performance - """ - - # model initiation - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) - - # start exp - with R.start(experiment_name="workflow"): - 
R.log_params(**flatten_dict(task)) - model.fit(dataset) - - # prediction - recorder = R.get_recorder() - rid = recorder.id - sr = SignalRecord(model, dataset, recorder) - sr.generate() - pred_score = sr.load() - - # calculate ic and ric - sar = SigAnaRecord(recorder) - sar.generate() - ic = sar.load(sar.get_path("ic.pkl")) - ric = sar.load(sar.get_path("ric.pkl")) - - return pred_score, {"ic": ic, "ric": ric}, rid - - -def backtest_analysis(scores): - """backtest enhanced indexing - - Parameters - ---------- - scores: pandas.DataFrame - predict scores - - Returns - ------- - sharpe_ratio: floating-point - sharpe ratio of the enhanced indexing portfolio - """ - - # backtest and analysis - with R.start(experiment_name="backtest_analysis"): - strategy = EnhancedIndexingStrategy() - dates = scores.index.get_level_values(0).unique() - - alphas = [] - current = pd.DataFrame() - gap_between_next_trade = 0 - for date in tqdm(dates): - if gap_between_next_trade == 0: - score_series = scores.loc[date] - alpha, current = strategy.update(score_series, current, date) - alphas.append(alpha) - gap_between_next_trade = trade_gap - else: - gap_between_next_trade -= 1 - - alphas = np.array(alphas) - sharpe_ratio = alphas.mean() / np.std(alphas) - print('Sharpe:', sharpe_ratio) - - return sharpe_ratio - - -class TestAllFlow(TestAutoData): - PRED_SCORE = None - REPORT_NORMAL = None - POSITIONS = None - RID = None - - @classmethod - def tearDownClass(cls) -> None: - shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve())) - - def test_0_train(self): - TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train() - self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed") - self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") - - def test_1_backtest(self): - sharpe_ratio = backtest_analysis(TestAllFlow.PRED_SCORE) - self.assertGreaterEqual( - sharpe_ratio, - 0.90, - "backtest failed", - ) - - -def suite(): - _suite = unittest.TestSuite() - _suite.addTest(TestAllFlow("test_0_train")) - _suite.addTest(TestAllFlow("test_1_backtest")) - return _suite - - -if __name__ == "__main__": - runner = unittest.TextTestRunner() - runner.run(suite()) From 0f3e3d206b51300b953e06674104af2ae23dc786 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Thu, 4 Mar 2021 22:47:42 +0800 Subject: [PATCH 21/32] Update __init__.py. --- qlib/model/riskmodel/__init__.py | 7 +++++++ qlib/model/riskmodel/base.py | 4 ---- qlib/portfolio/__init__.py | 2 ++ qlib/portfolio/optimizer/__init__.py | 6 ++++++ qlib/portfolio/optimizer/base.py | 13 +++++++++++++ qlib/portfolio/optimizer/optimizer.py | 13 ++----------- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/qlib/model/riskmodel/__init__.py b/qlib/model/riskmodel/__init__.py index e69de29bb2d..05af6b7d377 100644 --- a/qlib/model/riskmodel/__init__.py +++ b/qlib/model/riskmodel/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+
+from .base import RiskModel
+from .poet import POETCovEstimator
+from .shrink import ShrinkCovEstimator
+from .structured import StructuredCovEstimator
diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py
index d5b009cccca..02ab8c2fb63 100644
--- a/qlib/model/riskmodel/base.py
+++ b/qlib/model/riskmodel/base.py
@@ -7,10 +7,6 @@
 
 from qlib.model.base import BaseModel
 
-from qlib.model.riskmodel_poet import POETCovEstimator
-from qlib.model.riskmodel_shrink import ShrinkCovEstimator
-from qlib.model.riskmodel_structured import StructuredCovEstimator
-
 
 class RiskModel(BaseModel):
     """Risk Model
diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py
index e69de29bb2d..139597f9cb0 100644
--- a/qlib/portfolio/__init__.py
+++ b/qlib/portfolio/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/qlib/portfolio/optimizer/__init__.py b/qlib/portfolio/optimizer/__init__.py
index e69de29bb2d..5080b9a469a 100644
--- a/qlib/portfolio/optimizer/__init__.py
+++ b/qlib/portfolio/optimizer/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from .base import BaseOptimizer
+from .optimizer import PortfolioOptimizer
+from .enhanced_indexing import EnhancedIndexingOptimizer
diff --git a/qlib/portfolio/optimizer/base.py b/qlib/portfolio/optimizer/base.py
index e69de29bb2d..502443869d9 100644
--- a/qlib/portfolio/optimizer/base.py
+++ b/qlib/portfolio/optimizer/base.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import abc
+
+
+class BaseOptimizer(abc.ABC):
+    """ Construct portfolio with an optimization-related method """
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs) -> object:
+        """ Generate an optimized portfolio allocation """
+        pass
diff --git a/qlib/portfolio/optimizer/optimizer.py b/qlib/portfolio/optimizer/optimizer.py
index 17a7fc30a66..3daa98af329 100644
--- a/qlib/portfolio/optimizer/optimizer.py
+++ b/qlib/portfolio/optimizer/optimizer.py
@@ -1,23 +1,14 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import abc
+
 import warnings
 import numpy as np
 import pandas as pd
 import scipy.optimize as so
 from typing import Optional, Union, Callable, List
 
-from qlib.portfolio.enhanced_indexing import EnhancedIndexingOptimizer
-
-
-class BaseOptimizer(abc.ABC):
-    """ Construct portfolio with a optimization related method """
-
-    @abc.abstractmethod
-    def __call__(self, *args, **kwargs) -> object:
-        """ Generate a optimized portfolio allocation """
-        pass
+from qlib.portfolio.optimizer import BaseOptimizer
 
 
 class PortfolioOptimizer(BaseOptimizer):
From 79c1142d3e7f456ca66fe238973a49edefa1b86f Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 17:09:33 +0800
Subject: [PATCH 22/32] Pass nan_option to structured covariance estimator.
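
For illustration, the estimator now takes `nan_option` explicitly and rejects
unsupported options. A minimal sketch (not part of the diff below; it assumes
the package layout introduced in PATCH 21):

    from qlib.model.riskmodel import StructuredCovEstimator

    est = StructuredCovEstimator(factor_model="pca", num_factors=10, nan_option="fill")  # accepted
    # StructuredCovEstimator(nan_option="ignore") raises an AssertionError: only "fill"
    # is supported here, since the latent factor solvers cannot work with NaN input.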
--- qlib/model/riskmodel/structured.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index e778c2faa2a..69c032e8137 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -30,7 +30,7 @@ class StructuredCovEstimator(RiskModel): FACTOR_MODEL_PCA = "pca" FACTOR_MODEL_FA = "fa" - NAN_OPTION = "fill" + DEFAULT_NAN_OPTION = "fill" def __init__( self, @@ -38,6 +38,7 @@ def __init__( num_factors: int = 10, assume_centered: bool = False, scale_return: bool = True, + nan_option: str = DEFAULT_NAN_OPTION ): """ Args: @@ -45,8 +46,11 @@ def __init__( num_factors (int): number of components to keep. assume_centered (bool): whether the data is assumed to be centered. scale_return (bool): whether scale returns as percentage. + nan_option (str): nan handling option (`fill`). """ - super().__init__(self.NAN_OPTION, assume_centered, scale_return) + assert nan_option in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(nan_option) + + super().__init__(nan_option, assume_centered, scale_return) assert factor_model in [ self.FACTOR_MODEL_PCA, From 4d5a30b30b6766168dcc3b19e6a4420c31710da4 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:14:29 +0800 Subject: [PATCH 23/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589167776 --- qlib/model/riskmodel/structured.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index 69c032e8137..7b722e6009a 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -36,21 +36,21 @@ def __init__( self, factor_model: str = "pca", num_factors: int = 10, - assume_centered: bool = False, - scale_return: bool = True, - nan_option: str = DEFAULT_NAN_OPTION + **kwargs ): """ Args: factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). num_factors (int): number of components to keep. - assume_centered (bool): whether the data is assumed to be centered. - scale_return (bool): whether scale returns as percentage. - nan_option (str): nan handling option (`fill`). + kwargs: see `RiskModel` for more information """ - assert nan_option in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(nan_option) + if 'nan_option' in kwargs.keys(): + assert kwargs['nan_option'] in [self.DEFAULT_NAN_OPTION], \ + "nan_option={} is not supported".format(kwargs['nan_option']) + else: + kwargs['nan_option'] = self.DEFAULT_NAN_OPTION - super().__init__(nan_option, assume_centered, scale_return) + super().__init__(**kwargs) assert factor_model in [ self.FACTOR_MODEL_PCA, From 81b86f8022ef90f437e01d20e75a0f77e1c65786 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:18:07 +0800 Subject: [PATCH 24/32] Update test to cover changes in structured_cov_estimator --- tests/test_structured_cov_estimator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index 6aeae3d8979..8ac1e8477cc 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -27,6 +27,24 @@ def test_random_covariance(self): self.assertTrue(if_identical) + def test_nan_option_covariance(self): + # Try to estimate the covariance from a randomly generated matrix. 
+        NUM_VARIABLE = 10
+        NUM_OBSERVATION = 200
+        EPS = 1e-6
+
+        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill')
+
+        X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)
+
+        est_cov = estimator.predict(X, is_price=False)
+        np_cov = np.cov(X.T)  # np.cov treats rows as variables, while qlib treats columns as variables.
+
+        delta = abs(est_cov - np_cov)
+        if_identical = (delta < EPS).all()
+
+        self.assertTrue(if_identical)
+
     def test_constructed_covariance(self):
         # Try to estimate the covariance from a specially crafted matrix.
         # There should be some significant correlation since X is specially crafted.
From 351d598c9f45a59d96fb0be1d57bbbc662d756f6 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 17:49:59 +0800
Subject: [PATCH 25/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589165409

---
 qlib/model/riskmodel/base.py           | 28 ++++++----
 qlib/model/riskmodel/structured.py     | 74 ++------------------------
 tests/test_structured_cov_estimator.py | 15 +++++-
 3 files changed, 34 insertions(+), 83 deletions(-)

diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py
index 02ab8c2fb63..89df80e8f07 100644
--- a/qlib/model/riskmodel/base.py
+++ b/qlib/model/riskmodel/base.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+import inspect
 import numpy as np
 import pandas as pd
 from typing import Union
@@ -37,18 +38,24 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc
         self.scale_return = scale_return
 
     def predict(
-        self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
-    ) -> Union[pd.DataFrame, np.ndarray]:
+        self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True,
+        return_decomposed_components=False,
+    ) -> Union[pd.DataFrame, np.ndarray, tuple]:
         """
         Args:
             X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
                 with variables as columns and observations as rows.
             return_corr (bool): whether return the correlation matrix.
             is_price (bool): whether `X` contains price (if not assume stock returns).
+            return_decomposed_components (bool): whether return decomposed components of the covariance matrix.
 
         Returns:
             pd.DataFrame or np.ndarray: estimated covariance (or correlation).
         """
+        assert (
+            not return_corr or not return_decomposed_components
+        ), "Can only return either correlation matrix or decomposed components."
+ # transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): columns = None @@ -75,6 +82,14 @@ def predict( # handle nan and centered X = self._preprocess(X) + # return decomposed components if needed + if return_decomposed_components: + assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \ + 'This risk model does not support return decomposed components of the covariance matrix ' + + F, cov_b, var_u = self._predict(X, return_decomposed_components=True) + return F, cov_b, var_u + # estimate covariance S = self._predict(X) @@ -126,12 +141,3 @@ def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: if not self.assume_centered: X = X - np.nanmean(X, axis=0) return X - - - - - - - - - diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index 7b722e6009a..39ff0166efa 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -60,81 +60,13 @@ def __init__( self.num_factors = num_factors - def predict( - self, - X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, - is_price: bool = True, - return_decomposed_components=False, - ) -> Union[pd.DataFrame, np.ndarray, tuple]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - return_decomposed_components (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. - """ - assert ( - not return_corr or not return_decomposed_components - ), "Can only return either correlation matrix or decomposed components." - - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - if return_decomposed_components: - F, cov_b, var_u = self._predict(X, return_structured=True) - return F, cov_b, var_u - else: - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + def _predict(self, X: np.ndarray, return_decomposed_components=False) -> Union[np.ndarray, tuple]: """ covariance estimation implementation Args: X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - return_structured (bool): whether return decomposed components of the covariance matrix. 
+ return_decomposed_components (bool): whether return decomposed components of the covariance matrix. Returns: tuple or np.ndarray: decomposed covariance matrix or covariance matrix. @@ -148,7 +80,7 @@ def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, cov_b = np.cov(B.T) # num_factors x num_factors var_u = np.var(U, axis=0) # diagonal - if return_structured: + if return_decomposed_components: return F, cov_b, var_u cov_x = F @ cov_b @ F.T + np.diag(var_u) diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index 8ac1e8477cc..a3973be5ae9 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -28,7 +28,7 @@ def test_random_covariance(self): self.assertTrue(if_identical) def test_nan_option_covariance(self): - # Try to estimate the covariance from a randomly generated matrix. + # Test if nan_option is correctly passed. NUM_VARIABLE = 10 NUM_OBSERVATION = 200 EPS = 1e-6 @@ -45,6 +45,19 @@ def test_nan_option_covariance(self): self.assertTrue(if_identical) + def test_decompose_covariance(self): + # Test if return_decomposed_components is correctly passed. + NUM_VARIABLE = 10 + NUM_OBSERVATION = 200 + + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + + X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) + + F, cov_b, var_u = estimator.predict(X, is_price=False, return_decomposed_components=True) + + self.assertTrue(F is not None and cov_b is not None and var_u is not None) + def test_constructed_covariance(self): # Try to estimate the covariance from a specially crafted matrix. # There should be some significant correlation since X is specially crafted. From c6675be7929afb6c0403aa6482f4bbea50bf053d Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:51:36 +0800 Subject: [PATCH 26/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589166143 --- qlib/portfolio/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py index 139597f9cb0..b7c525821a8 100644 --- a/qlib/portfolio/__init__.py +++ b/qlib/portfolio/__init__.py @@ -1,2 +1,3 @@ - +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
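
A usage sketch for the decomposed-components API introduced in PATCH 25
(illustrative only, not part of any patch in this series; the random data
matrix mirrors the new unit tests):

    import numpy as np
    from qlib.model.riskmodel import StructuredCovEstimator

    X = np.random.rand(200, 10)  # 200 observations (rows), 10 variables (columns)
    est = StructuredCovEstimator(scale_return=False, assume_centered=True)
    F, cov_b, var_u = est.predict(X, is_price=False, return_decomposed_components=True)

    # Reassembling the components reproduces the structured covariance,
    # i.e. est.predict(X, is_price=False), up to floating-point error:
    cov_x = F @ cov_b @ F.T + np.diag(var_u)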
From fc89fec46d4c23e650ac2f9fca12c926673ab882 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 18:56:54 +0800 Subject: [PATCH 27/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589168764 --- qlib/portfolio/optimizer/enhanced_indexing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index d988c776bc6..a0d0bc05090 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -19,7 +19,6 @@ class EnhancedIndexingOptimizer(BaseOptimizer): START_FROM_W0 = "w0" START_FROM_BENCH = "benchmark" - DO_NOT_START_FROM = "no_warm_start" def __init__( self, @@ -29,7 +28,7 @@ def __init__( inds_dev: float = None, scale_alpha: bool = True, verbose: bool = False, - warm_start: str = DO_NOT_START_FROM, + warm_start: str = None, max_iters: int = 10000, ): """ @@ -58,7 +57,7 @@ def __init__( self.inds_dev = inds_dev assert warm_start in [ - self.DO_NOT_START_FROM, + None, self.START_FROM_W0, self.START_FROM_BENCH, ], "illegal warm start option" From 2f9af1af8ff44a4b60a0e9e2ca5412d1453c9755 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:02:40 +0800 Subject: [PATCH 28/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589169769 --- qlib/portfolio/optimizer/enhanced_indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index a0d0bc05090..1f7de6cb4c1 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -70,7 +70,7 @@ def __init__( def __call__( self, - u: np.ndarray, + u: Union[np.ndarray, pd.Series], F: np.ndarray, covB: np.ndarray, varU: np.ndarray, @@ -80,7 +80,7 @@ def __call__( ) -> Union[np.ndarray, pd.Series]: """ Args: - u (np.ndarray): expected returns (a.k.a., alpha) + u (np.ndarray or pd.Series): expected returns (a.k.a., alpha) F, covB, varU (np.ndarray): see StructuredCovEstimator w0 (np.ndarray): initial weights (for turnover control) w_bench (np.ndarray): benchmark weights @@ -91,6 +91,10 @@ def __call__( """ assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
+ # transform dataframe into array + if isinstance(u, pd.Series): + u = u.values + # scale alpha to match volatility if self.scale_alpha: u = u / u.std() From 7022675d003b4a603e9a04769e8a91e7232421ea Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:07:28 +0800 Subject: [PATCH 29/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589169489 --- qlib/portfolio/optimizer/enhanced_indexing.py | 40 +++++++++---------- qlib/portfolio/optimizer/optimizer.py | 25 ++++++------ 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index 1f7de6cb4c1..5fdc1014ddf 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -21,15 +21,15 @@ class EnhancedIndexingOptimizer(BaseOptimizer): START_FROM_BENCH = "benchmark" def __init__( - self, - lamb: float = 10, - delta: float = 0.4, - bench_dev: float = 0.01, - inds_dev: float = None, - scale_alpha: bool = True, - verbose: bool = False, - warm_start: str = None, - max_iters: int = 10000, + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = None, + scale_alpha: bool = True, + verbose: bool = False, + warm_start: str = None, + max_iters: int = 10000, ): """ Args: @@ -56,11 +56,7 @@ def __init__( assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." self.inds_dev = inds_dev - assert warm_start in [ - None, - self.START_FROM_W0, - self.START_FROM_BENCH, - ], "illegal warm start option" + assert warm_start in [None, self.START_FROM_W0, self.START_FROM_BENCH,], "illegal warm start option" self.start_from_w0 = warm_start == self.START_FROM_W0 self.start_from_bench = warm_start == self.START_FROM_BENCH @@ -69,14 +65,14 @@ def __init__( self.max_iters = max_iters def __call__( - self, - u: Union[np.ndarray, pd.Series], - F: np.ndarray, - covB: np.ndarray, - varU: np.ndarray, - w0: np.ndarray, - w_bench: np.ndarray, - inds_onehot: np.ndarray = None, + self, + u: Union[np.ndarray, pd.Series], + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray = None, ) -> Union[np.ndarray, pd.Series]: """ Args: diff --git a/qlib/portfolio/optimizer/optimizer.py b/qlib/portfolio/optimizer/optimizer.py index 3daa98af329..54648a46ac0 100644 --- a/qlib/portfolio/optimizer/optimizer.py +++ b/qlib/portfolio/optimizer/optimizer.py @@ -30,13 +30,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -63,10 +63,10 @@ def __init__( self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -155,7 +155,7 @@ def _optimize_gmv(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.nd return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - 
self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -251,6 +251,7 @@ def _solve(self, n: int, obj: Callable, bounds: so.Bounds, cons: List) -> np.nda # add l2 regularization wrapped_obj = obj if self.alpha > 0: + def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) From 6a305c73ae51254c9c8d7629b968720bc099ac6f Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:08:55 +0800 Subject: [PATCH 30/32] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589166529 --- qlib/model/riskmodel/base.py | 14 +++++++++----- qlib/model/riskmodel/poet.py | 2 +- qlib/model/riskmodel/shrink.py | 3 +-- qlib/model/riskmodel/structured.py | 16 ++++++---------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py index 89df80e8f07..bb067e3d586 100644 --- a/qlib/model/riskmodel/base.py +++ b/qlib/model/riskmodel/base.py @@ -38,8 +38,11 @@ def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, sc self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, - return_decomposed_components=False, + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: @@ -53,7 +56,7 @@ def predict( pd.DataFrame or np.ndarray: estimated covariance (or correlation). """ assert ( - not return_corr or not return_decomposed_components + not return_corr or not return_decomposed_components ), "Can only return either correlation matrix or decomposed components." 
         # transform input into 2D array
@@ -84,8 +87,9 @@ def predict(
 
         # return decomposed components if needed
         if return_decomposed_components:
-            assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \
-                'This risk model does not support return decomposed components of the covariance matrix '
+            assert (
+                "return_decomposed_components" in inspect.getfullargspec(self._predict).args
+            ), "This risk model does not support returning decomposed components of the covariance matrix"
 
             F, cov_b, var_u = self._predict(X, return_decomposed_components=True)
             return F, cov_b, var_u
diff --git a/qlib/model/riskmodel/poet.py b/qlib/model/riskmodel/poet.py
index 8dbe890360e..84038455582 100644
--- a/qlib/model/riskmodel/poet.py
+++ b/qlib/model/riskmodel/poet.py
@@ -50,7 +50,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray:
         if self.num_factors > 0:
             Dd, V = np.linalg.eig(Y.T.dot(Y))
             V = V[:, np.argsort(Dd)]
-            F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n)
+            F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n)
             LamPCA = Y.dot(F) / n
             uhat = np.asarray(Y - LamPCA.dot(F.T))
             Lowrank = np.asarray(LamPCA.dot(LamPCA.T))
diff --git a/qlib/model/riskmodel/shrink.py b/qlib/model/riskmodel/shrink.py
index 1298891fb01..3cb2620d1bc 100644
--- a/qlib/model/riskmodel/shrink.py
+++ b/qlib/model/riskmodel/shrink.py
@@ -248,8 +248,7 @@ def _get_shrink_param_lw_single_factor(self, X: np.ndarray, S: np.ndarray, F: np
         roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
         v3 = z.T.dot(z) / t - var_mkt * S
         roff3 = (
-            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(
-                np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
+            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
         )
         roff = 2 * roff1 - roff3
         rho = rdiag + roff
diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py
index 39ff0166efa..878503401fc 100644
--- a/qlib/model/riskmodel/structured.py
+++ b/qlib/model/riskmodel/structured.py
@@ -32,23 +32,19 @@ class StructuredCovEstimator(RiskModel):
     FACTOR_MODEL_FA = "fa"
     DEFAULT_NAN_OPTION = "fill"
 
-    def __init__(
-        self,
-        factor_model: str = "pca",
-        num_factors: int = 10,
-        **kwargs
-    ):
+    def __init__(self, factor_model: str = "pca", num_factors: int = 10, **kwargs):
         """
         Args:
             factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`).
             num_factors (int): number of components to keep.
             kwargs: see `RiskModel` for more information
         """
-        if 'nan_option' in kwargs.keys():
-            assert kwargs['nan_option'] in [self.DEFAULT_NAN_OPTION], \
-                "nan_option={} is not supported".format(kwargs['nan_option'])
+        if "nan_option" in kwargs.keys():
+            assert kwargs["nan_option"] in [self.DEFAULT_NAN_OPTION], "nan_option={} is not supported".format(
+                kwargs["nan_option"]
+            )
         else:
-            kwargs['nan_option'] = self.DEFAULT_NAN_OPTION
+            kwargs["nan_option"] = self.DEFAULT_NAN_OPTION
 
         super().__init__(**kwargs)
 
From 8b9065c16690057b3fdd3968262e875e3c520c87 Mon Sep 17 00:00:00 2001
From: Charles Young
Date: Mon, 8 Mar 2021 19:32:13 +0800
Subject: [PATCH 31/32] Reformat with black.
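
This patch is formatting-only; no behavioral change is intended. The output
style is consistent with black at a 120-character line length, e.g. an
invocation along these lines (the exact command is an assumption, inferred
from the resulting style):

    black qlib tests -l 120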
--- qlib/config.py | 24 ++----- qlib/contrib/backtest/__init__.py | 18 +---- qlib/contrib/backtest/profit_attribution.py | 23 ++----- qlib/contrib/data/handler.py | 10 +-- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 +---- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++---- qlib/contrib/model/pytorch_alstm_ts.py | 17 ++--- qlib/contrib/model/pytorch_gats.py | 22 ++---- qlib/contrib/model/pytorch_gats_ts.py | 18 +---- qlib/contrib/model/pytorch_gru.py | 21 ++---- qlib/contrib/model/pytorch_gru_ts.py | 17 +---- qlib/contrib/model/pytorch_lstm.py | 21 ++---- qlib/contrib/model/pytorch_lstm_ts.py | 17 +---- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 +---- qlib/contrib/model/pytorch_tabnet.py | 14 +--- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 ++----- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 +-- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++-------------- .../analysis_position/cumulative_return.py | 36 ++-------- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 +---- .../report/analysis_position/report.py | 15 +--- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +--- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 ++----- qlib/data/client.py | 3 +- qlib/data/data.py | 69 +++---------------- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/portfolio/__init__.py | 1 - qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- tests/test_all_pipeline.py | 9 +-- tests/test_dump_data.py | 9 +-- tests/test_get_data.py | 4 +- tests/test_structured_cov_estimator.py | 4 +- 46 files changed, 123 insertions(+), 546 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 52b05568d57..344eb852777 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,12 +115,7 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": { - "field_not_found": { - "()": "qlib.log.LogFilter", - "param": [".*?WARN: data not found for.*?"], - } - }, + "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, "handlers": { "console": { "class": "logging.StreamHandler", @@ -135,10 +130,7 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, }, } @@ -200,16 +192,8 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: { - "trade_unit": 100, - "limit_threshold": 0.099, - "deal_price": "vwap", - }, - REG_US: { - "trade_unit": 1, - "limit_threshold": None, - "deal_price": "close", - }, + REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, + REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index aa24ffb0cf6..bd3494abf6a 100644 --- a/qlib/contrib/backtest/__init__.py +++ 
b/qlib/contrib/backtest/__init__.py @@ -18,13 +18,7 @@ def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, + strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, ): """get_strategy @@ -75,11 +69,7 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, + topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -172,9 +162,7 @@ def get_exchange( def get_executor( - executor=None, - trade_exchange=None, - verbose=True, + executor=None, trade_exchange=None, verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 20c6f638fcd..355f0637395 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,10 +12,7 @@ def get_benchmark_weight( - bench, - start_date=None, - end_date=None, - path=None, + bench, start_date=None, end_date=None, path=None, ): """get_benchmark_weight @@ -216,12 +213,7 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, - bench="SH000905", - group_field="industry", - group_method="category", - group_n=None, - deal_price="vwap", + positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", ): """brinson profit attribution @@ -255,17 +247,10 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), - start_time=shift_start_date, - end_time=end_date, - as_list=True, + D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, ) stock_df = D.features( - instruments, - [group_field, deal_price], - start_time=shift_start_date, - end_time=end_date, - freq="day", + instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 970b032d6b0..574287819b7 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,10 +21,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." 
pkwargs.update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -170,10 +167,7 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": { - "windows": [0], - "feature": ["OPEN", "HIGH", "LOW", "VWAP"], - }, + "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853f1..363a184582d 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,11 +35,7 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, - label: pd.Series, - date_col: str = "datetime", - quantile: float = 0.2, - dropna: bool = False, + pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4aa5b55156f..5cb1ce4eb67 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,12 +244,7 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict( - zip( - ["long", "short", "long_short"], - map(pd.Series, [long_returns, short_returns, ls_returns]), - ) - ) + return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 04ddd8db041..2d94105e482 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,12 +64,7 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, - fields, - start_time=evaluate_date, - end_time=evaluate_date, - freq="day", - disk_cache=0, + instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -87,14 +82,7 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features( - instruments, - fields, - start_time=start_date, - end_time=end_date, - freq="day", - disk_cache=0, - ) + close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index d57c32b7022..2840c2cef5a 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,9 +32,7 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 
bbbb61851b1..306e68aadf2 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -328,14 +319,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de855..612bacbec93 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -309,14 +302,12 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", 
nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc1a..c59dc91973f 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,17 +229,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -340,19 +334,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e418..dfc5f4ab5ed 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,11 +242,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -361,19 +357,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9fb0..d2a774b65b4 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return 
np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b854c..49f438cc379 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec87..02ca16e36b8 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,10 +118,7 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 
+296,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f1f..2ec36f96e34 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,10 +123,7 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff53..8c1a77ec3c5 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,11 +154,7 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7bd..1f7433e053d 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,14 +30,7 @@ class SFM_Model(nn.Module): def __init__( - self, - d_feat=6, - output_dim=1, - freq_dim=10, - hidden_size=64, - dropout_W=0.0, - dropout_U=0.0, - device="cpu", + self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", ): super().__init__() @@ -362,17 +355,11 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git 
a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701ce..18e9d8eb404 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,9 +120,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -156,11 +154,7 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -172,9 +166,7 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index ba2e5789b85..e37725c2eb6 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,9 +29,7 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 2bd0937a032..52b86888133 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,21 +150,13 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) @@ -271,21 +263,13 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.SELL, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.BUY, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, ) 
order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index cf850b9dace..a4476709de0 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,12 +84,10 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, - self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, - self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f57858..c82deb3945c 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,9 +125,7 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -160,9 +158,7 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index 611af63e4af..fb96c87bd31 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,11 +79,7 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar( - start_time=pd.Timestamp(latest_trading_date), - end_time=pd.Timestamp(today), - future=True, - ) + dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 1cb14d26153..ef1447a12be 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,8 +53,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), - layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, 
["long-short", "long-average"]] @@ -62,12 +61,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["long-short", "long-average"], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), ).figure return group_scatter_figure, group_hist_figure @@ -102,15 +96,12 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], - names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), - end=pd.Timestamp(f"{_index.max()[:4]}1231"), - freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", ) _years = [] _month = [] @@ -142,32 +133,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ( - "ic", - dict( - row=1, - col=1, - name="", - kind="DistplotGraph", - graph_kwargs=dict(bin_size=_bin_size), - ), - ), + ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name],), sub_graph_data=_sub_graph_data, - layout=dict( - yaxis2=dict(title="Observed Quantile"), - xaxis2=dict(title=f"{dist_name} Distribution Quantile"), - ), + layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -181,8 +155,7 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, - layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -202,17 +175,11 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame( - { - "Top": top, - "Bottom": bottom, - } - ) + r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, - layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -230,11 +197,7 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, - layout=dict( - title="Information Coefficient (IC)", - xaxis=dict(type="category", tickangle=45), - ), + ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), ).figure return ic_bar_figure @@ -277,12 +240,7 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, - lag=lag, - N=N, - reverse=reverse, - rank=rank, - show_nature_day=show_nature_day, + pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index abb68ea6051..604189c94b6 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,11 +13,7 @@ def _get_cum_return_data_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ): """ @@ -29,11 +25,7 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, - report_normal=report_normal, - label_data=label_data, - start_date=start_date, - end_date=end_date, + position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -87,11 +79,7 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ 
-111,18 +99,12 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ( - "cum_{}".format(_t_name), - dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), - ), + ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ( - "{}_value".format(_t_name), - dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), - ), + ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -161,13 +143,7 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, - rows=2, - cols=2, - row_width=[1, 2], - column_width=[3, 1], - print_grid=False, - specs=specs, + vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index fe1d6113709..23f9c592c0a 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,10 +72,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict( - date=_trading_date, - code_list=_trading_day_df[_trading_day_df["status"] != -1].index, - ) + previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 72a358adcbf..9a4d834ed92 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,11 +23,7 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, - label_data, - calculate_label_rank=True, - start_date=start_date, - end_date=end_date, + position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, ) res_dict = dict() @@ -51,20 +47,14 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, - xaxis=dict(type="category", tickangle=45), - yaxis=dict(title="lable-rank-ratio: %"), + title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, - label_data: pd.DataFrame, - start_date=None, - end_date=None, - show_notebook=True, + position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index f82e654c432..8e2c05c0a38 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,9 +123,7 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, { "type": "rect", @@ -137,20 +135,13 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, - vertical_spacing=0.01, - rows=7, - cols=1, - row_width=[1, 1, 1, 3, 1, 1, 3], - print_grid=False, + shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 70e382fb165..dbbc411109d 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,11 +311,7 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, - col=col, - name=res_name, - kind=self._kind_map["kind"], - graph_kwargs=self._kind_map["kwargs"], + row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index dd90437b03f..ee3ee03ecfd 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,10 +57,7 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min( - max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), - sold_stock_weight, - ) + add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 494981ecc09..6f168b4dd52 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,14 +102,10 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, - cash=current_tradable_value, - trade_date=trade_date, + weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, - current_position=current_amount_dict, - trade_date=trade_date, + target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, ) return order_list @@ -164,8 +160,6 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, - 
current_position=current.get_stock_amount_dict(), - trade_date=trade_date, + target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 711658c9a63..409410a2ab4 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,11 +13,7 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", - "--config_path", - required=True, - type=str, - help="config path indicates where to load yaml config.", + "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 76f101671b7..57f57a6c34e 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,8 +10,5 @@ } QLibDataLabelSpace = { - "labels": hp.choice( - "labels", - [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], - ) + "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 2ce957859b2..e81d41a9ad0 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,10 +28,7 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join( - self.tuner_config["experiment"]["dir"], - self.tuner_config["experiment"]["name"], - ) + self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) self.best_params = None self.best_res = None @@ -42,10 +39,7 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, - space=self.space, - algo=tpe.suggest, - max_evals=self.max_evals, + fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -159,8 +153,7 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), - QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -173,27 +166,20 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - model_space_name, - ) + model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - strategy_space_name, - ) + strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index 5244a7e45cf..d1a68cb3857 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,8 +26,7 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", - lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 762467da35e..47cded79cec 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,14 +328,7 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=1, - **kwargs, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -414,29 +407,13 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - spans, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - None, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), ) p.close() @@ -598,12 +575,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list( - filter( - lambda x: x[0] <= x[1], - [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], - ) - ) + inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -723,14 +695,7 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, - args=( - inst, - start_time, - end_time, - freq, - column_names, - ), + LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), ) p.close() @@ -763,12 +728,7 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={ - "start_time": str(start_time), - "end_time": str(end_time), - "freq": freq, - "future": future, - 
}, + request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -832,14 +792,7 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=0, - return_uri=False, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -942,13 +895,7 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=None, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index feda1904463..58e2bd96811 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,10 +32,7 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, - selector: Union[pd.Timestamp, slice, str, list], - level: Union[str, int], - fetch_orig=True, + df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 70f9d32780d..811fd387f14 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,12 +341,7 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, - [self.rule_expression], - fstart, - fend, - freq=self.filter_freq, - disk_cache=0, + instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/portfolio/__init__.py b/qlib/portfolio/__init__.py index b7c525821a8..59e481eb93d 100644 --- a/qlib/portfolio/__init__.py +++ b/qlib/portfolio/__init__.py @@ -1,3 +1,2 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
- diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index f92e7278758..eb6f9c5edb5 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,10 +18,6 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=provider_uri, - delete_old=False, + name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index be458a24d29..0c704b89669 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,10 +193,7 @@ def generate(self): } ) objects.update( - { - "long_short_r.pkl": long_short_r, - "long_avg_r.pkl": long_avg_r, - } + {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index f6e77cba4d8..8b3819c8302 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,10 +78,7 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, @@ -176,9 +173,7 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, - "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index dfa7f8556dd..de649c37edf 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,9 +40,7 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod @@ -54,10 +52,7 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map( - pd.Timestamp, - pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, - ) + map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b910d..d5637b02595 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,9 +26,7 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index a3973be5ae9..494962cc334 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -33,7 +33,7 @@ def 
test_nan_option_covariance(self): NUM_OBSERVATION = 200 EPS = 1e-6 - estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option="fill") X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) @@ -50,7 +50,7 @@ def test_decompose_covariance(self): NUM_VARIABLE = 10 NUM_OBSERVATION = 200 - estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option="fill") X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) From 53cf89d7c22f42234a452507cca67a98662e4ad9 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 19:43:03 +0800 Subject: [PATCH 32/32] Reformat with black. --- qlib/config.py | 24 +++++-- qlib/contrib/backtest/__init__.py | 18 ++++- qlib/contrib/backtest/profit_attribution.py | 23 +++++-- qlib/contrib/data/handler.py | 10 ++- qlib/contrib/eva/alpha.py | 6 +- qlib/contrib/evaluate.py | 7 +- qlib/contrib/evaluate_portfolio.py | 16 ++++- qlib/contrib/model/catboost_model.py | 4 +- qlib/contrib/model/pytorch_alstm.py | 21 ++++-- qlib/contrib/model/pytorch_alstm_ts.py | 17 +++-- qlib/contrib/model/pytorch_gats.py | 22 ++++-- qlib/contrib/model/pytorch_gats_ts.py | 18 ++++- qlib/contrib/model/pytorch_gru.py | 21 ++++-- qlib/contrib/model/pytorch_gru_ts.py | 17 ++++- qlib/contrib/model/pytorch_lstm.py | 21 ++++-- qlib/contrib/model/pytorch_lstm_ts.py | 17 ++++- qlib/contrib/model/pytorch_nn.py | 6 +- qlib/contrib/model/pytorch_sfm.py | 19 ++++- qlib/contrib/model/pytorch_tabnet.py | 14 +++- qlib/contrib/model/xgboost.py | 4 +- qlib/contrib/online/executor.py | 24 +++++-- qlib/contrib/online/manager.py | 6 +- qlib/contrib/online/operator.py | 8 ++- qlib/contrib/online/utils.py | 6 +- .../analysis_model_performance.py | 66 ++++++++++++++---- .../analysis_position/cumulative_return.py | 36 ++++++++-- .../analysis_position/parse_position.py | 5 +- .../report/analysis_position/rank_label.py | 16 ++++- .../report/analysis_position/report.py | 15 +++- qlib/contrib/report/graph.py | 6 +- qlib/contrib/strategy/cost_control.py | 5 +- qlib/contrib/strategy/order_generator.py | 12 +++- qlib/contrib/tuner/launcher.py | 6 +- qlib/contrib/tuner/space.py | 5 +- qlib/contrib/tuner/tuner.py | 26 +++++-- qlib/data/client.py | 3 +- qlib/data/data.py | 69 ++++++++++++++++--- qlib/data/dataset/utils.py | 5 +- qlib/data/filter.py | 7 +- qlib/portfolio/optimizer/enhanced_indexing.py | 6 +- qlib/tests/__init__.py | 6 +- qlib/workflow/record_temp.py | 5 +- tests/test_all_pipeline.py | 9 ++- tests/test_dump_data.py | 9 ++- tests/test_get_data.py | 4 +- 45 files changed, 548 insertions(+), 122 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 344eb852777..52b05568d57 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,7 +115,12 @@ def set_conf_from_C(self, config_c): "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, + "filters": { + "field_not_found": { + "()": "qlib.log.LogFilter", + "param": [".*?WARN: data not found for.*?"], + } + }, "handlers": { "console": { "class": "logging.StreamHandler", @@ -130,7 +135,10 @@ def set_conf_from_C(self, config_c): "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + 
str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), + "default_exp_name": "Experiment", + }, }, } @@ -192,8 +200,16 @@ def set_conf_from_C(self, config_c): } _default_region_config = { - REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, - REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, + REG_CN: { + "trade_unit": 100, + "limit_threshold": 0.099, + "deal_price": "vwap", + }, + REG_US: { + "trade_unit": 1, + "limit_threshold": None, + "deal_price": "close", + }, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index bd3494abf6a..aa24ffb0cf6 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,7 +18,13 @@ def get_strategy( - strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, + strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, ): """get_strategy @@ -69,7 +75,11 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -162,7 +172,9 @@ def get_exchange( def get_executor( - executor=None, trade_exchange=None, verbose=True, + executor=None, + trade_exchange=None, + verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 355f0637395..20c6f638fcd 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,7 +12,10 @@ def get_benchmark_weight( - bench, start_date=None, end_date=None, path=None, + bench, + start_date=None, + end_date=None, + path=None, ): """get_benchmark_weight @@ -213,7 +216,12 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", + positions, + bench="SH000905", + group_field="industry", + group_method="category", + group_n=None, + deal_price="vwap", ): """brinson profit attribution @@ -247,10 +255,17 @@ def brinson_pa( # suspend stock is NAN. 
So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, + D.instruments(market="all"), + start_time=shift_start_date, + end_time=end_date, + as_list=True, ) stock_df = D.features( - instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", + instruments, + [group_field, deal_price], + start_time=shift_start_date, + end_time=end_date, + freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 574287819b7..970b032d6b0 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,7 +21,10 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -167,7 +170,10 @@ def __init__( def get_feature_config(self): conf = { "kbar": {}, - "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, + "price": { + "windows": [0], + "feature": ["OPEN", "HIGH", "LOW", "VWAP"], + }, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 363a184582d..c68571853f1 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,7 +35,11 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, + pred: pd.Series, + label: pd.Series, + date_col: str = "datetime", + quantile: float = 0.2, + dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 5cb1ce4eb67..4aa5b55156f 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,7 +244,12 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) + return dict( + zip( + ["long", "short", "long_short"], + map(pd.Series, [long_returns, short_returns, ls_returns]), + ) + ) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 2d94105e482..04ddd8db041 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,7 +64,12 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, + instruments, + fields, + start_time=evaluate_date, + end_time=evaluate_date, + freq="day", + disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -82,7 +87,14 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - 
close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) + close_data_df = D.features( + instruments, + fields, + start_time=start_date, + end_time=end_date, + freq="day", + disk_cache=0, + ) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 2840c2cef5a..d57c32b7022 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,7 +32,9 @@ def fit( **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 306e68aadf2..bbbb61851b1 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -319,12 +328,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 612bacbec93..725568de855 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): 
return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -302,12 +309,14 @@ def _build_model(self): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index c59dc91973f..07048e1bc1a 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,11 +229,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -334,11 +340,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index dfc5f4ab5ed..1e94f56e418 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,7 +242,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -357,11 +361,19 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_mod if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": 
self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index d2a774b65b4..84f863b9fb0 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 49f438cc379..bb6618b854c 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 02ca16e36b8..163d500ec87 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ 
-118,7 +118,10 @@ def __init__( torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ def test_epoch(self, data_x, data_y): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index 2ec36f96e34..cf4f8fb9f1f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,7 +123,10 @@ def __init__( torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ def test_epoch(self, data_loader): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 8c1a77ec3c5..16fcea9ff53 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,7 +154,11 @@ def __init__( self.dnn_model.to(self.device) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 1f7433e053d..d5169e6c7bd 100644 --- 
a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,7 +30,14 @@ class SFM_Model(nn.Module): def __init__( - self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", + self, + d_feat=6, + output_dim=1, + freq_dim=10, + hidden_size=64, + dropout_W=0.0, + dropout_U=0.0, + device="cpu", ): super().__init__() @@ -355,11 +362,17 @@ def train_epoch(self, x_train, y_train): self.train_optimizer.step() def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 18e9d8eb404..62e32d701ce 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,7 +120,9 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -154,7 +156,11 @@ def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"): break def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -166,7 +172,9 @@ def fit( # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index e37725c2eb6..ba2e5789b85 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,7 +29,9 @@ def fit( ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 52b86888133..2bd0937a032 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,13 +150,21 @@ def execute(self, trade_account, order_list, trade_date): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) else: 
print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) @@ -263,13 +271,21 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.SELL, + factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.BUY, + factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index a4476709de0..cf850b9dace 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,10 +84,12 @@ def save_user_data(self, user_id): raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, + self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, + self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c82deb3945c..c8b44f57858 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,7 +125,9 @@ def generate(self, date, path): trade_date=trade_date, ) save_order_list( - order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + order_list=order_list, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -158,7 +160,9 @@ def execute(self, date, exchange_config, path): order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + trade_info=trade_info, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index fb96c87bd31..611af63e4af 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,7 +79,11 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) + dates = 
D.calendar( + start_time=pd.Timestamp(latest_trading_date), + end_time=pd.Timestamp(today), + future=True, + ) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index ef1447a12be..1cb14d26153 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,7 +53,8 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), + layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -61,7 +62,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["long-short", "long-average"], + ), ).figure return group_scatter_figure, group_hist_figure @@ -96,12 +102,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], + names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), + end=pd.Timestamp(f"{_index.max()[:4]}1231"), + freq="1M", ) _years = [] _month = [] @@ -133,15 +142,32 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), + ( + "ic", + dict( + row=1, + col=1, + name="", + kind="DistplotGraph", + graph_kwargs=dict(bin_size=_bin_size), + ), + ), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["IC", "IC %s Dist. 
Q-Q" % dist_name], + ), sub_graph_data=_sub_graph_data, - layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), + layout=dict( + yaxis2=dict(title="Observed Quantile"), + xaxis2=dict(title=f"{dist_name} Distribution Quantile"), + ), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -155,7 +181,8 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, + layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -175,11 +202,17 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) + r_df = pd.DataFrame( + { + "Top": top, + "Bottom": bottom, + } + ) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, + layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -197,7 +230,11 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), + ic_df, + layout=dict( + title="Information Coefficient (IC)", + xaxis=dict(type="category", tickangle=45), + ), ).figure return ic_bar_figure @@ -240,7 +277,12 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, + pred_label=pred_label, + lag=lag, + N=N, + reverse=reverse, + rank=rank, + show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index 604189c94b6..abb68ea6051 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,7 +13,11 @@ def _get_cum_return_data_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ): """ @@ -25,7 +29,11 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, + position=position, + report_normal=report_normal, + label_data=label_data, + start_date=start_date, + end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -79,7 +87,11 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures 
@@ -99,12 +111,18 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), + ( + "cum_{}".format(_t_name), + dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), + ), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), + ( + "{}_value".format(_t_name), + dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), + ), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -143,7 +161,13 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, + vertical_spacing=0.01, + rows=2, + cols=2, + row_width=[1, 2], + column_width=[3, 1], + print_grid=False, + specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 23f9c592c0a..fe1d6113709 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,7 +72,10 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) + previous_data = dict( + date=_trading_date, + code_list=_trading_day_df[_trading_day_df["status"] != -1].index, + ) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 9a4d834ed92..72a358adcbf 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,7 +23,11 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, + position, + label_data, + calculate_label_rank=True, + start_date=start_date, + end_date=end_date, ) res_dict = dict() @@ -47,14 +51,20 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), + title=_col, + xaxis=dict(type="category", tickangle=45), + yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, + position: dict, + label_data: pd.DataFrame, + start_date=None, + end_date=None, + show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. 
Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 8e2c05c0a38..f82e654c432 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,7 +123,9 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, { "type": "rect", @@ -135,13 +137,20 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index dbbc411109d..70e382fb165 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,7 +311,11 @@ def _init_sub_graph_data(self): _temp_row_data = ( column_name, dict( - row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index ee3ee03ecfd..dd90437b03f 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,7 +57,10 @@ def generate_target_weight_position(self, score, current, trade_date): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) + add_weight = min( + max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), + sold_stock_weight, + ) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 6f168b4dd52..494981ecc09 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,10 +102,14 @@ def generate_order_list_from_target_weight_position( # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, + weight_position=target_weight_position, + cash=current_tradable_value, + trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, + target_position=target_amount_dict, + current_position=current_amount_dict, + trade_date=trade_date, ) return order_list @@ -160,6 +164,8 @@ def generate_order_list_from_target_weight_position( else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, 
current_position=current.get_stock_amount_dict(), trade_date=trade_date, + target_position=amount_dict, + current_position=current.get_stock_amount_dict(), + trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 409410a2ab4..711658c9a63 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,7 +13,11 @@ args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", + "-c", + "--config_path", + required=True, + type=str, + help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 57f57a6c34e..76f101671b7 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,5 +10,8 @@ } QLibDataLabelSpace = { - "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) + "labels": hp.choice( + "labels", + [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], + ) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index e81d41a9ad0..2ce957859b2 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,7 +28,10 @@ def __init__(self, tuner_config, optim_config): self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) + self.ex_dir = os.path.join( + self.tuner_config["experiment"]["dir"], + self.tuner_config["experiment"]["name"], + ) self.best_params = None self.best_res = None @@ -39,7 +42,10 @@ def tune(self): TimeInspector.set_time_mark() fmin( - fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, + fn=self.objective, + space=self.space, + algo=tpe.suggest, + max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -153,7 +159,8 @@ def setup_estimator_config(self, params): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), + QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -166,20 +173,27 @@ def setup_space(self): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) + model_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + model_space_name, + ) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) + strategy_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + strategy_space_name, + ) # 3. 
Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), + data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index d1a68cb3857..5244a7e45cf 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,7 +26,8 @@ def __init__(self, host, port): self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", + lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 47cded79cec..762467da35e 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,7 +328,14 @@ def dataset(self, instruments, fields, start_time=None, end_time=None, freq="day raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=1, + **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -407,13 +414,29 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + spans, + C, + ), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + None, + C, + ), ) p.close() @@ -575,7 +598,12 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) + inst: list( + filter( + lambda x: x[0] <= x[1], + [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], + ) + ) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -695,7 +723,14 @@ def multi_cache_walker(instruments, fields, start_time=None, end_time=None, freq for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), + LocalDatasetProvider.cache_walker, + args=( + inst, + start_time, + end_time, + freq, + column_names, + ), ) p.close() @@ -728,7 +763,12 @@ def set_conn(self, conn): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, + 
request_content={ + "start_time": str(start_time), + "end_time": str(end_time), + "freq": freq, + "future": future, + }, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -792,7 +832,14 @@ def set_conn(self, conn): self.queue = queue.Queue() def dataset( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=0, + return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -895,7 +942,13 @@ def list_instruments(self, instruments, start_time=None, end_time=None, freq="da return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index 58e2bd96811..feda1904463 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,7 +32,10 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, + df: pd.DataFrame, + selector: Union[pd.Timestamp, slice, str, list], + level: Union[str, int], + fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 811fd387f14..70f9d32780d 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,7 +341,12 @@ def _getFilterSeries(self, instruments, fstart, fend): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, + instruments, + [self.rule_expression], + fstart, + fend, + freq=self.filter_freq, + disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/portfolio/optimizer/enhanced_indexing.py b/qlib/portfolio/optimizer/enhanced_indexing.py index 5fdc1014ddf..5a7a0804dbd 100644 --- a/qlib/portfolio/optimizer/enhanced_indexing.py +++ b/qlib/portfolio/optimizer/enhanced_indexing.py @@ -56,7 +56,11 @@ def __init__( assert inds_dev is None or inds_dev >= 0, "industry deviation limit `inds_dev` should be positive or None." 
self.inds_dev = inds_dev - assert warm_start in [None, self.START_FROM_W0, self.START_FROM_BENCH,], "illegal warm start option" + assert warm_start in [ + None, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" self.start_from_w0 = warm_start == self.START_FROM_W0 self.start_from_bench = warm_start == self.START_FROM_BENCH diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index eb6f9c5edb5..f92e7278758 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,6 +18,10 @@ def setUpClass(cls) -> None: print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, + name="qlib_data_simple", + region="cn", + interval="1d", + target_dir=provider_uri, + delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 0c704b89669..be458a24d29 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,7 +193,10 @@ def generate(self): } ) objects.update( - {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index 8b3819c8302..f6e77cba4d8 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,7 +78,10 @@ "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": {"topk": 50, "n_drop": 5,}, + "kwargs": { + "topk": 50, + "n_drop": 5, + }, }, "backtest": { "verbose": False, @@ -173,7 +176,9 @@ def test_0_train(self): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], + 0.10, + "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index de649c37edf..dfa7f8556dd 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,7 +40,9 @@ def setUpClass(cls) -> None: TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod @@ -52,7 +54,10 @@ def test_0_dump_bin(self): def test_1_dump_calendars(self): ori_calendars = set( - map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) + map( + pd.Timestamp, + pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, + ) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index d5637b02595..c511d1b910d 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,7 +26,9 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, expression_cache=None, dataset_cache=None, + 
provider_uri=provider_uri, + expression_cache=None, + dataset_cache=None, ) @classmethod
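[Editor's note] The test hunks above all share one bootstrap pattern: fetch the sample data once if it is missing, then call qlib.init with both caches disabled so the tests read the freshly dumped files directly. A self-contained sketch of that pattern under stated assumptions — the GetData import location and the local data directory are guesses for illustration; the keyword arguments mirror the calls in qlib/tests/__init__.py and tests/test_dump_data.py:

    from pathlib import Path

    import qlib
    from qlib.tests.data import GetData  # import path assumed

    # hypothetical local data directory
    provider_uri = Path("~/.qlib/qlib_data/cn_data_simple").expanduser()

    if not provider_uri.exists():
        GetData().qlib_data(
            name="qlib_data_simple",
            region="cn",
            interval="1d",
            target_dir=str(provider_uri),
            delete_old=False,
        )

    qlib.init(
        provider_uri=str(provider_uri.resolve()),
        expression_cache=None,  # disable caches, as in the reformatted tests
        dataset_cache=None,
    )
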