From 525dd170482724555da047004d571ddfb81a528f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=98=81=E9=94=90?= <40440788+2young-2simple-sometimes-naive@users.noreply.github.com> Date: Fri, 31 Dec 2021 23:13:26 -0500 Subject: [PATCH 1/4] Fix $volume normalization issue Fix: https://github.com/microsoft/qlib/issues/765 --- qlib/contrib/data/handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 1e82ed67eeb..5ae8eb8b9ab 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -126,9 +126,9 @@ def get_feature_config(self): fields += ["$vwap/$close"] names += ["VWAP0"] for i in range(59, 0, -1): - fields += ["Ref($volume, %d)/$volume" % (i)] + fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)] names += ["VOLUME%d" % (i)] - fields += ["$volume/$volume"] + fields += ["($volume+1e-12)/($volume+1e-12)"] names += ["VOLUME0"] return fields, names @@ -249,7 +249,7 @@ def parse_config_to_fields(config): names += [field.upper() + str(d) for d in windows] if "volume" in config: windows = config["volume"].get("windows", range(5)) - fields += ["Ref($volume, %d)/$volume" % d if d != 0 else "$volume/$volume" for d in windows] + fields += ["Ref($volume, %d)/($volume+1e-12)" % d if d != 0 else "($volume+1e-12)/($volume+1e-12)" for d in windows] names += ["VOLUME" + str(d) for d in windows] if "rolling" in config: windows = config["rolling"].get("windows", [5, 10, 20, 30, 60]) From 2ee0c83f15ea0c398d5ebf5c39ea64b564b8eed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=98=81=E9=94=90?= <40440788+2young-2simple-sometimes-naive@users.noreply.github.com> Date: Sat, 1 Jan 2022 05:43:06 -0500 Subject: [PATCH 2/4] black formatting black formatting --- qlib/contrib/data/handler.py | 64 ++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 5ae8eb8b9ab..9c59a3f316b 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -61,8 +61,12 @@ def __init__( inst_processor=None, **kwargs, ): - infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) - learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) + infer_processors = check_transform_proc( + infer_processors, fit_start_time, fit_end_time + ) + learn_processors = check_transform_proc( + learn_processors, fit_start_time, fit_end_time + ) data_loader = { "class": "QlibDataLoader", @@ -128,7 +132,7 @@ def get_feature_config(self): for i in range(59, 0, -1): fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)] names += ["VOLUME%d" % (i)] - fields += ["($volume+1e-12)/($volume+1e-12)"] + fields += ["$volume+1/($volume+1e-12)"] names += ["VOLUME0"] return fields, names @@ -155,8 +159,12 @@ def __init__( inst_processor=None, **kwargs, ): - infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) - learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) + infer_processors = check_transform_proc( + infer_processors, fit_start_time, fit_end_time + ) + learn_processors = check_transform_proc( + learn_processors, fit_start_time, fit_end_time + ) data_loader = { "class": "QlibDataLoader", @@ -242,14 +250,26 @@ def parse_config_to_fields(config): ] if "price" in config: windows = config["price"].get("windows", range(5)) - feature = config["price"].get("feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"]) + feature = config["price"].get( + "feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"] + ) for field in feature: field = field.lower() - fields += ["Ref($%s, %d)/$close" % (field, d) if d != 0 else "$%s/$close" % field for d in windows] + fields += [ + "Ref($%s, %d)/$close" % (field, d) + if d != 0 + else "$%s/$close" % field + for d in windows + ] names += [field.upper() + str(d) for d in windows] if "volume" in config: windows = config["volume"].get("windows", range(5)) - fields += ["Ref($volume, %d)/($volume+1e-12)" % d if d != 0 else "($volume+1e-12)/($volume+1e-12)" for d in windows] + fields += [ + "Ref($volume, %d)/($volume+1e-12)" % d + if d != 0 + else "$volume/($volume+1e-12)" + for d in windows + ] names += ["VOLUME" + str(d) for d in windows] if "rolling" in config: windows = config["rolling"].get("windows", [5, 10, 20, 30, 60]) @@ -292,7 +312,11 @@ def parse_config_to_fields(config): fields += ["Rank($close, %d)" % d for d in windows] names += ["RANK%d" % d for d in windows] if use("RSV"): - fields += ["($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" % (d, d, d) for d in windows] + fields += [ + "($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" + % (d, d, d) + for d in windows + ] names += ["RSV%d" % d for d in windows] if use("IMAX"): fields += ["IdxMax($high, %d)/%d" % (d, d) for d in windows] @@ -301,13 +325,19 @@ def parse_config_to_fields(config): fields += ["IdxMin($low, %d)/%d" % (d, d) for d in windows] names += ["IMIN%d" % d for d in windows] if use("IMXD"): - fields += ["(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) for d in windows] + fields += [ + "(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) + for d in windows + ] names += ["IMXD%d" % d for d in windows] if use("CORR"): fields += ["Corr($close, Log($volume+1), %d)" % d for d in windows] names += ["CORR%d" % d for d in windows] if use("CORD"): - fields += ["Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d for d in windows] + fields += [ + "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d + for d in windows + ] names += ["CORD%d" % d for d in windows] if use("CNTP"): fields += ["Mean($close>Ref($close, 1), %d)" % d for d in windows] @@ -316,17 +346,23 @@ def parse_config_to_fields(config): fields += ["Mean($closeRef($close, 1), %d)-Mean($closeRef($close, 1), %d)-Mean($close Date: Sat, 1 Jan 2022 05:44:43 -0500 Subject: [PATCH 3/4] black formatting black formatting --- qlib/contrib/data/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 9c59a3f316b..668661dcffe 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -132,7 +132,7 @@ def get_feature_config(self): for i in range(59, 0, -1): fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)] names += ["VOLUME%d" % (i)] - fields += ["$volume+1/($volume+1e-12)"] + fields += ["$volume/($volume+1e-12)"] names += ["VOLUME0"] return fields, names From e33a379c979e3ce8ea8e2651f5bc674414776882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=98=81=E9=94=90?= <40440788+2young-2simple-sometimes-naive@users.noreply.github.com> Date: Sat, 1 Jan 2022 05:52:06 -0500 Subject: [PATCH 4/4] black formatting black formatting --- qlib/contrib/data/handler.py | 62 ++++++++---------------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 668661dcffe..3c6a93f2269 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -61,12 +61,8 @@ def __init__( inst_processor=None, **kwargs, ): - infer_processors = check_transform_proc( - infer_processors, fit_start_time, fit_end_time - ) - learn_processors = check_transform_proc( - learn_processors, fit_start_time, fit_end_time - ) + infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) + learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) data_loader = { "class": "QlibDataLoader", @@ -159,12 +155,8 @@ def __init__( inst_processor=None, **kwargs, ): - infer_processors = check_transform_proc( - infer_processors, fit_start_time, fit_end_time - ) - learn_processors = check_transform_proc( - learn_processors, fit_start_time, fit_end_time - ) + infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) + learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) data_loader = { "class": "QlibDataLoader", @@ -250,26 +242,14 @@ def parse_config_to_fields(config): ] if "price" in config: windows = config["price"].get("windows", range(5)) - feature = config["price"].get( - "feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"] - ) + feature = config["price"].get("feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"]) for field in feature: field = field.lower() - fields += [ - "Ref($%s, %d)/$close" % (field, d) - if d != 0 - else "$%s/$close" % field - for d in windows - ] + fields += ["Ref($%s, %d)/$close" % (field, d) if d != 0 else "$%s/$close" % field for d in windows] names += [field.upper() + str(d) for d in windows] if "volume" in config: windows = config["volume"].get("windows", range(5)) - fields += [ - "Ref($volume, %d)/($volume+1e-12)" % d - if d != 0 - else "$volume/($volume+1e-12)" - for d in windows - ] + fields += ["Ref($volume, %d)/($volume+1e-12)" % d if d != 0 else "$volume/($volume+1e-12)" for d in windows] names += ["VOLUME" + str(d) for d in windows] if "rolling" in config: windows = config["rolling"].get("windows", [5, 10, 20, 30, 60]) @@ -312,11 +292,7 @@ def parse_config_to_fields(config): fields += ["Rank($close, %d)" % d for d in windows] names += ["RANK%d" % d for d in windows] if use("RSV"): - fields += [ - "($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" - % (d, d, d) - for d in windows - ] + fields += ["($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" % (d, d, d) for d in windows] names += ["RSV%d" % d for d in windows] if use("IMAX"): fields += ["IdxMax($high, %d)/%d" % (d, d) for d in windows] @@ -325,19 +301,13 @@ def parse_config_to_fields(config): fields += ["IdxMin($low, %d)/%d" % (d, d) for d in windows] names += ["IMIN%d" % d for d in windows] if use("IMXD"): - fields += [ - "(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) - for d in windows - ] + fields += ["(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) for d in windows] names += ["IMXD%d" % d for d in windows] if use("CORR"): fields += ["Corr($close, Log($volume+1), %d)" % d for d in windows] names += ["CORR%d" % d for d in windows] if use("CORD"): - fields += [ - "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d - for d in windows - ] + fields += ["Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d for d in windows] names += ["CORD%d" % d for d in windows] if use("CNTP"): fields += ["Mean($close>Ref($close, 1), %d)" % d for d in windows] @@ -346,23 +316,17 @@ def parse_config_to_fields(config): fields += ["Mean($closeRef($close, 1), %d)-Mean($closeRef($close, 1), %d)-Mean($close