From 4d9d48282a80ea84c08b71dd720cd06b264e41d4 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 12 May 2020 14:48:38 +0200 Subject: [PATCH 01/15] Add a test for basic sklearn API conformance The "default constructible" (i.e. no required __init__ parameters) property is the most basic property of a scikit-learn estimator. It is a prerequisite for all other estimator checks. --- csrank/tests/test_estimators.py | 75 +++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 csrank/tests/test_estimators.py diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py new file mode 100644 index 00000000..31394608 --- /dev/null +++ b/csrank/tests/test_estimators.py @@ -0,0 +1,75 @@ +"""Check that our estimators adhere to the scikit-learn interface. + +https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator +""" + +import pytest +from sklearn.utils.estimator_checks import check_parameters_default_constructible + +from csrank.choicefunction import CmpNetChoiceFunction +from csrank.choicefunction import FATEChoiceFunction +from csrank.choicefunction import FATELinearChoiceFunction +from csrank.choicefunction import FETAChoiceFunction +from csrank.choicefunction import FETALinearChoiceFunction +from csrank.choicefunction import GeneralizedLinearModel +from csrank.choicefunction import PairwiseSVMChoiceFunction +from csrank.choicefunction import RankNetChoiceFunction +from csrank.discretechoice import CmpNetDiscreteChoiceFunction +from csrank.discretechoice import FATEDiscreteChoiceFunction +from csrank.discretechoice import FATELinearDiscreteChoiceFunction +from csrank.discretechoice import FETADiscreteChoiceFunction +from csrank.discretechoice import FETALinearDiscreteChoiceFunction +from csrank.discretechoice import GeneralizedNestedLogitModel +from csrank.discretechoice import MixedLogitModel +from csrank.discretechoice import MultinomialLogitModel +from csrank.discretechoice import 
NestedLogitModel +from csrank.discretechoice import PairedCombinatorialLogit +from csrank.discretechoice import PairwiseSVMDiscreteChoiceFunction +from csrank.discretechoice import RankNetDiscreteChoiceFunction +from csrank.objectranking import CmpNet +from csrank.objectranking import ExpectedRankRegression +from csrank.objectranking import FATELinearObjectRanker +from csrank.objectranking import FATEObjectRanker +from csrank.objectranking import FETALinearObjectRanker +from csrank.objectranking import FETAObjectRanker +from csrank.objectranking import ListNet +from csrank.objectranking import RankNet +from csrank.objectranking import RankSVM + + +@pytest.mark.parametrize( + "Estimator", + [ + CmpNet, + CmpNetChoiceFunction, + CmpNetDiscreteChoiceFunction, + ExpectedRankRegression, + FATEChoiceFunction, + FATEDiscreteChoiceFunction, + FATELinearChoiceFunction, + FATELinearDiscreteChoiceFunction, + FATELinearObjectRanker, + FATEObjectRanker, + FETAChoiceFunction, + FETADiscreteChoiceFunction, + FETALinearChoiceFunction, + FETALinearDiscreteChoiceFunction, + FETALinearObjectRanker, + FETAObjectRanker, + GeneralizedLinearModel, + GeneralizedNestedLogitModel, + ListNet, + MixedLogitModel, + MultinomialLogitModel, + NestedLogitModel, + PairedCombinatorialLogit, + PairwiseSVMChoiceFunction, + PairwiseSVMDiscreteChoiceFunction, + RankNet, + RankNetChoiceFunction, + RankNetDiscreteChoiceFunction, + RankSVM, + ], +) +def test_all_estimators(Estimator): + check_parameters_default_constructible("default_constructible", Estimator) From 8d77e802ea90827be8291623efb7a12b8874efd0 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 1 Aug 2020 17:51:06 +0200 Subject: [PATCH 02/15] Add check_no_attributes_set_in_init check --- csrank/tests/test_estimators.py | 58 +++++++++++++++++---------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 31394608..9f5dc1ed 100644 --- 
a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -5,6 +5,7 @@ import pytest from sklearn.utils.estimator_checks import check_parameters_default_constructible +from sklearn.utils.estimator_checks import check_no_attributes_set_in_init from csrank.choicefunction import CmpNetChoiceFunction from csrank.choicefunction import FATEChoiceFunction @@ -41,35 +42,36 @@ "Estimator", [ CmpNet, - CmpNetChoiceFunction, - CmpNetDiscreteChoiceFunction, - ExpectedRankRegression, - FATEChoiceFunction, - FATEDiscreteChoiceFunction, - FATELinearChoiceFunction, - FATELinearDiscreteChoiceFunction, - FATELinearObjectRanker, - FATEObjectRanker, - FETAChoiceFunction, - FETADiscreteChoiceFunction, - FETALinearChoiceFunction, - FETALinearDiscreteChoiceFunction, - FETALinearObjectRanker, - FETAObjectRanker, - GeneralizedLinearModel, - GeneralizedNestedLogitModel, - ListNet, - MixedLogitModel, - MultinomialLogitModel, - NestedLogitModel, - PairedCombinatorialLogit, - PairwiseSVMChoiceFunction, - PairwiseSVMDiscreteChoiceFunction, - RankNet, - RankNetChoiceFunction, - RankNetDiscreteChoiceFunction, - RankSVM, + # CmpNetChoiceFunction, + # CmpNetDiscreteChoiceFunction, + # ExpectedRankRegression, + # FATEChoiceFunction, + # FATEDiscreteChoiceFunction, + # FATELinearChoiceFunction, + # FATELinearDiscreteChoiceFunction, + # FATELinearObjectRanker, + # FATEObjectRanker, + # FETAChoiceFunction, + # FETADiscreteChoiceFunction, + # FETALinearChoiceFunction, + # FETALinearDiscreteChoiceFunction, + # FETALinearObjectRanker, + # FETAObjectRanker, + # GeneralizedLinearModel, + # GeneralizedNestedLogitModel, + # ListNet, + # MixedLogitModel, + # MultinomialLogitModel, + # NestedLogitModel, + # PairedCombinatorialLogit, + # PairwiseSVMChoiceFunction, + # PairwiseSVMDiscreteChoiceFunction, + # RankNet, + # RankNetChoiceFunction, + # RankNetDiscreteChoiceFunction, + # RankSVM, ], ) def test_all_estimators(Estimator): check_parameters_default_constructible("default_constructible", 
Estimator) + check_no_attributes_set_in_init("no_attributes_set_in_init", Estimator()) From 8d941f90ca5ccc81c765b96e21a20744a8ad58a6 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 17 Sep 2020 14:19:38 +0200 Subject: [PATCH 03/15] Move to a blacklist model for checks --- csrank/tests/test_estimators.py | 95 ++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 9f5dc1ed..32058e29 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -3,9 +3,10 @@ https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator """ +from functools import partial + import pytest -from sklearn.utils.estimator_checks import check_parameters_default_constructible -from sklearn.utils.estimator_checks import check_no_attributes_set_in_init +from sklearn.utils.estimator_checks import check_estimator from csrank.choicefunction import CmpNetChoiceFunction from csrank.choicefunction import FATEChoiceFunction @@ -42,36 +43,66 @@ "Estimator", [ CmpNet, - # CmpNetChoiceFunction, - # CmpNetDiscreteChoiceFunction, - # ExpectedRankRegression, - # FATEChoiceFunction, - # FATEDiscreteChoiceFunction, - # FATELinearChoiceFunction, - # FATELinearDiscreteChoiceFunction, - # FATELinearObjectRanker, - # FATEObjectRanker, - # FETAChoiceFunction, - # FETADiscreteChoiceFunction, - # FETALinearChoiceFunction, - # FETALinearDiscreteChoiceFunction, - # FETALinearObjectRanker, - # FETAObjectRanker, - # GeneralizedLinearModel, - # GeneralizedNestedLogitModel, - # ListNet, - # MixedLogitModel, - # MultinomialLogitModel, - # NestedLogitModel, - # PairedCombinatorialLogit, - # PairwiseSVMChoiceFunction, - # PairwiseSVMDiscreteChoiceFunction, - # RankNet, - # RankNetChoiceFunction, - # RankNetDiscreteChoiceFunction, - # RankSVM, + CmpNetChoiceFunction, + CmpNetDiscreteChoiceFunction, + ExpectedRankRegression, + FATEChoiceFunction, + 
FATEDiscreteChoiceFunction, + FATELinearChoiceFunction, + FATELinearDiscreteChoiceFunction, + FATELinearObjectRanker, + FATEObjectRanker, + FETAChoiceFunction, + FETADiscreteChoiceFunction, + FETALinearChoiceFunction, + FETALinearDiscreteChoiceFunction, + FETALinearObjectRanker, + FETAObjectRanker, + GeneralizedLinearModel, + GeneralizedNestedLogitModel, + ListNet, + MixedLogitModel, + MultinomialLogitModel, + NestedLogitModel, + PairedCombinatorialLogit, + PairwiseSVMChoiceFunction, + PairwiseSVMDiscreteChoiceFunction, + RankNet, + RankNetChoiceFunction, + RankNetDiscreteChoiceFunction, + RankSVM, ], ) def test_all_estimators(Estimator): - check_parameters_default_constructible("default_constructible", Estimator) - check_no_attributes_set_in_init("no_attributes_set_in_init", Estimator()) + def get_check_name(check): + if isinstance(check, partial): + return check.func.__name__ + else: + return check.__name__ + + for (estimator, check) in check_estimator(Estimator, generate_only=True): + # checks that attempt to call "fit" do not work since our estimators + # expect a 3-dimensional data shape while scikit-learn assumes two + # dimensions (an array of 1d data). + if not get_check_name(check) in { + "check_estimators_dtypes", + "check_fit_score_takes_y", + "check_estimators_fit_returns_self", + "check_complex_data", + "check_dtype_object", + "check_estimators_empty_data_messages", + "check_pipeline_consistency", + "check_estimators_nan_inf", + "check_estimators_overwrite_params", + "check_estimator_sparse_data", + "check_estimators_pickle", + "check_fit2d_predict1d", + "check_methods_subset_invariance", + "check_fit2d_1sample", + "check_fit2d_1feature", + "check_dict_unchanged", + "check_dont_overwrite_parameters", + "check_fit_idempotent", + "check_fit1d", + }: + check(estimator) From eba15998e1fa5f8b6f13cfef3e1799006061a21a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Fri, 18 Sep 2020 13:43:10 +0200 Subject: [PATCH 04/15] wip! 
wrapper --- csrank/tests/test_estimators.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 32058e29..c8f2dd3d 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -3,6 +3,8 @@ https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator """ +import numpy as np + from functools import partial import pytest @@ -80,12 +82,32 @@ def get_check_name(check): else: return check.__name__ - for (estimator, check) in check_estimator(Estimator, generate_only=True): + def _increase_x_dimension(self, X): + n_instances, n_objects = X.shape + n_features = 1 + return X.reshape((n_instances, n_objects, n_features)) + + class WrappedEstimator(Estimator): + # scikit learn assumes that "X" is an array of one-dimensional + # feature vectors. Our learners however assume an array of objects + # as a "feature vector", hence they expect one more dimension. + # This is one scikit-learn API expectation that we do not fulfill. + # This thin wrapper is needed so that we can still use the other + # estimator checks. It just pretends every feature is itself a + # one-feature object. + + def fit(self, X, Y, *args, **kwargs): + super().fit(_increase_x_dimension(X), Y, *args, **kwargs) + + def predict(self, X, *args, **kwargs): + super().predict(_increase_x_dimension(X), *args, **kwargs) + + for (estimator, check) in check_estimator(WrappedEstimator, generate_only=True): # checks that attempt to call "fit" do not work since our estimators # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d data). 
if not get_check_name(check) in { - "check_estimators_dtypes", + # "check_estimators_dtypes", "check_fit_score_takes_y", "check_estimators_fit_returns_self", "check_complex_data", From 0cba39dd5efc6228ffadebcd461f2d3cfccca2cf Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Fri, 18 Sep 2020 15:49:30 +0200 Subject: [PATCH 05/15] wip! wrapper --- csrank/choicefunction/__init__.py | 22 ++--- csrank/dataset_reader/objectranking/util.py | 2 +- csrank/discretechoice/__init__.py | 32 +++---- csrank/objectranking/__init__.py | 24 ++--- csrank/tests/test_estimators.py | 98 +++++---------------- 5 files changed, 65 insertions(+), 113 deletions(-) diff --git a/csrank/choicefunction/__init__.py b/csrank/choicefunction/__init__.py index 41b4aff4..37ce1dde 100644 --- a/csrank/choicefunction/__init__.py +++ b/csrank/choicefunction/__init__.py @@ -8,14 +8,16 @@ from .pairwise_choice import PairwiseSVMChoiceFunction from .ranknet_choice import RankNetChoiceFunction -__all__ = [ - "AllPositive", - "CmpNetChoiceFunction", - "FATEChoiceFunction", - "FATELinearChoiceFunction", - "FETAChoiceFunction", - "FETALinearChoiceFunction", - "GeneralizedLinearModel", - "PairwiseSVMChoiceFunction", - "RankNetChoiceFunction", +algorithms = [ + AllPositive, + CmpNetChoiceFunction, + FATEChoiceFunction, + FATELinearChoiceFunction, + FETAChoiceFunction, + FETALinearChoiceFunction, + GeneralizedLinearModel, + PairwiseSVMChoiceFunction, + RankNetChoiceFunction, ] + +__all__ = [ algo.__name__ for algo in algorithms ] diff --git a/csrank/dataset_reader/objectranking/util.py b/csrank/dataset_reader/objectranking/util.py index b1306840..b73a9db3 100644 --- a/csrank/dataset_reader/objectranking/util.py +++ b/csrank/dataset_reader/objectranking/util.py @@ -35,7 +35,7 @@ def generate_pairwise_instances(features): def generate_complete_pairwise_dataset(X, Y): """ - Generates the pairiwse preference data from the given rankings.The ranking amongst the objects in a query set + Generates the pairwise preference 
data from the given rankings.The ranking amongst the objects in a query set :math:`Q = \\{x_1, x_2, x_3\\}` is represented by :math:`\\pi = (2,1,3)`, such that :math:`\\pi(2)=1` is the position of the :math:`x_2`. One can extract the following *pairwise preferences* :math:`x_2 \\succ x_1, x_2 \\succ x_3 and x_1 \\succ x_3`. This function generates pairwise preferences which can be used to learn different :class:`ObjectRanker` as: diff --git a/csrank/discretechoice/__init__.py b/csrank/discretechoice/__init__.py index cb5ca873..efbc9d62 100644 --- a/csrank/discretechoice/__init__.py +++ b/csrank/discretechoice/__init__.py @@ -13,19 +13,21 @@ from .pairwise_discrete_choice import PairwiseSVMDiscreteChoiceFunction from .ranknet_discrete_choice import RankNetDiscreteChoiceFunction -__all__ = [ - "RandomBaselineDC", - "CmpNetDiscreteChoiceFunction", - "FATEDiscreteChoiceFunction", - "FATELinearDiscreteChoiceFunction", - "FETADiscreteChoiceFunction", - "FETALinearDiscreteChoiceFunction", - "GeneralizedNestedLogitModel", - "MixedLogitModel", - "ModelSelector", - "MultinomialLogitModel", - "NestedLogitModel", - "PairedCombinatorialLogit", - "PairwiseSVMDiscreteChoiceFunction", - "RankNetDiscreteChoiceFunction", +algorithms = [ + RandomBaselineDC, + CmpNetDiscreteChoiceFunction, + FATEDiscreteChoiceFunction, + FATELinearDiscreteChoiceFunction, + FETADiscreteChoiceFunction, + FETALinearDiscreteChoiceFunction, + GeneralizedNestedLogitModel, + MixedLogitModel, + ModelSelector, + MultinomialLogitModel, + NestedLogitModel, + PairedCombinatorialLogit, + PairwiseSVMDiscreteChoiceFunction, + RankNetDiscreteChoiceFunction, ] + +__all__ = [ algo.__name__ for algo in algorithms ] diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py index ce09aaaf..6e409cc9 100644 --- a/csrank/objectranking/__init__.py +++ b/csrank/objectranking/__init__.py @@ -9,15 +9,17 @@ from .rank_net import RankNet from .rank_svm import RankSVM -__all__ = [ - "CmpNet", - 
"ExpectedRankRegression", - "FATEObjectRanker", - "FATELinearObjectRanker", - "FETAObjectRanker", - "FETALinearObjectRanker", - "ListNet", - "RankNet", - "RankSVM", - "RandomBaselineRanker", +algorithms = [ + CmpNet, + ExpectedRankRegression, + FATEObjectRanker, + FATELinearObjectRanker, + FETAObjectRanker, + FETALinearObjectRanker, + ListNet, + RankNet, + RankSVM, + RandomBaselineRanker, ] + +__all__ = [ algo.__name__ for algo in algorithms ] diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index c8f2dd3d..b425c53a 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -10,84 +10,29 @@ import pytest from sklearn.utils.estimator_checks import check_estimator -from csrank.choicefunction import CmpNetChoiceFunction -from csrank.choicefunction import FATEChoiceFunction -from csrank.choicefunction import FATELinearChoiceFunction -from csrank.choicefunction import FETAChoiceFunction -from csrank.choicefunction import FETALinearChoiceFunction -from csrank.choicefunction import GeneralizedLinearModel -from csrank.choicefunction import PairwiseSVMChoiceFunction -from csrank.choicefunction import RankNetChoiceFunction -from csrank.discretechoice import CmpNetDiscreteChoiceFunction -from csrank.discretechoice import FATEDiscreteChoiceFunction -from csrank.discretechoice import FATELinearDiscreteChoiceFunction -from csrank.discretechoice import FETADiscreteChoiceFunction -from csrank.discretechoice import FETALinearDiscreteChoiceFunction -from csrank.discretechoice import GeneralizedNestedLogitModel -from csrank.discretechoice import MixedLogitModel -from csrank.discretechoice import MultinomialLogitModel -from csrank.discretechoice import NestedLogitModel -from csrank.discretechoice import PairedCombinatorialLogit -from csrank.discretechoice import PairwiseSVMDiscreteChoiceFunction -from csrank.discretechoice import RankNetDiscreteChoiceFunction -from csrank.objectranking import CmpNet -from csrank.objectranking 
import ExpectedRankRegression -from csrank.objectranking import FATELinearObjectRanker -from csrank.objectranking import FATEObjectRanker -from csrank.objectranking import FETALinearObjectRanker -from csrank.objectranking import FETAObjectRanker -from csrank.objectranking import ListNet -from csrank.objectranking import RankNet -from csrank.objectranking import RankSVM +from csrank import objectranking + + +def get_check_name(check): + if isinstance(check, partial): + return check.func.__name__ + else: + return check.__name__ + + +def _reshape_x(X): + n_instances, n_objects = X.shape + n_features = 1 + return X.reshape((n_instances, n_objects, n_features)) @pytest.mark.parametrize( "Estimator", - [ - CmpNet, - CmpNetChoiceFunction, - CmpNetDiscreteChoiceFunction, - ExpectedRankRegression, - FATEChoiceFunction, - FATEDiscreteChoiceFunction, - FATELinearChoiceFunction, - FATELinearDiscreteChoiceFunction, - FATELinearObjectRanker, - FATEObjectRanker, - FETAChoiceFunction, - FETADiscreteChoiceFunction, - FETALinearChoiceFunction, - FETALinearDiscreteChoiceFunction, - FETALinearObjectRanker, - FETAObjectRanker, - GeneralizedLinearModel, - GeneralizedNestedLogitModel, - ListNet, - MixedLogitModel, - MultinomialLogitModel, - NestedLogitModel, - PairedCombinatorialLogit, - PairwiseSVMChoiceFunction, - PairwiseSVMDiscreteChoiceFunction, - RankNet, - RankNetChoiceFunction, - RankNetDiscreteChoiceFunction, - RankSVM, - ], + # TODO write wrappers for choice, discretechoice + objectranking.algorithms, ) def test_all_estimators(Estimator): - def get_check_name(check): - if isinstance(check, partial): - return check.func.__name__ - else: - return check.__name__ - - def _increase_x_dimension(self, X): - n_instances, n_objects = X.shape - n_features = 1 - return X.reshape((n_instances, n_objects, n_features)) - - class WrappedEstimator(Estimator): + class WrappedRanker(Estimator): # scikit learn assumes that "X" is an array of one-dimensional # feature vectors. 
Our learners however assume an array of objects # as a "feature vector", hence they expect one more dimension. @@ -95,14 +40,15 @@ class WrappedEstimator(Estimator): # This thin wrapper is needed so that we can still use the other # estimator checks. It just pretends every feature is itself a # one-feature object. - def fit(self, X, Y, *args, **kwargs): - super().fit(_increase_x_dimension(X), Y, *args, **kwargs) + Xnew = _reshape_x(X) + Ynew = Xnew.argsort(axis=1).argsort(axis=1).squeeze(axis=-1) + super().fit(Xnew, Ynew, *args, **kwargs) def predict(self, X, *args, **kwargs): - super().predict(_increase_x_dimension(X), *args, **kwargs) + super().predict(_reshape_x(X), *args, **kwargs) - for (estimator, check) in check_estimator(WrappedEstimator, generate_only=True): + for (estimator, check) in check_estimator(WrappedRanker, generate_only=True): # checks that attempt to call "fit" do not work since our estimators # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d data). From ecb4f9921204f1cb71c301f0604d99a1af51849b Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Fri, 18 Sep 2020 16:53:12 +0200 Subject: [PATCH 06/15] Fix random state handling in the baseline --- csrank/objectranking/baseline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/csrank/objectranking/baseline.py b/csrank/objectranking/baseline.py index 4593966a..376a8a2f 100644 --- a/csrank/objectranking/baseline.py +++ b/csrank/objectranking/baseline.py @@ -7,7 +7,6 @@ logger = logging.getLogger(__name__) - class RandomBaselineRanker(ObjectRanker, Learner): def __init__(self, random_state=None, **kwargs): """ From ef9be3451a361a5f9c28083a3182168e5ad6c57b Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Fri, 18 Sep 2020 17:17:01 +0200 Subject: [PATCH 07/15] wip! 
wrapper --- csrank/tests/test_estimators.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index b425c53a..ea268ec0 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -54,12 +54,12 @@ def predict(self, X, *args, **kwargs): # dimensions (an array of 1d data). if not get_check_name(check) in { # "check_estimators_dtypes", - "check_fit_score_takes_y", - "check_estimators_fit_returns_self", - "check_complex_data", - "check_dtype_object", - "check_estimators_empty_data_messages", - "check_pipeline_consistency", + # "check_fit_score_takes_y", + # "check_pipeline_consistency", + "check_estimators_fit_returns_self", # fails for all + "check_complex_data", # fails for CmpNet + "check_dtype_object", # fails for ExpectedRankRegression + "check_estimators_empty_data_messages", # fails for all "check_estimators_nan_inf", "check_estimators_overwrite_params", "check_estimator_sparse_data", From 0a3794ae38c6ceb0cb759b8d582fc5b27ba9f25a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Mon, 21 Sep 2020 14:23:25 +0200 Subject: [PATCH 08/15] Mark failing checks --- csrank/tests/test_estimators.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index ea268ec0..53dbd8e4 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -53,24 +53,20 @@ def predict(self, X, *args, **kwargs): # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d data). 
if not get_check_name(check) in { - # "check_estimators_dtypes", - # "check_fit_score_takes_y", - # "check_pipeline_consistency", "check_estimators_fit_returns_self", # fails for all "check_complex_data", # fails for CmpNet "check_dtype_object", # fails for ExpectedRankRegression "check_estimators_empty_data_messages", # fails for all - "check_estimators_nan_inf", - "check_estimators_overwrite_params", - "check_estimator_sparse_data", - "check_estimators_pickle", - "check_fit2d_predict1d", - "check_methods_subset_invariance", - "check_fit2d_1sample", - "check_fit2d_1feature", - "check_dict_unchanged", - "check_dont_overwrite_parameters", - "check_fit_idempotent", - "check_fit1d", + "check_estimators_nan_inf", # fails for CmpNet + "check_estimators_overwrite_params", # fails for FATELinearObjectRanker + "check_estimator_sparse_data", # fails for ExpectedRankRegression + "check_estimators_pickle", # fails for ExpectedRankRegression + "check_fit2d_predict1d", # fails for ExpectedRankRegression + "check_methods_subset_invariance", # fails for ExpectedRankRegression + "check_fit2d_1sample", # fails for FETAObjectRanker + "check_fit2d_1feature", # fails for ExpectedRankRegression + "check_dict_unchanged", # fails for ListNet + "check_dont_overwrite_parameters", # fails for CmpNet + "check_fit_idempotent", # fails for ExpectedRankRegression }: check(estimator) From fd4aae09efe2c8224974844fafe11fb9dd28768b Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 22 Sep 2020 13:59:39 +0200 Subject: [PATCH 09/15] Fixup! 
--- csrank/choicefunction/__init__.py | 2 +- csrank/discretechoice/__init__.py | 2 +- csrank/objectranking/__init__.py | 2 +- csrank/objectranking/baseline.py | 1 + csrank/tests/test_estimators.py | 30 +++++++++++++++--------------- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/csrank/choicefunction/__init__.py b/csrank/choicefunction/__init__.py index 37ce1dde..5ad21049 100644 --- a/csrank/choicefunction/__init__.py +++ b/csrank/choicefunction/__init__.py @@ -20,4 +20,4 @@ RankNetChoiceFunction, ] -__all__ = [ algo.__name__ for algo in algorithms ] +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/discretechoice/__init__.py b/csrank/discretechoice/__init__.py index efbc9d62..808f8dd2 100644 --- a/csrank/discretechoice/__init__.py +++ b/csrank/discretechoice/__init__.py @@ -30,4 +30,4 @@ RankNetDiscreteChoiceFunction, ] -__all__ = [ algo.__name__ for algo in algorithms ] +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py index 6e409cc9..b5f065ac 100644 --- a/csrank/objectranking/__init__.py +++ b/csrank/objectranking/__init__.py @@ -22,4 +22,4 @@ RandomBaselineRanker, ] -__all__ = [ algo.__name__ for algo in algorithms ] +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/objectranking/baseline.py b/csrank/objectranking/baseline.py index 376a8a2f..4593966a 100644 --- a/csrank/objectranking/baseline.py +++ b/csrank/objectranking/baseline.py @@ -7,6 +7,7 @@ logger = logging.getLogger(__name__) + class RandomBaselineRanker(ObjectRanker, Learner): def __init__(self, random_state=None, **kwargs): """ diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 53dbd8e4..c7fd2a6e 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -53,20 +53,20 @@ def predict(self, X, *args, **kwargs): # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d 
data). if not get_check_name(check) in { - "check_estimators_fit_returns_self", # fails for all - "check_complex_data", # fails for CmpNet - "check_dtype_object", # fails for ExpectedRankRegression - "check_estimators_empty_data_messages", # fails for all - "check_estimators_nan_inf", # fails for CmpNet - "check_estimators_overwrite_params", # fails for FATELinearObjectRanker - "check_estimator_sparse_data", # fails for ExpectedRankRegression - "check_estimators_pickle", # fails for ExpectedRankRegression - "check_fit2d_predict1d", # fails for ExpectedRankRegression - "check_methods_subset_invariance", # fails for ExpectedRankRegression - "check_fit2d_1sample", # fails for FETAObjectRanker - "check_fit2d_1feature", # fails for ExpectedRankRegression - "check_dict_unchanged", # fails for ListNet - "check_dont_overwrite_parameters", # fails for CmpNet - "check_fit_idempotent", # fails for ExpectedRankRegression + "check_estimators_fit_returns_self", # fails for all + "check_complex_data", # fails for CmpNet + "check_dtype_object", # fails for ExpectedRankRegression + "check_estimators_empty_data_messages", # fails for all + "check_estimators_nan_inf", # fails for CmpNet + "check_estimators_overwrite_params", # fails for FATELinearObjectRanker + "check_estimator_sparse_data", # fails for ExpectedRankRegression + "check_estimators_pickle", # fails for ExpectedRankRegression + "check_fit2d_predict1d", # fails for ExpectedRankRegression + "check_methods_subset_invariance", # fails for ExpectedRankRegression + "check_fit2d_1sample", # fails for FETAObjectRanker + "check_fit2d_1feature", # fails for ExpectedRankRegression + "check_dict_unchanged", # fails for ListNet + "check_dont_overwrite_parameters", # fails for CmpNet + "check_fit_idempotent", # fails for ExpectedRankRegression }: check(estimator) From 2a4f282dd7afab9fe6b7237d9f06693d6e39bab2 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 22 Sep 2020 13:59:56 +0200 Subject: [PATCH 10/15] Always return self 
from fit Required by the scikit-learn estimator API for easier fit-predict chaining. --- csrank/tests/test_estimators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index c7fd2a6e..9fcbdb48 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -43,7 +43,7 @@ class WrappedRanker(Estimator): def fit(self, X, Y, *args, **kwargs): Xnew = _reshape_x(X) Ynew = Xnew.argsort(axis=1).argsort(axis=1).squeeze(axis=-1) - super().fit(Xnew, Ynew, *args, **kwargs) + return super().fit(Xnew, Ynew, *args, **kwargs) def predict(self, X, *args, **kwargs): super().predict(_reshape_x(X), *args, **kwargs) From ccea422b5fc5b4cff29d96528d77e1e48c2a2643 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 22 Sep 2020 14:01:20 +0200 Subject: [PATCH 11/15] Fixup! --- csrank/tests/test_estimators.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 9fcbdb48..0d38c869 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -3,8 +3,6 @@ https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator """ -import numpy as np - from functools import partial import pytest From 2f80d649e0f1fae8a0cdaa7a6fd3426e9e8298cb Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 22 Sep 2020 14:01:29 +0200 Subject: [PATCH 12/15] Fixup! self from fit --- csrank/tests/test_estimators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 0d38c869..391bbb19 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -51,7 +51,6 @@ def predict(self, X, *args, **kwargs): # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d data). 
if not get_check_name(check) in { - "check_estimators_fit_returns_self", # fails for all "check_complex_data", # fails for CmpNet "check_dtype_object", # fails for ExpectedRankRegression "check_estimators_empty_data_messages", # fails for all From 978800a32543bb45fa9cc5453ab0bd3180505d2a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 22 Sep 2020 14:37:20 +0200 Subject: [PATCH 13/15] Revert "Fixup! self from fit" This reverts commit 2f80d649e0f1fae8a0cdaa7a6fd3426e9e8298cb. --- csrank/tests/test_estimators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 391bbb19..0d38c869 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -51,6 +51,7 @@ def predict(self, X, *args, **kwargs): # expect a 3-dimensional data shape while scikit-learn assumes two # dimensions (an array of 1d data). if not get_check_name(check) in { + "check_estimators_fit_returns_self", # fails for all "check_complex_data", # fails for CmpNet "check_dtype_object", # fails for ExpectedRankRegression "check_estimators_empty_data_messages", # fails for all From bd5c46f9531209761a9c33e41c7d25653d2e5f8a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 23 Sep 2020 15:27:10 +0200 Subject: [PATCH 14/15] Mark check_fit2d_1feature check as passing --- csrank/tests/test_estimators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 0d38c869..5dc74cf1 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -62,7 +62,6 @@ def predict(self, X, *args, **kwargs): "check_fit2d_predict1d", # fails for ExpectedRankRegression "check_methods_subset_invariance", # fails for ExpectedRankRegression "check_fit2d_1sample", # fails for FETAObjectRanker - "check_fit2d_1feature", # fails for ExpectedRankRegression "check_dict_unchanged", # fails for ListNet "check_dont_overwrite_parameters", # fails for 
CmpNet "check_fit_idempotent", # fails for ExpectedRankRegression From 0c7ca20e3e052768de05acc9aa71358a224e30b9 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 30 Sep 2020 14:45:56 +0200 Subject: [PATCH 15/15] Include the necessary scikit-learn patch --- 0001-Relax-init-parameter-type-checks.patch | 46 +++++++++++++++++++++ csrank/tests/test_estimators.py | 1 + shell.nix | 5 +++ 3 files changed, 52 insertions(+) create mode 100644 0001-Relax-init-parameter-type-checks.patch diff --git a/0001-Relax-init-parameter-type-checks.patch b/0001-Relax-init-parameter-type-checks.patch new file mode 100644 index 00000000..256b1e21 --- /dev/null +++ b/0001-Relax-init-parameter-type-checks.patch @@ -0,0 +1,46 @@ +From 48a6dd8f0e7ff52d116b0c01e36dac4504547781 Mon Sep 17 00:00:00 2001 +From: Timo Kaufmann +Date: Thu, 16 Jul 2020 16:57:14 +0200 +Subject: [PATCH] Relax init parameter type checks + +We now allow any "type" (uninitialized classes) and all numeric numpy +types. See https://github.com/scikit-learn/scikit-learn/issues/17756 for +a discussion. 
+--- + sklearn/utils/estimator_checks.py | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py +index 30c668237..ed3c40d67 100644 +--- a/sklearn/utils/estimator_checks.py ++++ b/sklearn/utils/estimator_checks.py +@@ -2529,12 +2529,20 @@ def check_parameters_default_constructible(name, Estimator): + assert init_param.default != init_param.empty, ( + "parameter %s for %s has no default value" + % (init_param.name, type(estimator).__name__)) +- if type(init_param.default) is type: +- assert init_param.default in [np.float64, np.int64] +- else: +- assert (type(init_param.default) in +- [str, int, float, bool, tuple, type(None), +- np.float64, types.FunctionType, joblib.Memory]) ++ allowed_types = { ++ str, ++ int, ++ float, ++ bool, ++ tuple, ++ type(None), ++ type, ++ types.FunctionType, ++ joblib.Memory, ++ } ++ # Any numpy numeric such as np.int32. ++ allowed_types.update(np.core.numerictypes.allTypes.values()) ++ assert type(init_param.default) in allowed_types + if init_param.name not in params.keys(): + # deprecated parameter, not in get_params + assert init_param.default is None +-- +2.28.0 + diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py index 5dc74cf1..580cec38 100644 --- a/csrank/tests/test_estimators.py +++ b/csrank/tests/test_estimators.py @@ -65,5 +65,6 @@ def predict(self, X, *args, **kwargs): "check_dict_unchanged", # fails for ListNet "check_dont_overwrite_parameters", # fails for CmpNet "check_fit_idempotent", # fails for ExpectedRankRegression + "check_n_features_in" # fails for RankSVM }: check(estimator) diff --git a/shell.nix b/shell.nix index bef389b6..f90c0f64 100644 --- a/shell.nix +++ b/shell.nix @@ -21,6 +21,11 @@ let buildInputs = with pkgs; [ xorg.libX11 ] ++ old.buildInputs; } ); + scikit-learn = super.scikit-learn.overridePythonAttrs ( + old: { + patches = 
[./0001-Relax-init-parameter-type-checks.patch]; + } + ); }); }; in pkgs.mkShell {