From a9cbedf2094c35088afec8aa079d4f6f78184c0d Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Thu, 3 Sep 2020 15:03:47 +0200 Subject: [PATCH 1/9] fork api --- doc/progress.rst | 2 +- examples/30_extended/datasets_tutorial.py | 11 +++++++- openml/datasets/functions.py | 27 ++++++++++++++++++- tests/test_datasets/test_dataset_functions.py | 10 +++++++ 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index ef5ed6bae..dae82f98e 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.11.0 ~~~~~~ -* ADD #929: Add data edit API +* ADD #929: Add data edit API and Fork API * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index e129b7718..eb41cda10 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -11,7 +11,7 @@ import openml import pandas as pd -from openml.datasets.functions import edit_dataset, get_dataset +from openml.datasets.functions import edit_dataset, get_dataset, fork_dataset ############################################################################ # Exercise 0 @@ -146,4 +146,13 @@ data_id = edit_dataset(564, default_target_attribute="y") print(f"Edited dataset ID: {data_id}") + +############################################################################ +# Fork dataset +# Used to create a copy of the dataset with a different owner + +data_id = fork_dataset(564) +print(f"Forked dataset ID: {data_id}") +print(get_dataset(data_id)) + openml.config.stop_using_configuration_for_example() diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 0f3037a74..be6453044 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -862,7 +862,7 @@ def edit_dataset( Returns ------- - data_id of the existing edited version or the new version created and published""" + data_id of the edited dataset """ if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) @@ -897,6 +897,31 @@ def edit_dataset( return int(data_id) +def fork_dataset(data_id) -> int: + """ + Create a fork of the dataset with the calling authorized user as owner. + Note that the data arff file remains the same. + + Parameters + ---------- + data_id : int + ID of the dataset. + + Returns + ------- + data_id of the forked dataset + + """ + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + # compose data fork parameters + form_data = {"data_id": data_id} + result_xml = openml._api_calls._perform_api_call("data/fork", "post", data=form_data) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_fork"]["oml:id"] + return int(data_id) + + def _get_dataset_description(did_cache_dir, dataset_id): """Get the dataset description as xml dictionary. diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 5076d06c2..4100b225b 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -27,6 +27,7 @@ from openml.datasets.functions import ( create_dataset, edit_dataset, + fork_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -1407,3 +1408,12 @@ def test_data_edit_errors(self): data_id=128, default_target_attribute="y", ) + + def test_data_fork(self): + did = 1 + result = fork_dataset(did,) + self.assertNotEqual(did, result) + # Check server exception when unknown dataset is provided + self.assertRaisesRegex( + OpenMLServerException, "Unknown dataset", fork_dataset, data_id=100000, + ) From 1822c9921c12005134fe4222df8bf9eb3e8d91f0 Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Mon, 21 Sep 2020 10:08:10 +0200 Subject: [PATCH 2/9] improve docs (+1 squashed commits) Squashed commits: [ec5c0d10] import changes --- doc/api.rst | 2 ++ examples/30_extended/datasets_tutorial.py | 2 +- openml/datasets/__init__.py | 2 ++ openml/datasets/functions.py | 35 ++++++++++++++----- tests/test_datasets/test_dataset_functions.py | 3 +- 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 0bc092bd0..8a72e6b69 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -74,6 +74,8 @@ Modules list_datasets list_qualities status_update + edit_dataset + fork_dataset :mod:`openml.evaluations`: Evaluation Functions ----------------------------------------------- diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index eb41cda10..f49a6e7fa 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -11,7 +11,7 @@ import openml import pandas as pd -from openml.datasets.functions import edit_dataset, get_dataset, fork_dataset +from openml.datasets import edit_dataset, fork_dataset, get_dataset ############################################################################ # Exercise 0 diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index f380a1676..47022ac12 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -24,4 +24,6 @@ "OpenMLDataFeature", "status_update", "list_qualities", + "edit_dataset", + "fork_dataset", ] diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index be6453044..13b0af141 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -815,12 +815,16 @@ def edit_dataset( ) -> int: """ Edits an OpenMLDataset. - Specify atleast one field to edit, apart from data_id - - For certain fields, a new dataset version is created : attributes, data, - default_target_attribute, ignore_attribute, row_id_attribute. + Specify atleast one field to edit, apart from data_id. - - For other fields, the uploader can edit the exisiting version. - Noone except the uploader can edit the exisitng version. + This API has non-critical and critical fields. + Critical features are default_target_attribute, ignore_attribute, row_id_attribute. + + - Editing non-critical data fields is allowed for all authenticated users. + - Editing critical fields is allowed only for the owner, provided there are no tasks. + + If dataset has tasks or if the user is not the owner, the only way + to edit critical fields is to use fork_dataset followed by edit_dataset. Parameters ---------- @@ -897,10 +901,25 @@ def edit_dataset( return int(data_id) -def fork_dataset(data_id) -> int: +def fork_dataset(data_id: int) -> int: """ - Create a fork of the dataset with the calling authorized user as owner. - Note that the data arff file remains the same. + Creates a new dataset version, with possibly a different owner. + The authenticated user becomes the new owner. + This function does not result in any change in the dataset ARFF file. + + + This API is intended for use when a user is unable to edit the critical features of a dataset + through the edit_dataset API. + (Critical features are default_target_attribute, ignore_attribute, row_id_attribute.) + + Specifically, this happens when the user is: + 1. Not the owner of the dataset. + 2. User is the owner of the dataset, but the dataset has tasks. + + In these two cases the only way to edit critical features is: + 1. STEP 1: Fork the dataset using fork_dataset API + 2. STEP 2: Call edit_dataset API on the forked version. + Parameters ---------- diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 4100b225b..761c53d71 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -26,8 +26,6 @@ from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, - edit_dataset, - fork_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -41,6 +39,7 @@ _get_online_dataset_format, DATASETS_CACHE_DIR_NAME, ) +from openml.datasets import fork_dataset, edit_dataset class TestOpenMLDataset(TestBase): From ce94f9394588a2ac57519488806b8583f3733a05 Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Mon, 21 Sep 2020 10:08:10 +0200 Subject: [PATCH 3/9] minor change (+1 squashed commits) Squashed commits: [1822c992] improve docs (+1 squashed commits) Squashed commits: [ec5c0d10] import changes --- doc/api.rst | 2 ++ examples/30_extended/datasets_tutorial.py | 2 +- openml/datasets/__init__.py | 4 +++ openml/datasets/functions.py | 35 ++++++++++++++----- tests/test_datasets/test_dataset_functions.py | 3 +- 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 0bc092bd0..8a72e6b69 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -74,6 +74,8 @@ Modules list_datasets list_qualities status_update + edit_dataset + fork_dataset :mod:`openml.evaluations`: Evaluation Functions ----------------------------------------------- diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index eb41cda10..f49a6e7fa 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -11,7 +11,7 @@ import openml import pandas as pd -from openml.datasets.functions import edit_dataset, get_dataset, fork_dataset +from openml.datasets import edit_dataset, fork_dataset, get_dataset ############################################################################ # Exercise 0 diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index f380a1676..abde85c06 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -9,6 +9,8 @@ list_datasets, status_update, list_qualities, + edit_dataset, + fork_dataset, ) from .dataset import OpenMLDataset from .data_feature import OpenMLDataFeature @@ -24,4 +26,6 @@ "OpenMLDataFeature", "status_update", "list_qualities", + "edit_dataset", + "fork_dataset", ] diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index be6453044..13b0af141 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -815,12 +815,16 @@ def edit_dataset( ) -> int: """ Edits an OpenMLDataset. - Specify atleast one field to edit, apart from data_id - - For certain fields, a new dataset version is created : attributes, data, - default_target_attribute, ignore_attribute, row_id_attribute. + Specify atleast one field to edit, apart from data_id. - - For other fields, the uploader can edit the exisiting version. - Noone except the uploader can edit the exisitng version. + This API has non-critical and critical fields. + Critical features are default_target_attribute, ignore_attribute, row_id_attribute. + + - Editing non-critical data fields is allowed for all authenticated users. + - Editing critical fields is allowed only for the owner, provided there are no tasks. + + If dataset has tasks or if the user is not the owner, the only way + to edit critical fields is to use fork_dataset followed by edit_dataset. Parameters ---------- @@ -897,10 +901,25 @@ def edit_dataset( return int(data_id) -def fork_dataset(data_id) -> int: +def fork_dataset(data_id: int) -> int: """ - Create a fork of the dataset with the calling authorized user as owner. - Note that the data arff file remains the same. + Creates a new dataset version, with possibly a different owner. + The authenticated user becomes the new owner. + This function does not result in any change in the dataset ARFF file. + + + This API is intended for use when a user is unable to edit the critical features of a dataset + through the edit_dataset API. + (Critical features are default_target_attribute, ignore_attribute, row_id_attribute.) + + Specifically, this happens when the user is: + 1. Not the owner of the dataset. + 2. User is the owner of the dataset, but the dataset has tasks. + + In these two cases the only way to edit critical features is: + 1. STEP 1: Fork the dataset using fork_dataset API + 2. STEP 2: Call edit_dataset API on the forked version. + Parameters ---------- diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 4100b225b..761c53d71 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -26,8 +26,6 @@ from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, - edit_dataset, - fork_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -41,6 +39,7 @@ _get_online_dataset_format, DATASETS_CACHE_DIR_NAME, ) +from openml.datasets import fork_dataset, edit_dataset class TestOpenMLDataset(TestBase): From 6767a0b44558e16d5a1f374a15bd957a0a163dc7 Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Mon, 21 Sep 2020 18:38:05 +0200 Subject: [PATCH 4/9] docs update --- examples/30_extended/datasets_tutorial.py | 6 ++-- openml/datasets/functions.py | 30 ++++++++++--------- tests/test_datasets/test_dataset_functions.py | 6 ++-- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index f49a6e7fa..f8d217b8a 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -149,10 +149,12 @@ ############################################################################ # Fork dataset -# Used to create a copy of the dataset with a different owner +# Used to create a copy of the dataset with you as the owner. +# Use this API only if you are unable to edit the critical fields (default_target_attribute, ignore_attribute, +# row_id_attribute) of a dataset through the edit_dataset API. +# After the dataset is forked, you can edit the new version of the dataset using edit_dataset. data_id = fork_dataset(564) print(f"Forked dataset ID: {data_id}") -print(get_dataset(data_id)) openml.config.stop_using_configuration_for_example() diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 13b0af141..508d9be34 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -815,13 +815,16 @@ def edit_dataset( ) -> int: """ Edits an OpenMLDataset. - Specify atleast one field to edit, apart from data_id. + In addition to providing the dataset id of the dataset to edit (through data_id), + you must specify a value for at least one of the optional function arguments, + i.e. one value for a field to edit. - This API has non-critical and critical fields. - Critical features are default_target_attribute, ignore_attribute, row_id_attribute. + This function allows editing of both non-critical and critical fields. + Critical fields are default_target_attribute, ignore_attribute, row_id_attribute. - Editing non-critical data fields is allowed for all authenticated users. - - Editing critical fields is allowed only for the owner, provided there are no tasks. + - Editing critical fields is allowed only for the owner, provided there are no tasks + associated with this dataset. If dataset has tasks or if the user is not the owner, the only way to edit critical fields is to use fork_dataset followed by edit_dataset. @@ -866,7 +869,7 @@ def edit_dataset( Returns ------- - data_id of the edited dataset """ + Dataset id """ if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) @@ -903,20 +906,19 @@ def edit_dataset( def fork_dataset(data_id: int) -> int: """ - Creates a new dataset version, with possibly a different owner. - The authenticated user becomes the new owner. - This function does not result in any change in the dataset ARFF file. + Creates a new dataset version, with the authenticated user as the new owner. + The forked dataset can have distinct dataset meta-data, + but the actual data itself is shared with the original version. - - This API is intended for use when a user is unable to edit the critical features of a dataset + This API is intended for use when a user is unable to edit the critical fields of a dataset through the edit_dataset API. - (Critical features are default_target_attribute, ignore_attribute, row_id_attribute.) + (Critical fields are default_target_attribute, ignore_attribute, row_id_attribute.) Specifically, this happens when the user is: 1. Not the owner of the dataset. 2. User is the owner of the dataset, but the dataset has tasks. - In these two cases the only way to edit critical features is: + In these two cases the only way to edit critical fields is: 1. STEP 1: Fork the dataset using fork_dataset API 2. STEP 2: Call edit_dataset API on the forked version. @@ -924,11 +926,11 @@ def fork_dataset(data_id: int) -> int: Parameters ---------- data_id : int - ID of the dataset. + id of the dataset to be forked Returns ------- - data_id of the forked dataset + Dataset id of the forked dataset """ if not isinstance(data_id, int): diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 761c53d71..aa25157bc 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1389,7 +1389,7 @@ def test_data_edit_errors(self): data_id=100000, description="xor operation dataset", ) - # Check server exception when owner/admin edits critical features of dataset with tasks + # Check server exception when owner/admin edits critical fields of dataset with tasks self.assertRaisesRegex( OpenMLServerException, "Critical features default_target_attribute, row_id_attribute and ignore_attribute " @@ -1398,7 +1398,7 @@ def test_data_edit_errors(self): data_id=223, default_target_attribute="y", ) - # Check server exception when a non-owner or non-admin tries to edit critical features + # Check server exception when a non-owner or non-admin tries to edit critical fields self.assertRaisesRegex( OpenMLServerException, "Critical features default_target_attribute, row_id_attribute and ignore_attribute " @@ -1410,7 +1410,7 @@ def test_data_edit_errors(self): def test_data_fork(self): did = 1 - result = fork_dataset(did,) + result = fork_dataset(did) self.assertNotEqual(did, result) # Check server exception when unknown dataset is provided self.assertRaisesRegex( From b624e076bad7e3be8582d8e078bb9c0316fd7ab7 Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Thu, 22 Oct 2020 10:14:04 +0200 Subject: [PATCH 5/9] clarify example --- examples/30_extended/datasets_tutorial.py | 11 ++++++----- openml/datasets/functions.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index f8d217b8a..fe0664a83 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -139,9 +139,10 @@ ############################################################################ -# Edit critical fields, allowed only for owners of the dataset: -# default_target_attribute, row_id_attribute, ignore_attribute -# To edit critical fields of a dataset owned by you, configure the API key: +# Editing critical fields (default_target_attribute, row_id_attribute, ignore_attribute) is allowed +# only for the dataset owner. Further, critical fields cannot be edited if the dataset has any +# tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, +# configure the API key: # openml.config.apikey = 'FILL_IN_OPENML_API_KEY' data_id = edit_dataset(564, default_target_attribute="y") print(f"Edited dataset ID: {data_id}") @@ -150,8 +151,8 @@ ############################################################################ # Fork dataset # Used to create a copy of the dataset with you as the owner. -# Use this API only if you are unable to edit the critical fields (default_target_attribute, ignore_attribute, -# row_id_attribute) of a dataset through the edit_dataset API. +# Use this API only if you are unable to edit the critical fields (default_target_attribute, +# ignore_attribute, row_id_attribute) of a dataset through the edit_dataset API. # After the dataset is forked, you can edit the new version of the dataset using edit_dataset. data_id = fork_dataset(564) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 508d9be34..13df404b2 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -817,7 +817,7 @@ def edit_dataset( Edits an OpenMLDataset. In addition to providing the dataset id of the dataset to edit (through data_id), you must specify a value for at least one of the optional function arguments, - i.e. one value for a field to edit. + i.e. one value for a field to edit. This function allows editing of both non-critical and critical fields. Critical fields are default_target_attribute, ignore_attribute, row_id_attribute. From e8e3205c59b474e28d7fd4b35dc31be654cdebbe Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 23 Oct 2020 13:18:43 +0200 Subject: [PATCH 6/9] Update doc/progress.rst --- doc/progress.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/progress.rst b/doc/progress.rst index 6317e9693..2aad9e62a 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.11.0 ~~~~~~ -* ADD #929: Add data edit API and Fork API +* ADD #929: Add ``edit_dataset`` and ``fork_dataset`` to allow editing and forking of uploaded datasets. * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register From 15864f4958f9b9976c8e6aa45a5d763c23c1bc9a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 23 Oct 2020 14:12:03 +0200 Subject: [PATCH 7/9] Fix whitespaces for docstring --- openml/datasets/functions.py | 113 +++++++++++++++++------------------ 1 file changed, 56 insertions(+), 57 deletions(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 190247234..84943b244 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -813,64 +813,63 @@ def edit_dataset( original_data_url=None, paper_url=None, ) -> int: + """ Edits an OpenMLDataset. + + In addition to providing the dataset id of the dataset to edit (through data_id), + you must specify a value for at least one of the optional function arguments, + i.e. one value for a field to edit. + + This function allows editing of both non-critical and critical fields. + Critical fields are default_target_attribute, ignore_attribute, row_id_attribute. + + - Editing non-critical data fields is allowed for all authenticated users. + - Editing critical fields is allowed only for the owner, provided there are no tasks + associated with this dataset. + + If dataset has tasks or if the user is not the owner, the only way + to edit critical fields is to use fork_dataset followed by edit_dataset. + + Parameters + ---------- + data_id : int + ID of the dataset. + description : str + Description of the dataset. + creator : str + The person who created the dataset. + contributor : str + People who contributed to the current version of the dataset. + collection_date : str + The date the data was originally collected, given by the uploader. + language : str + Language in which the data is represented. + Starts with 1 upper case letter, rest lower case, e.g. 'English'. + default_target_attribute : str + The default target attribute, if it exists. + Can have multiple values, comma separated. + ignore_attribute : str | list + Attributes that should be excluded in modelling, + such as identifiers and indexes. + citation : str + Reference(s) that should be cited when building on this data. + row_id_attribute : str, optional + The attribute that represents the row-id column, if present in the + dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not + specified, the index of the dataframe will be used as the + ``row_id_attribute``. If the name of the index is ``None``, it will + be discarded. + + .. versionadded: 0.8 + Inference of ``row_id_attribute`` from a dataframe. + original_data_url : str, optional + For derived data, the url to the original dataset. + paper_url : str, optional + Link to a paper describing the dataset. + + Returns + ------- + Dataset id """ - Edits an OpenMLDataset. - - In addition to providing the dataset id of the dataset to edit (through data_id), - you must specify a value for at least one of the optional function arguments, - i.e. one value for a field to edit. - - This function allows editing of both non-critical and critical fields. - Critical fields are default_target_attribute, ignore_attribute, row_id_attribute. - - - Editing non-critical data fields is allowed for all authenticated users. - - Editing critical fields is allowed only for the owner, provided there are no tasks - associated with this dataset. - - If dataset has tasks or if the user is not the owner, the only way - to edit critical fields is to use fork_dataset followed by edit_dataset. - - Parameters - ---------- - data_id : int - ID of the dataset. - description : str - Description of the dataset. - creator : str - The person who created the dataset. - contributor : str - People who contributed to the current version of the dataset. - collection_date : str - The date the data was originally collected, given by the uploader. - language : str - Language in which the data is represented. - Starts with 1 upper case letter, rest lower case, e.g. 'English'. - default_target_attribute : str - The default target attribute, if it exists. - Can have multiple values, comma separated. - ignore_attribute : str | list - Attributes that should be excluded in modelling, - such as identifiers and indexes. - citation : str - Reference(s) that should be cited when building on this data. - row_id_attribute : str, optional - The attribute that represents the row-id column, if present in the - dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not - specified, the index of the dataframe will be used as the - ``row_id_attribute``. If the name of the index is ``None``, it will - be discarded. - - .. versionadded: 0.8 - Inference of ``row_id_attribute`` from a dataframe. - original_data_url : str, optional - For derived data, the url to the original dataset. - paper_url : str, optional - Link to a paper describing the dataset. - - - Returns - ------- - Dataset id """ if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) From 1aa266005e3466c19b894f5c30f688dcd3ab0cae Mon Sep 17 00:00:00 2001 From: sahithyaravi1493 Date: Fri, 23 Oct 2020 14:41:08 +0200 Subject: [PATCH 8/9] fix error --- examples/30_extended/create_upload_tutorial.py | 2 +- tests/test_datasets/test_dataset_functions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 0692b9b09..a4e1d9655 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -100,7 +100,7 @@ # The attribute that represents the row-id column, if present in the # dataset. row_id_attribute=None, - # Attribute or list of attributes that should be excluded in modelling, such as + # Attribute or list of attributes that should be excluded in modelling, such as # identifiers and indexes. E.g. "feat1" or ["feat1","feat2"] ignore_attribute=None, # How to cite the paper. diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index aa25157bc..7cf9dbd1c 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1386,7 +1386,7 @@ def test_data_edit_errors(self): OpenMLServerException, "Unknown dataset", edit_dataset, - data_id=100000, + data_id=999999, description="xor operation dataset", ) # Check server exception when owner/admin edits critical fields of dataset with tasks From eda3fd8eacdcd2c78ca08afd650448de2d83dcee Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 23 Oct 2020 15:45:01 +0200 Subject: [PATCH 9/9] Use id 999999 for unknown dataset --- tests/test_datasets/test_dataset_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 7cf9dbd1c..c6e6f78f8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1414,5 +1414,5 @@ def test_data_fork(self): self.assertNotEqual(did, result) # Check server exception when unknown dataset is provided self.assertRaisesRegex( - OpenMLServerException, "Unknown dataset", fork_dataset, data_id=100000, + OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999, )