diff --git a/data/sql/dataset.sql b/data/sql/dataset.sql index ed0e9b0e..75004b0d 100644 --- a/data/sql/dataset.sql +++ b/data/sql/dataset.sql @@ -128,4 +128,5 @@ INSERT INTO `dataset` (`did`, `uploader`, `source`, `name`, `version`, `version_ (127, 1, 0, 'pharynx', '1', '1', 'ARFF', NULL, NULL, NULL, '2014-04-23 13:17:32', NULL, 'Public', NULL, NULL, 'https://www.openml.org/data/download/3650/dataset_2199_pharynx.arff', 'true', 127, 'class', NULL, '\"Entry\"', NULL, 'public', NULL, NULL, 'set ignore feature', '2014-10-05 00:03:24'), (128, 1, 0, 'iris', '1', '1', 'ARFF', 'R.A.Fisher', NULL, '1937', '2014-04-06 23:23:39', 'English', 'Public', 'The use of multiple measurements in taxonomic problems', NULL, 'https://www.openml.org/data/download/61/dataset_61_iris.arff', 'true', 128, 'class', NULL, NULL, 'http://digital.library.adelaide.edu.au/dspace/handle/2440/15227', 'public', NULL, 'https://archive.ics.uci.edu/ml/datasets/Iris', NULL, '2014-04-06 23:23:39'), (129, 1, NULL, 'iris-challenge', '1', '1', 'ARFF', NULL, NULL, NULL, '2018-05-29 19:06:15', NULL, 'CC0', NULL, NULL, 'https://www.openml.org/data/download/19330175/iris-challenge.arff', 'true', 129, 'class', NULL, NULL, NULL, 'public', NULL, NULL, NULL, '2018-05-29 19:06:15'), -(130, 16, 0, 'iris', '2', '1', 'ARFF', 'R.A. Fisher', NULL, '1936', '2014-04-06 23:23:39', NULL, 'Public', NULL, NULL, 'https://www.openml.org/data/download/61/dataset_61_iris.arff', 'true', 130, 'class', NULL, NULL, 'http://digital.library.adelaide.edu.au/dspace/handle/2440/15227', 'private', NULL, 'https://archive.ics.uci.edu/ml/datasets/Iris', NULL, '2014-04-06 23:23:39'); +(130, 16, 0, 'iris', '2', '1', 'ARFF', 'R.A. Fisher', NULL, '1936', '2014-04-06 23:23:39', NULL, 'Public', NULL, NULL, 'https://www.openml.org/data/download/61/dataset_61_iris.arff', 'true', 130, 'class', NULL, NULL, 'http://digital.library.adelaide.edu.au/dspace/handle/2440/15227', 'private', NULL, 'https://archive.ics.uci.edu/ml/datasets/Iris', NULL, '2014-04-06 23:23:39'), +(131, 1, 0, 'web_questionsQA', '1', '1', 'ARFF', 'Hamburg Uni', NULL, NULL, '2023-06-15 23:23:39', NULL, 'Public', NULL, NULL, 'https://api.openml.org/data/v1/download/22116555/web_questions.arff', 'true', 131, 'answers', NULL, NULL, NULL, 'public', NULL, NULL, NULL, '2023-06-15 23:23:39'); diff --git a/data/sql/estimation_procedure.sql b/data/sql/estimation_procedure.sql index a0536127..2c406bcd 100644 --- a/data/sql/estimation_procedure.sql +++ b/data/sql/estimation_procedure.sql @@ -32,4 +32,5 @@ INSERT INTO `estimation_procedure` (`id`, `ttid`, `name`, `type`, `repeats`, `fo (31, 10, '5 times 2-fold Crossvalidation', 'crossvalidation', 5, 2, 'false', NULL, 'true', 'false', '2023-02-22 11:46:54'), (32, 10, '10 times 10-fold Crossvalidation', 'crossvalidation', 10, 10, 'false', NULL, 'true', 'false', '2023-02-22 11:46:54'), (33, 10, '10% Holdout set', 'holdout', 1, NULL, 'false', 33, 'true', 'false', '2023-02-22 11:46:54'), -(34, 10, '33% Holdout set', 'holdout', 1, NULL, 'false', 33, 'true', 'false', '2023-02-22 11:46:54'); \ No newline at end of file +(34, 10, '33% Holdout set', 'holdout', 1, NULL, 'false', 33, 'true', 'false', '2023-02-22 11:46:54'), +(35, 11, '33% Holdout set', 'holdout', 1, NULL, 'false', 33, 'true', 'false', '2023-06-15 16:34:54'); \ No newline at end of file diff --git a/data/sql/file.sql b/data/sql/file.sql index e240f88b..8a094775 100644 --- a/data/sql/file.sql +++ b/data/sql/file.sql @@ -128,4 +128,5 @@ INSERT INTO `file` (`id`, `creator`, `creation_date`, `filepath`, `filesize`, `f (127, 16, '2017-06-18 14:38:57', 'https://www.openml.org/data/download/3650/dataset_2199_pharynx.arff', 13471, 'pharynx.arff', 'arff', 'text/plain;charset=UTF-8', '0bdc5060f67578a2f715e541ee5d0138', 'url', 'public'), (128, 16, '2018-05-29 14:00:00', 'https://www.openml.org/data/download/61/iris.arff', 7487, 'iris.arff', 'arff', 'text/plain;charset=UTF-8', 'ad484452702105cbf3d30f8deaba39a9', 'url', 'public'), (129, 16, '2018-05-29 14:00:00', 'https://www.openml.org/data/download/19330175/iris-challenge.arff', 7103, 'iris-challenge.arff', 'arff', 'text/plain;charset=UTF-8', 'bce91bc6033c311070d198e78e7fc954', 'url', 'public'), -(130, 16, '2018-05-29 14:00:00', 'https://www.openml.org/data/download/61/iris.arff', 7487, 'iris.arff', 'arff', 'text/plain;charset=UTF-8', 'ad484452702105cbf3d30f8deaba39a9', 'url', 'private'); +(130, 16, '2018-05-29 14:00:00', 'https://www.openml.org/data/download/61/iris.arff', 7487, 'iris.arff', 'arff', 'text/plain;charset=UTF-8', 'ad484452702105cbf3d30f8deaba39a9', 'url', 'private'), +(131, 16, '2023-06-15 14:00:00', 'https://api.openml.org/data/v1/download/22116558/web_questions.arff', 818476, 'web_questions.arff', 'arff', 'text/plain;charset=UTF-8', '67f1977f1aceb685049ca0bf036ea80c', 'url', 'public'); diff --git a/data/sql/task.sql b/data/sql/task.sql index 998e36b0..352346d8 100644 --- a/data/sql/task.sql +++ b/data/sql/task.sql @@ -1301,4 +1301,5 @@ INSERT INTO `task` (`task_id`, `ttid`, `creator`, `creation_date`, `embargo_end_ (1300, 1, 1, '2019-05-24 12:30:49', NULL), (1301, 1, 1, '2019-05-24 12:30:50', NULL), (1302, 1, 1, '2019-05-24 12:30:50', NULL), -(1303, 1, 1, '2019-05-24 12:30:51', NULL); +(1303, 1, 1, '2019-05-24 12:30:51', NULL), +(1304, 11, 1, '2023-06-15 18:00:00', NULL); diff --git a/data/sql/task_inputs.sql b/data/sql/task_inputs.sql index 44d8e2b1..bcdcc558 100644 --- a/data/sql/task_inputs.sql +++ b/data/sql/task_inputs.sql @@ -3907,4 +3907,7 @@ INSERT INTO `task_inputs` (`task_id`, `input`, `value`) VALUES (1302, 'target_feature', 'Class'), (1303, 'estimation_procedure', '28'), (1303, 'source_data', '100'), -(1303, 'target_feature', 'Y'); +(1303, 'target_feature', 'Y'), +(1304, 'estimation_procedure', '35'), +(1304, 'source_data', '131'), +(1304, 'target_feature', 'answers'); diff --git a/data/sql/task_type.sql b/data/sql/task_type.sql index ae23e81d..6bf7e902 100644 --- a/data/sql/task_type.sql +++ b/data/sql/task_type.sql @@ -8,4 +8,5 @@ INSERT INTO `task_type` (`ttid`, `name`, `description`, `creator`, `contributors (7, 'Survival Analysis', 'Related to Regression. Given a dataset (typically consisting of patient data) predict a left timestamp (date entering the study), right timestamp (date of leaving the study), or both. ', '\"Benrd Bischl\",\"Dominik Kirchhoff\",\"Michel Lang\",\"Jan van Rijn\",\"Joaquin Vanschoren\"', NULL, '2014-12-03 00:00:00'), (8, 'Subgroup Discovery', 'Subgroup discovery is a data mining technique which extracts interesting rules with respect to a target variable. An important characteristic of this task is the combination of predictive and descriptive induction. An overview related to the task of subgroup discovery is presented. (description by: Herrera et. al., An overview on subgroup discovery: foundations and applications)', '\"Jan N. van Rijn\", \"Arno Knobbe\", \"Joaquin Vanschoren\"', NULL, '2016-06-17 10:59:20'), (9, 'Multitask Regression', '', 'Jan N. van Rijn', NULL, '2019-10-24 23:46:54'), -(10, 'Active Classification', '', 'various contributors', NULL, '2023-02-22 11:46:54'); +(10, 'Active Classification', '', 'various contributors', NULL, '2023-02-22 11:46:54'), +(11, 'Text tasks', '', 'various contributors', NULL, '2023-06-15 16:32:41'); diff --git a/data/sql/task_type_inout.sql b/data/sql/task_type_inout.sql index 642724ee..534eb4da 100644 --- a/data/sql/task_type_inout.sql +++ b/data/sql/task_type_inout.sql @@ -72,4 +72,10 @@ INSERT INTO `task_type_inout` (`ttid`, `name`, `type`, `io`, `requirement`, `des (10, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\nARFF\n\n\n\n\n\n\n'), (10, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\n\"data_type\": \"numeric\",\n\"select\": \"did\",\n\"from\": \"dataset\"\n}', '\n[INPUT:source_data]\n[INPUT:target_feature]\n'), (10, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\n\"data_type\": \"string\",\n\"select\": \"name\",\n\"from\": \"data_feature\",\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\n}', NULL), -(10, 'utility_scores', 'Utility Scores', 'output', 'optional', 'The utility scores', 45, NULL, NULL); \ No newline at end of file +(10, 'utility_scores', 'Utility Scores', 'output', 'optional', 'The utility scores', 45, NULL, NULL), +(11, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n'), +(11, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(11, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(11, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n'), +(11, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n'), +(11, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" \"\r\n}', NULL); \ No newline at end of file diff --git a/openml_OS/controllers/Api_splits.php b/openml_OS/controllers/Api_splits.php index 0027def9..6aa8aa3d 100644 --- a/openml_OS/controllers/Api_splits.php +++ b/openml_OS/controllers/Api_splits.php @@ -17,7 +17,7 @@ function __construct() { $this->load->helper('file_upload'); $this->db = $this->load->database('read',true); - $this->task_types = array(1, 2, 3, 6, 7, 9, 10); + $this->task_types = array(1, 2, 3, 6, 7, 9, 10, 11); $this->challenge_types = array(9); $this->evaluation = APPPATH . 'third_party/OpenML/Java/evaluate.jar'; $this->eval_engine_config = " -config 'cache_allowed=false;server=".BASE_URL.";api_key=".API_KEY."' ";