From 302e1c74c407523cb001ee90ebee03bbf0ba2f84 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Wed, 22 Feb 2023 13:32:30 +0100 Subject: [PATCH] added active testing --- data/sql/estimation_procedure.sql | 7 +- data/sql/task_type.sql | 3 +- data/sql/task_type_inout.sql | 139 ++++++++++++++++-------- 3 files changed, 83 insertions(+), 66 deletions(-) diff --git a/data/sql/estimation_procedure.sql b/data/sql/estimation_procedure.sql index 7f69e908b..a05361276 100644 --- a/data/sql/estimation_procedure.sql +++ b/data/sql/estimation_procedure.sql @@ -27,4 +27,9 @@ INSERT INTO `estimation_procedure` (`id`, `ttid`, `name`, `type`, `repeats`, `fo (26, 1, 'Test on Training Data', 'testontrainingdata', NULL, NULL, 'false', NULL, NULL, 'false', '2019-03-16 11:30:14'), (27, 2, 'Test on Training Data', 'testontrainingdata', NULL, NULL, 'false', NULL, NULL, 'false', '2019-03-16 11:30:14'), (28, 1, '20% Holdout (Ordered)', 'holdout_ordered', 1, 1, 'false', 20, NULL, 'false', '2019-05-23 12:40:53'), -(29, 9, '10-fold Crossvalidation', 'crossvalidation', 1, 10, 'false', NULL, 'true', 'false', '2014-12-31 20:00:00'); +(29, 9, '10-fold Crossvalidation', 'crossvalidation', 1, 10, 'false', NULL, 'true', 'false', '2014-12-31 20:00:00'), +(30, 10, '10-fold Crossvalidation', 'crossvalidation', 1, 10, 'false', NULL, 'true', 'false', '2023-02-22 11:46:54'), +(31, 10, '5 times 2-fold Crossvalidation', 'crossvalidation', 5, 2, 'false', NULL, 'true', 'false', '2023-02-22 11:46:54'), +(32, 10, '10 times 10-fold Crossvalidation', 'crossvalidation', 10, 10, 'false', NULL, 'true', 'false', '2023-02-22 11:46:54'), +(33, 10, '10% Holdout set', 'holdout', 1, NULL, 'false', 10, 'true', 'false', '2023-02-22 11:46:54'), +(34, 10, '33% Holdout set', 'holdout', 1, NULL, 'false', 33, 'true', 'false', '2023-02-22 11:46:54'); \ No newline at end of file diff --git a/data/sql/task_type.sql b/data/sql/task_type.sql index 55055b462..ae23e81df 100644 --- a/data/sql/task_type.sql +++ b/data/sql/task_type.sql 
@@ -7,4 +7,5 @@ INSERT INTO `task_type` (`ttid`, `name`, `description`, `creator`, `contributors (6, 'Machine Learning Challenge', 'This is a standard machine learning challenge with a hidden private dataset.\r\nIt offers a labeled training set and an unlabeled test set. \r\n\r\nThe task is to label the unlabeled instances. Only the OpenML server knows the correct labels, and will evaluate the submitted predictions using these hidden labels. The evaluation procedure, measure, and cost function (if any) are provided.', '\"Jan van Rijn\",\"Joaquin Vanschoren\"', NULL, '2014-11-28 00:00:00'), (7, 'Survival Analysis', 'Related to Regression. Given a dataset (typically consisting of patient data) predict a left timestamp (date entering the study), right timestamp (date of leaving the study), or both. ', '\"Benrd Bischl\",\"Dominik Kirchhoff\",\"Michel Lang\",\"Jan van Rijn\",\"Joaquin Vanschoren\"', NULL, '2014-12-03 00:00:00'), (8, 'Subgroup Discovery', 'Subgroup discovery is a data mining technique which extracts interesting rules with respect to a target variable. An important characteristic of this task is the combination of predictive and descriptive induction. An overview related to the task of subgroup discovery is presented. (description by: Herrera et. al., An overview on subgroup discovery: foundations and applications)', '\"Jan N. van Rijn\", \"Arno Knobbe\", \"Joaquin Vanschoren\"', NULL, '2016-06-17 10:59:20'), -(9, 'Multitask Regression', '', 'Jan N. van Rijn', NULL, '2019-10-24 23:46:54'); +(9, 'Multitask Regression', '', 'Jan N. 
van Rijn', NULL, '2019-10-24 23:46:54'), +(10, 'Active Classification', '', 'various contributors', NULL, '2023-02-22 11:46:54'); diff --git a/data/sql/task_type_inout.sql b/data/sql/task_type_inout.sql index 891169a2c..642724ee1 100644 --- a/data/sql/task_type_inout.sql +++ b/data/sql/task_type_inout.sql @@ -1,64 +1,75 @@ -INSERT INTO `task_type_inout` (`ttid`, `name`, `type`, `io`, `requirement`, `description`, `order`, `api_constraints`, `template_api`, `template_search`) VALUES -(1, 'cost_matrix', 'CostMatrix', 'input', 'optional', 'A matrix describing the cost of miss-classifications per type. ', 21, '{\r\n \"data_type\": \"json\"\r\n}', '[INPUT:cost_matrix]', '{ \"name\": \"Specify cost matrix (optional):\", \"placeholder\": \"Experimental. Only allowed with one dataset selected in the dataset(s) field\"}'), -(1, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL, '{ \"name\": \"Specify row id\'s (0-based):\", \"placeholder\": \"Experimental. 
Only allowed with one dataset selected in the dataset(s) field\"}'), -(1, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(1, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(1, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\"\r\n}'), -(1, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL, NULL), -(1, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n', NULL), -(1, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(1, 
'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL, '{\r\n \"placeholder\": \"Use default target\"\r\n}'), -(2, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL, '{ \"name\": \"Specify row id\'s (0-based):\", \"placeholder\": \"Experimental. Only allowed with one dataset selected in the dataset(s) field\"}'), -(2, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(2, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(2, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, predictive_accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\",\r\n \"default\": \"predictive_accuracy\"\r\n}'), -(2, 'model', 'File', 'output', 'optional', 'A file containing the model built on all 
the input data.', 60, NULL, NULL, NULL), -(2, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n', NULL), -(2, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(2, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL, '{\r\n \"default\": \"class\",\r\n \"placeholder\": \"Use default target\"\r\n}'), -(3, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[INPUT:number_samples]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(3, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(3, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": 
\"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\"\r\n}'), -(3, 'number_samples', 'String', 'input', 'hidden', 'The (maximum) number of samples to return, or the number of points on the learning curve. The sample sizes grow exponentially as a power of two.', 60, NULL, NULL, NULL), -(3, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n\r\n', NULL), -(3, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(3, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL, '{\r\n \"default\": \"class\",\r\n \"placeholder\": \"Use default target\"\r\n}'), -(4, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(4, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(4, 
'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\"\r\n}'), -(4, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n', NULL), -(4, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(4, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL, '{\r\n \"default\": \"class\",\r\n \"placeholder\": \"Use default target\"\r\n}'), -(5, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to assess the quality of the clustered', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(5, 'evaluations', 'KeyValue', 'output', 
'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(5, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., log likelihood', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\"\r\n}'), -(5, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL, NULL), -(5, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n', NULL), -(5, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(6, 'cost_matrix', 'CostMatrix', 'input', 'optional', 'A matrix describing the cost of miss-classifications per type. ', 21, '{\r\n \"data_type\": \"json\"\r\n}', '[INPUT:cost_matrix]', '{ \"name\": \"Specify cost matrix (optional):\", \"placeholder\": \"Experimental. Only allowed with one dataset selected in the dataset(s) field\"}'), -(6, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL, '{ \"name\": \"Specify row id\'s (0-based):\", \"placeholder\": \"Experimental. 
Only allowed with one dataset selected in the dataset(s) field\"}'), -(6, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(6, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(6, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\"\r\n}'), -(6, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL, NULL), -(6, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n', NULL), -(6, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:source_data_labeled]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset\",\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"Select at least one 
dataset\"\r\n}'), -(6, 'source_data_labeled', 'Dataset', 'input', 'required', 'The labelled version of the dataset', 13, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', NULL, '{\r\n \"name\": \"Dataset (labelled)\",\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"Select at least one dataset\"\r\n}'), -(6, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL, '{\r\n \"placeholder\": \"Use default target\"\r\n}'), -(7, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(7, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL, NULL), -(7, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbEvaluationMetrics()\",\r\n \"default\": \"c_index\"\r\n}'), -(7, 'model', 'File', 'output', 'optional', 'A file 
containing the model built on all the input data.', 60, NULL, NULL, NULL), -(7, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n', NULL), -(7, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature_left]\r\n[INPUT:target_feature_right]\r\n[INPUT:target_feature_event]\r\n', '{\r\n \"name\": \"Dataset\",\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"Select a dataset to perform survival analysis on\"\r\n}'), -(7, 'target_feature_event', 'String', 'input', 'required', 'The name of the feature that indicates the type of the event.', 17, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL, NULL), -(7, 'target_feature_left', 'String', 'input', 'optional', 'The name of the feature that indicates the start of the interval.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL, NULL), -(7, 'target_feature_right', 'String', 'input', 'optional', 'The name of the feature that indicates the end of the interval.', 16, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL, NULL), -(8, 'quality_measure', 'String', 'input', 'required', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '[INPUT:quality_measure]', '{\r\n \"autocomplete\": \"plain\",\r\n \"datasource\": 
\"expdbEvaluationMetrics()\"\r\n}'), -(8, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n[INPUT:target_value]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(8, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL, '{\r\n \"placeholder\": \"Use default target\"\r\n}'), -(8, 'target_value', 'String', 'input', 'required', 'The value of the target feature to be used as the SD target value.', 15, '{\r\n\"data_type\": \"string\"\r\n}', NULL, '{\r\n \"placeholder\": \"Use default target value\"\r\n}'), -(8, 'time_limit', 'Integer', 'input', 'required', 'The time limit for SD search', 30, '{\r\n\"data_type\": \"numeric\"\r\n}', '[INPUT:time_limit]', 'NULL'), -(9, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[INPUT:number_samples]\r\n', '{\r\n \"type\": \"select\",\r\n \"table\": \"estimation_procedure\",\r\n \"key\": \"id\",\r\n \"value\": \"name\"\r\n}'), -(9, 'source_data_list', 'Dataset', 'input', 'required', 'The input data for this task', 10, 
'{\r\n\"data_type\": \"json\"\r\n}', '\r\n[INPUT:source_data_list]\r\n[INPUT:target_feature]\r\n', '{\r\n \"name\": \"Dataset(s)\",\r\n \"autocomplete\": \"commaSeparated\",\r\n \"datasource\": \"expdbDatasetVersion()\",\r\n \"placeholder\": \"(*) include all datasets\"\r\n}'), -(9, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\"\r\n}', NULL, '{\r\n \"default\": \"class\",\r\n \"placeholder\": \"Use default target\"\r\n}'); +INSERT INTO `task_type_inout` (`ttid`, `name`, `type`, `io`, `requirement`, `description`, `order`, `api_constraints`, `template_api`) VALUES +(1, 'cost_matrix', 'CostMatrix', 'input', 'optional', 'A matrix describing the cost of miss-classifications per type. ', 21, '{\r\n \"data_type\": \"json\"\r\n}', '[INPUT:cost_matrix]'), +(1, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL), +(1, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n'), +(1, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(1, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": 
\"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(1, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(1, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n'), +(1, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n'), +(1, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL), +(2, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL), +(2, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n'), +(2, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(2, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, predictive_accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', 
'\r\n[INPUT:evaluation_measures]\r\n'), +(2, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(2, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n'), +(2, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n'), +(2, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL), +(3, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[INPUT:number_samples]\r\n'), +(3, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(3, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(3, 'number_samples', 'String', 'input', 'hidden', 'The (maximum) number of samples to return, or the number of points on the learning curve. 
The sample sizes grow exponentially as a power of two.', 60, NULL, NULL), +(3, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n\r\n'), +(3, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n'), +(3, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL), +(4, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]'), +(4, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(4, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(4, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n'), +(4, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n'), +(4, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the 
target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL), +(5, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to assess the quality of the clustered', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n'), +(5, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(5, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., log likelihood', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(5, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(5, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n'), +(5, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n'), +(6, 'cost_matrix', 'CostMatrix', 'input', 'optional', 'A matrix describing the cost of miss-classifications per type. 
', 21, '{\r\n \"data_type\": \"json\"\r\n}', '[INPUT:cost_matrix]'), +(6, 'custom_testset', 'KeyValue', 'input', 'hidden', 'If applicable, the user can specify a custom testset', 22, '{\r\n \"data_type\": \"json\"\r\n}', NULL), +(6, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[LOOKUP:estimation_procedure.percentage]\r\n[LOOKUP:estimation_procedure.stratified_sampling]\r\n'), +(6, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(6, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(6, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(6, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n\r\n'), +(6, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:source_data_labeled]\r\n[INPUT:target_feature]\r\n'), +(6, 'source_data_labeled', 'Dataset', 'input', 'required', 'The labelled version of the dataset', 13, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', NULL), +(6, 'target_feature', 
'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL), +(7, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n'), +(7, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(7, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '\r\n[INPUT:evaluation_measures]\r\n'), +(7, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(7, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\r\nARFF\r\n\r\n\r\n\r\n\r\n'), +(7, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature_left]\r\n[INPUT:target_feature_right]\r\n[INPUT:target_feature_event]\r\n'), +(7, 'target_feature_event', 'String', 'input', 'required', 'The name of the feature that indicates the type of the event.', 17, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": 
\"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL), +(7, 'target_feature_left', 'String', 'input', 'optional', 'The name of the feature that indicates the start of the interval.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL), +(7, 'target_feature_right', 'String', 'input', 'optional', 'The name of the feature that indicates the end of the interval.', 16, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"numeric\\\"\"\r\n}', NULL), +(8, 'quality_measure', 'String', 'input', 'required', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"math_function\"\r\n}', '[INPUT:quality_measure]'), +(8, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"did\",\r\n\"from\": \"dataset\"\r\n}', '\r\n[INPUT:source_data]\r\n[INPUT:target_feature]\r\n[INPUT:target_value]\r\n'), +(8, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\",\r\n\"select\": \"name\",\r\n\"from\": \"data_feature\",\r\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\r\n}', NULL), +(8, 'target_value', 'String', 'input', 'required', 'The value of the target feature to be used as the SD target value.', 15, '{\r\n\"data_type\": \"string\"\r\n}', NULL), +(8, 'time_limit', 'Integer', 'input', 'required', 'The time limit for SD search', 30, '{\r\n\"data_type\": \"numeric\"\r\n}', '[INPUT:time_limit]'), +(9, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation 
procedure used to validate the generated models', 20, '{\r\n\"data_type\": \"numeric\",\r\n\"select\": \"id\",\r\n\"from\": \"estimation_procedure\",\r\n\"where\": \"ttid = [TASK:ttid]\"\r\n}', '\r\n[INPUT:estimation_procedure]\r\n[LOOKUP:estimation_procedure.type]\r\n[CONSTANT:base_url]/api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\r\n[LOOKUP:estimation_procedure.repeats]\r\n[LOOKUP:estimation_procedure.folds]\r\n[INPUT:number_samples]\r\n'), +(9, 'source_data_list', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\r\n\"data_type\": \"json\"\r\n}', '\r\n[INPUT:source_data_list]\r\n[INPUT:target_feature]\r\n'), +(9, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\r\n\"data_type\": \"string\"\r\n}', NULL), +(10, 'annotation_costs', 'List', 'input', 'optional', 'A list specifying the annotation cost per instance. Experimental. Can break the database for large datasets. ', 35, '{\n \"data_type\": \"json\"\n}', '[INPUT:annotation_costs]'), +(10, 'batch_size', 'int', 'input', 'optional', 'The number of instances that need to be selected per batch. ', 38, '{\n \"data_type\": \"numeric\"\n}', '[INPUT:batch_size]'), +(10, 'cost_matrix', 'CostMatrix', 'input', 'optional', 'A matrix describing the cost of miss-classifications per type. 
', 21, '{\n \"data_type\": \"json\"\n}', '[INPUT:cost_matrix]'), +(10, 'estimation_procedure', 'Estimation Procedure', 'input', 'required', 'The estimation procedure used to validate the generated models', 20, '{\n\"data_type\": \"numeric\",\n\"select\": \"id\",\n\"from\": \"estimation_procedure\",\n\"where\": \"ttid = [TASK:ttid]\"\n}', '\n[INPUT:estimation_procedure]\n[LOOKUP:estimation_procedure.type]\n[CONSTANT:base_url]api_splits/get/[TASK:id]/Task_[TASK:id]_splits.arff\n[LOOKUP:estimation_procedure.repeats]\n[LOOKUP:estimation_procedure.folds]\n[LOOKUP:estimation_procedure.percentage]\n[LOOKUP:estimation_procedure.stratified_sampling]\n'), +(10, 'evaluations', 'KeyValue', 'output', 'optional', 'A list of user-defined evaluations of the task as key-value pairs.', 50, NULL, NULL), +(10, 'evaluation_measures', 'String', 'input', 'optional', 'The evaluation measures to optimize for, e.g., cpu time, accuracy', 30, '{\n\"data_type\": \"string\",\n\"select\": \"name\",\n\"from\": \"math_function\"\n}', '\n[INPUT:evaluation_measures]\n'), +(10, 'model', 'File', 'output', 'optional', 'A file containing the model built on all the input data.', 60, NULL, NULL), +(10, 'predictions', 'Predictions', 'output', 'optional', 'The desired output format', 40, NULL, '\nARFF\n\n\n\n\n\n\n'), +(10, 'source_data', 'Dataset', 'input', 'required', 'The input data for this task', 10, '{\n\"data_type\": \"numeric\",\n\"select\": \"did\",\n\"from\": \"dataset\"\n}', '\n[INPUT:source_data]\n[INPUT:target_feature]\n'), +(10, 'target_feature', 'String', 'input', 'required', 'The name of the dataset feature to be used as the target feature.', 15, '{\n\"data_type\": \"string\",\n\"select\": \"name\",\n\"from\": \"data_feature\",\n\"where\": \"did = \\\"[INPUT:source_data]\\\" AND data_type = \\\"nominal\\\"\"\n}', NULL), +(10, 'utility_scores', 'Utility Scores', 'output', 'optional', 'The utility scores', 45, NULL, NULL); \ No newline at end of file