diff --git a/data/sql/data_feature.sql b/data/sql/data_feature.sql
new file mode 100644
index 000000000..f3e953da4
--- /dev/null
+++ b/data/sql/data_feature.sql
@@ -0,0 +1,9 @@
+INSERT INTO `data_feature` (`did`, `index`, `evaluation_engine_id`, `name`, `data_type`, `is_target`, `is_row_identifier`, `is_ignore`, `NumberOfDistinctValues`, `NumberOfUniqueValues`, `NumberOfMissingValues`, `NumberOfIntegerValues`, `NumberOfRealValues`, `NumberOfNominalValues`, `NumberOfValues`, `MaximumValue`, `MinimumValue`, `MeanValue`, `StandardDeviation`, `ClassDistribution`) VALUES
+(11, 0, 1, 'buying', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[324, 108, 0, 0],[268, 115, 23, 26],[258, 89, 46, 39]]]'),
+(11, 1, 1, 'maint', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[314, 105, 0, 13],[268, 115, 23, 26],[268, 92, 46, 26]]]'),
+(11, 2, 1, 'doors', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"3\",\"4\",\"5more\"],[[326, 81, 15, 10],[300, 99, 18, 15],[292, 102, 18, 20],[292, 102, 18, 20]]]'),
+(11, 3, 1, 'persons', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"4\",\"more\"],[[576, 0, 0, 0],[312, 198, 36, 30],[322, 186, 33, 35]]]'),
+(11, 4, 1, 'lug_boot', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"small\",\"med\",\"big\"],[[450, 105, 21, 0],[392, 135, 24, 25],[368, 144, 24, 40]]]'),
+(11, 5, 1, 'safety', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"low\",\"med\",\"high\"],[[576, 0, 0, 0],[357, 180, 39, 0],[277, 204, 30, 65]]]'),
+(11, 6, 1, 'class', 'nominal', 'true', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"unacc\",\"acc\",\"good\",\"vgood\"],[[1210, 0, 0, 0],[0, 384, 0, 0],[0, 0, 69, 0],[0, 0, 0, 65]]]');
+
diff --git a/data/sql/data_feature_description.sql b/data/sql/data_feature_description.sql
new file mode 100644
index 000000000..dd8ec76f8
--- /dev/null
+++ b/data/sql/data_feature_description.sql
@@ -0,0 +1,5 @@
+INSERT INTO `data_feature_description` (`did`, `index`, `uploader`, `date`, `description_type`, `value`) VALUES
+(11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Car_door'),
+(11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Door'),
+(11, 1, 1, '2024-01-09 13:23:18', 'ontology', 'https://en.wikipedia.org/wiki/Service_(motor_vehicle)'),
+(11, 3, 1, '2024-01-09 13:24:18', 'ontology', 'https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States');
\ No newline at end of file
diff --git a/data/sql/data_feature_value.sql b/data/sql/data_feature_value.sql
new file mode 100644
index 000000000..b8a7ea616
--- /dev/null
+++ b/data/sql/data_feature_value.sql
@@ -0,0 +1,26 @@
+INSERT INTO `data_feature_value` (`did`, `index`, `value`) VALUES
+(11, 0, 'high'),
+(11, 0, 'low'),
+(11, 0, 'med'),
+(11, 0, 'vhigh'),
+(11, 1, 'high'),
+(11, 1, 'low'),
+(11, 1, 'med'),
+(11, 1, 'vhigh'),
+(11, 2, '2'),
+(11, 2, '3'),
+(11, 2, '4'),
+(11, 2, '5more'),
+(11, 3, '2'),
+(11, 3, '4'),
+(11, 3, 'more'),
+(11, 4, 'big'),
+(11, 4, 'med'),
+(11, 4, 'small'),
+(11, 5, 'high'),
+(11, 5, 'low'),
+(11, 5, 'med'),
+(11, 6, 'acc'),
+(11, 6, 'good'),
+(11, 6, 'unacc'),
+(11, 6, 'vgood');
diff --git a/data/sql/data_processed.sql b/data/sql/data_processed.sql
new file mode 100644
index 000000000..28714a186
--- /dev/null
+++ b/data/sql/data_processed.sql
@@ -0,0 +1,2 @@
+INSERT INTO `data_processed` (`did`, `evaluation_engine_id`, `user_id`, `processing_date`, `error`, `warning`, `num_tries`) VALUES
+(11, 1, 1, '2024-01-09 18:02:58', NULL, NULL, 1);
diff --git a/downloads/openml.sql b/downloads/openml.sql
index 
a4ca83fc1..a41dc2f3c 100644 --- a/downloads/openml.sql +++ b/downloads/openml.sql @@ -60,7 +60,7 @@ CREATE TABLE `file` ( `creator` int(16) NOT NULL, `creation_date` datetime NOT NULL, `filepath` varchar(256) NOT NULL, - `filesize` int(64) NOT NULL, + `filesize` bigint(64) NOT NULL, `filename_original` varchar(256) NOT NULL, `extension` varchar(16) NOT NULL, `mime_type` varchar(32) NOT NULL, diff --git a/downloads/openml_expdb.sql b/downloads/openml_expdb.sql index 6aadb7c54..dd7c223ff 100644 --- a/downloads/openml_expdb.sql +++ b/downloads/openml_expdb.sql @@ -207,6 +207,19 @@ CREATE TABLE `data_feature` ( -- Table structure for table `data_feature_value` -- +CREATE TABLE `data_feature_description` ( + `did` int(10) UNSIGNED NOT NULL, + `index` int(10) UNSIGNED NOT NULL, + `uploader` mediumint(8) UNSIGNED NOT NULL, + `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `description_type` enum('plain','ontology') NOT NULL, + `value` varchar(256) NOT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +-- +-- Table structure for table `data_feature_value` +-- + CREATE TABLE `data_feature_value` ( `did` int(10) UNSIGNED NOT NULL, `index` int(10) UNSIGNED NOT NULL, @@ -946,6 +959,12 @@ ALTER TABLE `data_feature` ADD KEY `evaluation_engine_id` (`evaluation_engine_id`), ADD KEY `did` (`did`,`evaluation_engine_id`); +-- +-- Indexes for table `data_feature_description` +-- +ALTER TABLE `data_feature_description` + ADD KEY `did` (`did`,`index`); + -- -- Indexes for table `data_feature_value` -- @@ -1430,6 +1449,13 @@ ALTER TABLE `data_feature` ALTER TABLE `data_feature_value` ADD CONSTRAINT `data_feature_value_ibfk_1` FOREIGN KEY (`did`,`index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE; + +-- +-- Constraints for table `data_feature_description` +-- +ALTER TABLE `data_feature_description` + ADD CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`,`index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE; + -- 
-- Constraints for table `data_processed`
 --
diff --git a/openml_OS/controllers/Cron.php b/openml_OS/controllers/Cron.php
index 11b64c5a1..2b822a46c 100644
--- a/openml_OS/controllers/Cron.php
+++ b/openml_OS/controllers/Cron.php
@@ -167,7 +167,7 @@ function move_run_files($start_index, $end_index) {
   function install_database() {
     // note that this one does not come from DATA folder, as they are stored in github
     $models = directory_map('data/sql/', 1);
-    $manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'dataset.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');
+    $manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'evaluation_engine.sql', 'dataset.sql', 'data_processed.sql', 'data_feature.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');
 
     // moves elements of $manipulated_order to the start of the models array
     foreach (array_reverse($manipulated_order) as $name) {
diff --git a/openml_OS/models/Data_feature_description.php b/openml_OS/models/Data_feature_description.php
new file mode 100644
index 000000000..4807c962d
--- /dev/null
+++ b/openml_OS/models/Data_feature_description.php
@@ -0,0 +1,10 @@
+table = 'data_feature_description';
+    $this->id_column = array('did', 'index', 'value');
+  }
+}
+?>
diff --git a/openml_OS/models/api/v1/Api_data.php b/openml_OS/models/api/v1/Api_data.php
index db8fa73ea..35c797b1c 100644
--- a/openml_OS/models/api/v1/Api_data.php
+++ b/openml_OS/models/api/v1/Api_data.php
@@ -15,6 +15,7 @@ function __construct() {
     $this->load->model('Dataset_topic');
     $this->load->model('Dataset_description');
     $this->load->model('Data_feature');
+    $this->load->model('Data_feature_description');
     $this->load->model('Data_feature_value');
     $this->load->model('Data_quality');
     $this->load->model('Feature_quality');
@@ -163,6 +164,16 @@ function bootstrap($format, $segments, $request_type, $user_id) {
       return;
     }
 
+    if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'add' && $request_type == 'post') {
+      $this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', true);
+      return;
+    }
+
+    if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'remove' && $request_type == 'post') {
+      $this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', false);
+      return;
+    }
+
     if (count($segments) == 2 && $segments[0] == 'status' && $segments[1] == 'update') {
       $this->status_update($this->input->post('data_id'), $this->input->post('status'));
       return;
@@ -170,6 +181,86 @@ function bootstrap($format, $segments, $request_type, $user_id) {
 
     $this->returnError(100, $this->version);
   }
+
+  /**
+   * Adds a description to, or removes a description from, one feature of a
+   * dataset, then renders that feature's stored descriptions of the same type.
+   * Returns error 1100 when $data_id, $feature_idx or $description is false.
+   *
+   * @param mixed  $data_id          dataset id (POST field `data_id` in both routes)
+   * @param mixed  $feature_idx      feature index (POST field `index`)
+   * @param mixed  $description      value to store or remove; must be a URL for type 'ontology'
+   * @param string $description_type written to the `description_type` column
+   * @param bool   $do_add           true adds the description, false removes it
+   * @return false|null false after an API error was emitted, null otherwise
+   */
+  private function data_feature_description($data_id, $feature_idx, $description, $description_type, $do_add) {
+    if ($data_id === false || $feature_idx === false || $description === false) {
+      $this->returnError(1100, $this->version);
+      return false;
+    }
+
+    if (strlen($description) > 256) {
+      $this->returnError(1105, $this->version);
+      return false;
+    }
+    if ($description_type == 'ontology' && !filter_var($description, FILTER_VALIDATE_URL)) {
+      $this->returnError(1106, $this->version);
+      return false;
+    }
+
+    // Shared filter for every lookup below. `index` is a reserved word in MySQL, so it
+    // must be backtick-quoted; the values come straight from POST, so they are escaped.
+    $feature_where = '`did` = ' . $this->db->escape($data_id) . ' AND `index` = ' . $this->db->escape($feature_idx) . ' AND `description_type` = ' . $this->db->escape($description_type);
+
+    if ($do_add) {
+      $descriptions = $this->Data_feature_description->getColumnWhere('value', $feature_where);
+      if($descriptions != false && in_array($description, $descriptions)) {
+        $this->returnError(1101, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
+        return false;
+      }
+      // todo discuss policy: who is allowed to add ontology to a feature?
+
+      $description_data = array(
+        'did' => $data_id,
+        'index' => $feature_idx,
+        'description_type' => $description_type,
+        'value' => $description,
+        'uploader' => $this->user_id,
+        'date' => now()
+      );
+
+      $res = $this->Data_feature_description->insert($description_data);
+      if ($res == false) {
+        $this->returnError(1102, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
+        return false;
+      }
+    } else {
+      $description_record = $this->Data_feature_description->getWhereSingle($feature_where . ' AND `value` = ' . $this->db->escape($description));
+      if ($description_record == false) {
+        $this->returnError(1103, $this->version);
+        return false;
+      }
+      // todo discuss policy: who is allowed to remove ontology from a feature?
+      $is_admin = $this->ion_auth->is_admin($this->user_id);
+      if ($description_record->uploader != $this->user_id && $is_admin == false) {
+        $this->returnError(1104, $this->version);
+        return false;
+      }
+      $this->Data_feature_description->delete(array($data_id, $feature_idx, $description));
+    }
+
+    $descriptions = $this->Data_feature_description->getColumnWhere('value', $feature_where);
+    $this->xmlContents(
+      'data-feature-description',
+      $this->version,
+      array(
+        'id' => $data_id,
+        'description_type' => $description_type,
+        'xml_tag_name' => 'feature_description' . '_' . ($do_add ? 'add' : 'remove'),
+        'descriptions' => $descriptions)
+    );
+  }
 
   /**
    *@OA\Post(
@@ -513,13 +588,7 @@ private function data_fork() {
     $description_record->did = $new_data_id;
     $description_record->version = "1";
     $this->Dataset_description->insert($description_record);
-
-    // create a copy of the latest description
-    $description_record = $this->Dataset_description->getWhereSingle('did =' . 
$data_id, 'version DESC');
-    $description_record->did = $new_data_id;
-    $description_record->version = "1";
-    $this->Dataset_description->insert($description_record);
-    
+
     // update elastic search index.
     try {
       $this->elasticsearch->index('data', $new_data_id);
@@ -1449,8 +1518,9 @@ private function data_features($data_id) {
       $this->returnError(273, $this->version);
       return;
     }
-    
+
     $dataset->features = $this->Data_feature->getWhere('did = "' . $dataset->did . '"');
+    // obtains possible values for a feature
     $dataset->features_values = $this->Data_feature_value->getWhere('did = "' . $dataset->did . '"');
     $index_values = array();
     if ($dataset->features_values) {
@@ -1462,6 +1532,21 @@ private function data_features($data_id) {
       }
     }
     $dataset->index_values = $index_values;
+
+    // obtains possible ontologies for a feature (for now: only ontologies)
+    $dataset->features_descriptions = $this->Data_feature_description->getWhere('did = "' . $dataset->did . '" AND description_type = "ontology"');
+    $index_ontologies = array();
+    if ($dataset->features_descriptions) {
+      foreach($dataset->features_descriptions as $val) {
+        if ($val->description_type == 'ontology') { // this is guaranteed
+          if (!isset($index_ontologies[$val->index])) {
+            $index_ontologies[$val->index] = array();
+          }
+          $index_ontologies[$val->index][] = $val->value;
+        }
+      }
+    }
+    $dataset->index_ontologies = $index_ontologies;
 
     if ($data_processed->error && $dataset->features === false) {
       $this->returnError(274, $this->version);
@@ -1642,6 +1727,14 @@ private function data_features_upload() {
         } else {
           $nominal_values = false;
         }
+
+        // ontologies are stored in data_feature_description, so take them out of the row before the actual insert of the feature
+        if (array_key_exists('ontology', $feature)) {
+          $ontologies = $feature['ontology'];
+          unset($feature['ontology']);
+        } else {
+          $ontologies = false;
+        }
 
         $result = $this->Data_feature->insert($feature);
         if (!$result) {
@@ -1665,7 +1758,7 @@ private function data_features_upload() {
               return;
             }
           }
-          
+
           // situation where we are trying to add nominal values to a non-nominal attribute
           if ($feature['data_type'] != 'nominal') { // only allowed for nominal values
             $this->db->trans_rollback();
@@ -1673,12 +1766,31 @@ private function data_features_upload() {
             return;
           }
         } elseif ($feature['data_type'] == 'nominal') {
-          // required for nominal values.. missing so throw error
+          // nominal values now require this information.. since it is not there, throw the error
           $this->db->trans_rollback();
           $this->returnError(448, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name']);
           return;
         }
 
+        if ($ontologies) {
+          // each entry is an ontology reference of the current feature: key it by this feature's index and store the entry itself as the value
+          foreach ($ontologies as $ontology) {
+            $data = array(
+              'did' => $did,
+              'index' => $feature['index'],
+              'description_type' => 'ontology',
+              'value' => $ontology,
+              'uploader' => $this->user_id
+            );
+            $result = $this->Data_feature_description->insert($data);
+            if (!$result) {
+              $this->db->trans_rollback();
+              $this->returnError(450, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name'] . ', value: ' . $ontology);
+              return;
+            }
+          }
+        }
+
         // NOTE: this is commented out because not all datasets have targets, or they can have multiple ones. Targets should also be set more carefully.
// if no specified attribute is the target, select the last one:
         //if( $dataset->default_target_attribute == false && $feature->index > $current_index ) {
diff --git a/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php b/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php
new file mode 100644
index 000000000..d6ce3f588
--- /dev/null
+++ b/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php
@@ -0,0 +1,6 @@
+ xmlns:oml="http://openml.org/openml">
+
+
+ >>
+
+>
diff --git a/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php b/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
index 85baff26a..c23760acf 100644
--- a/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
+++ b/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
@@ -4,6 +4,10 @@
 index; ?>
 name); ?>
 data_type; ?>
+ index, $index_ontologies)): foreach($index_ontologies[$feature->index] as $value): ?>
+
+
 index, $index_values)): foreach($index_values[$feature->index] as $value): ?>
diff --git a/openml_OS/views/pages/api_new/v1/xml/pre.php b/openml_OS/views/pages/api_new/v1/xml/pre.php
index efd9049a2..27c2660b3 100644
--- a/openml_OS/views/pages/api_new/v1/xml/pre.php
+++ b/openml_OS/views/pages/api_new/v1/xml/pre.php
@@ -540,4 +540,14 @@
 
 //openml.list.data.description
 $this->apiErrors[1090] = 'Failed to find description versions for this dataset/Unknown dataset';
+
+//openml.data.feature.description
+$this->apiErrors[1100] = 'Please provide mandatory POST fields';
+$this->apiErrors[1101] = 'This description was already associated with this feature';
+$this->apiErrors[1102] = 'Failure to write to the database';
+$this->apiErrors[1103] = 'Could not find description in database';
+$this->apiErrors[1104] = 'Only the uploader of a description or an administrator can remove it'; // raised by the uploader/admin check on removal, not by a failed write
+$this->apiErrors[1105] = 'Feature description too long';
+$this->apiErrors[1106] = 'Feature description meant as ontology, but is not a valid URL';
+
 ?>
diff --git a/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd b/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
index b50663027..2c1abbe95 100644
--- a/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
+++ b/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
@@ -13,6 +13,7 @@
+
@@ -31,6 +32,12 @@
+
+
+
+
+
+