diff --git a/data/sql/data_feature.sql b/data/sql/data_feature.sql
new file mode 100644
index 000000000..f3e953da4
--- /dev/null
+++ b/data/sql/data_feature.sql
@@ -0,0 +1,9 @@
+INSERT INTO `data_feature` (`did`, `index`, `evaluation_engine_id`, `name`, `data_type`, `is_target`, `is_row_identifier`, `is_ignore`, `NumberOfDistinctValues`, `NumberOfUniqueValues`, `NumberOfMissingValues`, `NumberOfIntegerValues`, `NumberOfRealValues`, `NumberOfNominalValues`, `NumberOfValues`, `MaximumValue`, `MinimumValue`, `MeanValue`, `StandardDeviation`, `ClassDistribution`) VALUES
+(11, 0, 1, 'buying', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[324, 108, 0, 0],[268, 115, 23, 26],[258, 89, 46, 39]]]'),
+(11, 1, 1, 'maint', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[314, 105, 0, 13],[268, 115, 23, 26],[268, 92, 46, 26]]]'),
+(11, 2, 1, 'doors', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"3\",\"4\",\"5more\"],[[326, 81, 15, 10],[300, 99, 18, 15],[292, 102, 18, 20],[292, 102, 18, 20]]]'),
+(11, 3, 1, 'persons', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"4\",\"more\"],[[576, 0, 0, 0],[312, 198, 36, 30],[322, 186, 33, 35]]]'),
+(11, 4, 1, 'lug_boot', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"small\",\"med\",\"big\"],[[450, 105, 21, 0],[392, 135, 24, 25],[368, 144, 24, 40]]]'),
+(11, 5, 1, 'safety', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"low\",\"med\",\"high\"],[[576, 0, 0, 0],[357, 180, 39, 0],[277, 204, 30, 65]]]'),
+(11, 6, 1, 'class', 'nominal', 'true', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"unacc\",\"acc\",\"good\",\"vgood\"],[[1210, 0, 0, 0],[0, 384, 0, 0],[0, 0, 69, 0],[0, 0, 0, 65]]]');
+
diff --git a/data/sql/data_feature_description.sql b/data/sql/data_feature_description.sql
new file mode 100644
index 000000000..dd8ec76f8
--- /dev/null
+++ b/data/sql/data_feature_description.sql
@@ -0,0 +1,5 @@
+INSERT INTO `data_feature_description` (`did`, `index`, `uploader`, `date`, `description_type`, `value`) VALUES
+(11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Car_door'),
+(11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Door'),
+(11, 1, 1, '2024-01-09 13:23:18', 'ontology', 'https://en.wikipedia.org/wiki/Service_(motor_vehicle)'),
+(11, 3, 1, '2024-01-09 13:24:18', 'ontology', 'https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States');
\ No newline at end of file
diff --git a/data/sql/data_feature_value.sql b/data/sql/data_feature_value.sql
new file mode 100644
index 000000000..b8a7ea616
--- /dev/null
+++ b/data/sql/data_feature_value.sql
@@ -0,0 +1,26 @@
+INSERT INTO `data_feature_value` (`did`, `index`, `value`) VALUES
+(11, 0, 'high'),
+(11, 0, 'low'),
+(11, 0, 'med'),
+(11, 0, 'vhigh'),
+(11, 1, 'high'),
+(11, 1, 'low'),
+(11, 1, 'med'),
+(11, 1, 'vhigh'),
+(11, 2, '2'),
+(11, 2, '3'),
+(11, 2, '4'),
+(11, 2, '5more'),
+(11, 3, '2'),
+(11, 3, '4'),
+(11, 3, 'more'),
+(11, 4, 'big'),
+(11, 4, 'med'),
+(11, 4, 'small'),
+(11, 5, 'high'),
+(11, 5, 'low'),
+(11, 5, 'med'),
+(11, 6, 'acc'),
+(11, 6, 'good'),
+(11, 6, 'unacc'),
+(11, 6, 'vgood');
diff --git a/data/sql/data_processed.sql b/data/sql/data_processed.sql
new file mode 100644
index 000000000..28714a186
--- /dev/null
+++ b/data/sql/data_processed.sql
@@ -0,0 +1,2 @@
+INSERT INTO `data_processed` (`did`, `evaluation_engine_id`, `user_id`, `processing_date`, `error`, `warning`, `num_tries`) VALUES
+(11, 1, 1, '2024-01-09 18:02:58', NULL, NULL, 1);
diff --git a/downloads/openml.sql b/downloads/openml.sql
index a4ca83fc1..a41dc2f3c 100644
--- a/downloads/openml.sql
+++ b/downloads/openml.sql
@@ -60,7 +60,7 @@ CREATE TABLE `file` (
`creator` int(16) NOT NULL,
`creation_date` datetime NOT NULL,
`filepath` varchar(256) NOT NULL,
- `filesize` int(64) NOT NULL,
+ `filesize` bigint(64) NOT NULL,
`filename_original` varchar(256) NOT NULL,
`extension` varchar(16) NOT NULL,
`mime_type` varchar(32) NOT NULL,
diff --git a/downloads/openml_expdb.sql b/downloads/openml_expdb.sql
index 6aadb7c54..dd7c223ff 100644
--- a/downloads/openml_expdb.sql
+++ b/downloads/openml_expdb.sql
@@ -207,6 +207,19 @@ CREATE TABLE `data_feature` (
-- Table structure for table `data_feature_value`
--
+CREATE TABLE `data_feature_description` ( -- descriptions (currently ontology URLs) attached to individual dataset features
+ `did` int(10) UNSIGNED NOT NULL, -- together with `index`: the described feature, i.e. a row of `data_feature`
+ `index` int(10) UNSIGNED NOT NULL,
+ `uploader` mediumint(8) UNSIGNED NOT NULL, -- id of the user who added the description
+ `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, -- defaults to the insertion time
+ `description_type` enum('plain','ontology') NOT NULL,
+ `value` varchar(256) NOT NULL -- the description itself; the API rejects anything longer than 256 characters
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+--
+-- Table structure for table `data_feature_value`
+--
+
CREATE TABLE `data_feature_value` (
`did` int(10) UNSIGNED NOT NULL,
`index` int(10) UNSIGNED NOT NULL,
@@ -946,6 +959,12 @@ ALTER TABLE `data_feature`
ADD KEY `evaluation_engine_id` (`evaluation_engine_id`),
ADD KEY `did` (`did`,`evaluation_engine_id`);
+--
+-- Indexes for table `data_feature_description`
+--
+ALTER TABLE `data_feature_description`
+ ADD KEY `did` (`did`,`index`);
+
--
-- Indexes for table `data_feature_value`
--
@@ -1430,6 +1449,13 @@ ALTER TABLE `data_feature`
ALTER TABLE `data_feature_value`
ADD CONSTRAINT `data_feature_value_ibfk_1` FOREIGN KEY (`did`,`index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE;
+
+--
+-- Constraints for table `data_feature_description`
+--
+ALTER TABLE `data_feature_description`
+ ADD CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`,`index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE;
+
--
-- Constraints for table `data_processed`
--
diff --git a/openml_OS/controllers/Cron.php b/openml_OS/controllers/Cron.php
index 11b64c5a1..2b822a46c 100644
--- a/openml_OS/controllers/Cron.php
+++ b/openml_OS/controllers/Cron.php
@@ -167,7 +167,7 @@ function move_run_files($start_index, $end_index) {
function install_database() {
// note that this one does not come from DATA folder, as they are stored in github
$models = directory_map('data/sql/', 1);
- $manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'dataset.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');
+ $manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'evaluation_engine.sql', 'dataset.sql', 'data_processed.sql', 'data_feature.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');
// moves elements of $manipulated_order to the start of the models array
foreach (array_reverse($manipulated_order) as $name) {
diff --git a/openml_OS/models/Data_feature_description.php b/openml_OS/models/Data_feature_description.php
new file mode 100644
index 000000000..4807c962d
--- /dev/null
+++ b/openml_OS/models/Data_feature_description.php
@@ -0,0 +1,10 @@
+table = 'data_feature_description';
+ $this->id_column = array('did', 'index', 'value');
+ }
+}
+?>
diff --git a/openml_OS/models/api/v1/Api_data.php b/openml_OS/models/api/v1/Api_data.php
index db8fa73ea..35c797b1c 100644
--- a/openml_OS/models/api/v1/Api_data.php
+++ b/openml_OS/models/api/v1/Api_data.php
@@ -15,6 +15,7 @@ function __construct() {
$this->load->model('Dataset_topic');
$this->load->model('Dataset_description');
$this->load->model('Data_feature');
+ $this->load->model('Data_feature_description');
$this->load->model('Data_feature_value');
$this->load->model('Data_quality');
$this->load->model('Feature_quality');
@@ -163,6 +164,16 @@ function bootstrap($format, $segments, $request_type, $user_id) {
return;
}
+ if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'add' && $request_type == 'post') {
+ $this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', true);
+ return;
+ }
+
+ if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'remove' && $request_type == 'post') {
+ $this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', false);
+ return;
+ }
+
if (count($segments) == 2 && $segments[0] == 'status' && $segments[1] == 'update') {
$this->status_update($this->input->post('data_id'), $this->input->post('status'));
return;
@@ -170,6 +181,70 @@ function bootstrap($format, $segments, $request_type, $user_id) {
$this->returnError(100, $this->version);
}
+
+ /**
+  * Adds ($do_add = true) or removes ($do_add = false) a single description of a dataset feature
+  * and renders the descriptions left on that feature; on failure reports an API error and returns false.
+  */
+ private function data_feature_description($data_id, $feature_idx, $description, $description_type, $do_add) {
+   // both ids end up in hand-built WHERE clauses, so accept nothing but plain digit strings
+   if (!ctype_digit((string) $data_id) || !ctype_digit((string) $feature_idx) || !is_string($description) || $description === '') {
+     $this->returnError(1100, $this->version);
+     return false;
+   }
+   if (strlen($description) > 256) {
+     $this->returnError(1105, $this->version);
+     return false;
+   }
+   if ($description_type == 'ontology' && !filter_var($description, FILTER_VALIDATE_URL)) {
+     $this->returnError(1106, $this->version);
+     return false;
+   }
+   // `index` is a reserved word in MySQL and needs backticks; string values are quoted and escaped by the db driver
+   $feature_where = '`did` = ' . (int) $data_id . ' AND `index` = ' . (int) $feature_idx . ' AND `description_type` = ' . $this->db->escape($description_type);
+   if ($do_add) {
+     $descriptions = $this->Data_feature_description->getColumnWhere('value', $feature_where);
+     if ($descriptions != false && in_array($description, $descriptions)) {
+       $this->returnError(1101, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
+       return false;
+     }
+     // todo discuss policy: who is allowed to add ontology to a feature?
+     $description_data = array(
+       'did' => $data_id,
+       'index' => $feature_idx,
+       'description_type' => $description_type,
+       'value' => $description,
+       'uploader' => $this->user_id,
+       'date' => now()
+     );
+     $res = $this->Data_feature_description->insert($description_data);
+     if ($res == false) {
+       $this->returnError(1102, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
+       return false;
+     }
+   } else {
+     $description_record = $this->Data_feature_description->getWhereSingle($feature_where . ' AND `value` = ' . $this->db->escape($description));
+     if ($description_record == false) {
+       $this->returnError(1103, $this->version);
+       return false;
+     }
+     // todo discuss policy: who is allowed to remove ontology from a feature?
+     $is_admin = $this->ion_auth->is_admin($this->user_id);
+     if ($description_record->uploader != $this->user_id && $is_admin == false) {
+       $this->returnError(1104, $this->version);
+       return false;
+     }
+     $this->Data_feature_description->delete(array($data_id, $feature_idx, $description));
+   }
+
+   $descriptions = $this->Data_feature_description->getColumnWhere('value', $feature_where);
+   $this->xmlContents('data-feature-description', $this->version, array(
+     'id' => $data_id,
+     'description_type' => $description_type,
+     'xml_tag_name' => 'feature_description' . '_' . ($do_add ? 'add' : 'remove'),
+     'descriptions' => $descriptions
+   ));
+ }
/**
*@OA\Post(
@@ -513,13 +588,7 @@ private function data_fork() {
$description_record->did = $new_data_id;
$description_record->version = "1";
$this->Dataset_description->insert($description_record);
-
- // create a copy of the latest description
- $description_record = $this->Dataset_description->getWhereSingle('did =' . $data_id, 'version DESC');
- $description_record->did = $new_data_id;
- $description_record->version = "1";
- $this->Dataset_description->insert($description_record);
-
+
// update elastic search index.
try {
$this->elasticsearch->index('data', $new_data_id);
@@ -1449,8 +1518,9 @@ private function data_features($data_id) {
$this->returnError(273, $this->version);
return;
}
-
+
$dataset->features = $this->Data_feature->getWhere('did = "' . $dataset->did . '"');
+ // obtains possible values for a feature
$dataset->features_values = $this->Data_feature_value->getWhere('did = "' . $dataset->did . '"');
$index_values = array();
if ($dataset->features_values) {
@@ -1462,6 +1532,21 @@ private function data_features($data_id) {
}
}
$dataset->index_values = $index_values;
+
+ // obtains possible ontologies for a feature (for now: only ontologies)
+ $dataset->features_descriptions = $this->Data_feature_description->getWhere('did = "' . $dataset->did . '" AND description_type = "ontology"');
+ $index_ontologies = array();
+ if ($dataset->features_descriptions) {
+ foreach($dataset->features_descriptions as $val) {
+ if ($val->description_type == 'ontology') { // this is guaranteed
+ if (!isset($index_ontologies[$val->index])) {
+ $index_ontologies[$val->index] = array();
+ }
+ $index_ontologies[$val->index][] = $val->value;
+ }
+ }
+ }
+ $dataset->index_ontologies = $index_ontologies;
if ($data_processed->error && $dataset->features === false) {
$this->returnError(274, $this->version);
@@ -1642,6 +1727,14 @@ private function data_features_upload() {
} else {
$nominal_values = false;
}
+
+ //actual insert of the feature
+ if (array_key_exists('ontology', $feature)) {
+ $ontologies = $feature['ontology'];
+ unset($feature['ontology']);
+ } else {
+ $ontologies = false;
+ }
$result = $this->Data_feature->insert($feature);
if (!$result) {
@@ -1665,7 +1758,7 @@ private function data_features_upload() {
return;
}
}
-
+ // situation where we are trying to add nominal values to a non-nominal attribute
if ($feature['data_type'] != 'nominal') {
// only allowed for nominal values
$this->db->trans_rollback();
@@ -1673,12 +1766,30 @@ private function data_features_upload() {
return;
}
} elseif ($feature['data_type'] == 'nominal') {
- // required for nominal values.. missing so throw error
+ // nominal values now require this information.. since it is not there, throw the error
$this->db->trans_rollback();
$this->returnError(448, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name']);
return;
}
+ if ($ontologies) { // store every ontology URL of the feature that was just inserted
+   foreach ($ontologies as $ontology) {
+     $data = array(
+       'did' => $did,
+       'index' => $feature['index'],
+       'uploader' => $this->user_id,
+       'description_type' => 'ontology',
+       'value' => $ontology
+     );
+     $result = $this->Data_feature_description->insert($data);
+     if (!$result) {
+       $this->db->trans_rollback();
+       $this->returnError(450, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name'] . ', value: ' . $ontology);
+       return;
+     }
+   }
+ }
+
// NOTE: this is commented out because not all datasets have targets, or they can have multiple ones. Targets should also be set more carefully.
// if no specified attribute is the target, select the last one:
//if( $dataset->default_target_attribute == false && $feature->index > $current_index ) {
diff --git a/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php b/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php
new file mode 100644
index 000000000..d6ce3f588
--- /dev/null
+++ b/openml_OS/views/pages/api_new/v1/xml/data-feature-description.tpl.php
@@ -0,0 +1,6 @@
+ xmlns:oml="http://openml.org/openml">
+
+
+ >>
+
+>
diff --git a/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php b/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
index 85baff26a..c23760acf 100644
--- a/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
+++ b/openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
@@ -4,6 +4,10 @@
index; ?>
name); ?>
data_type; ?>
+ index, $index_ontologies)): foreach($index_ontologies[$feature->index] as $value): ?>
+
+
index, $index_values)): foreach($index_values[$feature->index] as $value): ?>
diff --git a/openml_OS/views/pages/api_new/v1/xml/pre.php b/openml_OS/views/pages/api_new/v1/xml/pre.php
index efd9049a2..27c2660b3 100644
--- a/openml_OS/views/pages/api_new/v1/xml/pre.php
+++ b/openml_OS/views/pages/api_new/v1/xml/pre.php
@@ -540,4 +540,14 @@
//openml.list.data.description
$this->apiErrors[1090] = 'Failed to find description versions for this dataset/Unknown dataset';
+
+//openml.data.feature.description (1100-1103 and 1105-1106: validation / lookup / write errors; 1104: authorisation)
+$this->apiErrors[1100] = 'Please provide mandatory POST fields';
+$this->apiErrors[1101] = 'This description was already associated with this feature';
+$this->apiErrors[1102] = 'Failure to write to the database';
+$this->apiErrors[1103] = 'Could not find description in database';
+$this->apiErrors[1104] = 'Only the uploader of this description or an administrator can remove it';
+$this->apiErrors[1105] = 'Feature description too long';
+$this->apiErrors[1106] = 'Feature description meant as ontology, but is not a valid URL';
+
?>
diff --git a/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd b/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
index b50663027..2c1abbe95 100644
--- a/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
+++ b/openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
@@ -13,6 +13,7 @@
+
@@ -31,6 +32,12 @@
+
+
+
+
+
+