Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
336f5ab
add new feature
janvanrijn Mar 24, 2022
a58a756
updated error codes
janvanrijn Mar 24, 2022
960ae58
updates api data feature description
janvanrijn Mar 24, 2022
5c86282
pull develop back in branch (#1168)
janvanrijn Nov 17, 2022
babdad1
pull develop into branch (#1169)
janvanrijn Nov 17, 2022
68c2686
pull feature description into develop (#1170)
janvanrijn Nov 17, 2022
5b87b3a
pull develop into feature description (#1171)
janvanrijn Nov 17, 2022
3de1c6b
align htaccess with develop
janvanrijn Nov 17, 2022
b867da4
Develop with changes (#1174)
janvanrijn Nov 18, 2022
1749e78
fixes problem
janvanrijn Nov 18, 2022
fcbd136
fixes problem
janvanrijn Nov 18, 2022
eeb8918
bugfix in old frontend
Jun 14, 2023
f3ce20d
feature description update (#1205)
janvanrijn Jan 8, 2024
5a73332
Merge branch 'develop' into add_feature_description
janvanrijn Jan 8, 2024
48f372d
ontologies to feature view
janvanrijn Jan 9, 2024
b6ba5c3
typo fix
janvanrijn Jan 9, 2024
99dae2d
added if/else construct for ontologies
janvanrijn Jan 9, 2024
3933536
typo fix
janvanrijn Jan 9, 2024
d7eaae6
typo fix
janvanrijn Jan 9, 2024
aff7ab1
adds datafeature description
janvanrijn Jan 9, 2024
aebe164
typo fix
janvanrijn Jan 9, 2024
a0e9d0a
typo fix
janvanrijn Jan 9, 2024
54276cc
typo fix
janvanrijn Jan 9, 2024
ec95066
typo fix
janvanrijn Jan 9, 2024
99e6bea
typo fix
janvanrijn Jan 9, 2024
abf35ae
query fixes
janvanrijn Jan 9, 2024
dfbcdd2
error_handling
janvanrijn Jan 9, 2024
914e8d1
xml fixes
janvanrijn Jan 9, 2024
c5f8803
typo fix
janvanrijn Jan 9, 2024
d5bbc75
typo fix
janvanrijn Jan 9, 2024
4628fc4
fix api call
janvanrijn Jan 9, 2024
2962a4c
api change
janvanrijn Jan 9, 2024
307067c
fix
janvanrijn Jan 9, 2024
60c8e0a
fix
janvanrijn Jan 9, 2024
fb8486b
revert weird change
janvanrijn Jan 9, 2024
7e960de
adds data feature to SQL (needed for ontologies)
janvanrijn Jan 9, 2024
8ddfc9f
update reinstall script
janvanrijn Jan 9, 2024
c3742aa
update cron
janvanrijn Jan 9, 2024
9c7a883
adds data processed
janvanrijn Jan 9, 2024
77861ee
updated
janvanrijn Jan 9, 2024
b6d3016
adds feature index to query
janvanrijn Jan 10, 2024
72331ce
small update
janvanrijn Jan 10, 2024
9c7945f
updates data feature value sql
janvanrijn Jan 10, 2024
42ce8d7
data feature
janvanrijn Jan 10, 2024
5305376
address comments
janvanrijn Jan 10, 2024
8eaba2b
update checks
janvanrijn Jan 10, 2024
a20303b
update
janvanrijn Jan 10, 2024
87d3fbf
error code
janvanrijn Jan 10, 2024
65a93de
fix mistake
janvanrijn Jan 10, 2024
8a65ebd
fix of database table for large files (#1207)
janvanrijn Jan 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions data/sql/data_feature.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
INSERT INTO `data_feature` (`did`, `index`, `evaluation_engine_id`, `name`, `data_type`, `is_target`, `is_row_identifier`, `is_ignore`, `NumberOfDistinctValues`, `NumberOfUniqueValues`, `NumberOfMissingValues`, `NumberOfIntegerValues`, `NumberOfRealValues`, `NumberOfNominalValues`, `NumberOfValues`, `MaximumValue`, `MinimumValue`, `MeanValue`, `StandardDeviation`, `ClassDistribution`) VALUES
-- Seed rows: the seven nominal features (index 0-6) of dataset 11.
-- Feature 6 (`class`) is the only one flagged as target.
-- `ClassDistribution` holds a JSON-like array: the feature's values followed by one row of counts per value.
(11, 0, 1, 'buying', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[324, 108, 0, 0],[268, 115, 23, 26],[258, 89, 46, 39]]]'),
(11, 1, 1, 'maint', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"vhigh\",\"high\",\"med\",\"low\"],[[360, 72, 0, 0],[314, 105, 0, 13],[268, 115, 23, 26],[268, 92, 46, 26]]]'),
(11, 2, 1, 'doors', 'nominal', 'false', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"3\",\"4\",\"5more\"],[[326, 81, 15, 10],[300, 99, 18, 15],[292, 102, 18, 20],[292, 102, 18, 20]]]'),
(11, 3, 1, 'persons', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"2\",\"4\",\"more\"],[[576, 0, 0, 0],[312, 198, 36, 30],[322, 186, 33, 35]]]'),
(11, 4, 1, 'lug_boot', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"small\",\"med\",\"big\"],[[450, 105, 21, 0],[392, 135, 24, 25],[368, 144, 24, 40]]]'),
(11, 5, 1, 'safety', 'nominal', 'false', 'false', 'false', 3, 0, 0, 1728, 0, '3', 1728, NULL, NULL, NULL, NULL, '[[\"low\",\"med\",\"high\"],[[576, 0, 0, 0],[357, 180, 39, 0],[277, 204, 30, 65]]]'),
(11, 6, 1, 'class', 'nominal', 'true', 'false', 'false', 4, 0, 0, 1728, 0, '4', 1728, NULL, NULL, NULL, NULL, '[[\"unacc\",\"acc\",\"good\",\"vgood\"],[[1210, 0, 0, 0],[0, 384, 0, 0],[0, 0, 69, 0],[0, 0, 0, 65]]]');

5 changes: 5 additions & 0 deletions data/sql/data_feature_description.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
INSERT INTO `data_feature_description`
  -- Seed rows: ontology URLs attached to features 1, 2 and 3 of dataset 11
  -- (feature 2 carries two of them), all uploaded by user 1.
  (`did`, `index`, `uploader`, `date`, `description_type`, `value`)
VALUES
  (11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Car_door'),
  (11, 2, 1, '2024-01-09 13:15:36', 'ontology', 'https://en.wikipedia.org/wiki/Door'),
  (11, 1, 1, '2024-01-09 13:23:18', 'ontology', 'https://en.wikipedia.org/wiki/Service_(motor_vehicle)'),
  (11, 3, 1, '2024-01-09 13:24:18', 'ontology', 'https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States');
26 changes: 26 additions & 0 deletions data/sql/data_feature_value.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
INSERT INTO `data_feature_value`
  -- Seed rows: the allowed values of every feature (index 0-6) of dataset 11,
  -- written one feature per line.
  (`did`, `index`, `value`)
VALUES
  (11, 0, 'high'), (11, 0, 'low'), (11, 0, 'med'), (11, 0, 'vhigh'),
  (11, 1, 'high'), (11, 1, 'low'), (11, 1, 'med'), (11, 1, 'vhigh'),
  (11, 2, '2'), (11, 2, '3'), (11, 2, '4'), (11, 2, '5more'),
  (11, 3, '2'), (11, 3, '4'), (11, 3, 'more'),
  (11, 4, 'big'), (11, 4, 'med'), (11, 4, 'small'),
  (11, 5, 'high'), (11, 5, 'low'), (11, 5, 'med'),
  (11, 6, 'acc'), (11, 6, 'good'), (11, 6, 'unacc'), (11, 6, 'vgood');
2 changes: 2 additions & 0 deletions data/sql/data_processed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
INSERT INTO `data_processed`
  -- Seed row: dataset 11 processed by evaluation engine 1 on the first try,
  -- with neither an error nor a warning recorded.
  (`did`, `evaluation_engine_id`, `user_id`, `processing_date`, `error`, `warning`, `num_tries`)
VALUES
  (11, 1, 1, '2024-01-09 18:02:58', NULL, NULL, 1);
2 changes: 1 addition & 1 deletion downloads/openml.sql
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ CREATE TABLE `file` (
`creator` int(16) NOT NULL,
`creation_date` datetime NOT NULL,
`filepath` varchar(256) NOT NULL,
`filesize` int(64) NOT NULL,
`filesize` bigint(64) NOT NULL,
`filename_original` varchar(256) NOT NULL,
`extension` varchar(16) NOT NULL,
`mime_type` varchar(32) NOT NULL,
Expand Down
26 changes: 26 additions & 0 deletions downloads/openml_expdb.sql
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,19 @@ CREATE TABLE `data_feature` (
-- Table structure for table `data_feature_description`
--

-- Free-form descriptions attached to a single feature of a dataset.
-- (`did`, `index`) identifies the feature, `description_type` is either
-- 'plain' or 'ontology', and `value` holds the description text (at most 256 characters).
CREATE TABLE `data_feature_description` (
  `did`              INT(10) UNSIGNED NOT NULL,
  `index`            INT(10) UNSIGNED NOT NULL,
  `uploader`         MEDIUMINT(8) UNSIGNED NOT NULL,
  `date`             TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `description_type` ENUM('plain','ontology') NOT NULL,
  `value`            VARCHAR(256) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

--
-- Table structure for table `data_feature_value`
--

CREATE TABLE `data_feature_value` (
`did` int(10) UNSIGNED NOT NULL,
`index` int(10) UNSIGNED NOT NULL,
Expand Down Expand Up @@ -946,6 +959,12 @@ ALTER TABLE `data_feature`
ADD KEY `evaluation_engine_id` (`evaluation_engine_id`),
ADD KEY `did` (`did`,`evaluation_engine_id`);

--
-- Indexes for table `data_feature_description`
--
-- Non-unique lookup index over the (`did`, `index`) pair that identifies a feature.
ALTER TABLE `data_feature_description` ADD INDEX `did` (`did`, `index`);

--
-- Indexes for table `data_feature_value`
--
Expand Down Expand Up @@ -1430,6 +1449,13 @@ ALTER TABLE `data_feature`
-- Every feature value must belong to an existing feature; updates and deletes
-- of the referenced `data_feature` row cascade to its values.
ALTER TABLE `data_feature_value`
  ADD CONSTRAINT `data_feature_value_ibfk_1`
    FOREIGN KEY (`did`, `index`)
    REFERENCES `data_feature` (`did`, `index`)
    ON DELETE CASCADE
    ON UPDATE CASCADE;


--
-- Constraints for table `data_feature_description`
--
-- Every feature description must belong to an existing feature; updates and
-- deletes of the referenced `data_feature` row cascade to its descriptions.
ALTER TABLE `data_feature_description`
  ADD CONSTRAINT `data_feature_description_ibfk_1`
    FOREIGN KEY (`did`, `index`)
    REFERENCES `data_feature` (`did`, `index`)
    ON DELETE CASCADE
    ON UPDATE CASCADE;

--
-- Constraints for table `data_processed`
--
Expand Down
2 changes: 1 addition & 1 deletion openml_OS/controllers/Cron.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ function move_run_files($start_index, $end_index) {
function install_database() {
// note that this one does not come from DATA folder, as they are stored in github
$models = directory_map('data/sql/', 1);
$manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'dataset.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');
$manipulated_order = array('file.sql', 'implementation.sql', 'algorithm_setup.sql', 'evaluation_engine.sql', 'dataset.sql', 'data_processed.sql', 'data_feature.sql', 'task_type.sql', 'task.sql', 'study.sql', 'groups.sql', 'users.sql');

// moves elements of $manipulated_order to the start of the models array
foreach (array_reverse($manipulated_order) as $name) {
Expand Down
10 changes: 10 additions & 0 deletions openml_OS/models/Data_feature_description.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php
/**
 * Write model for the `data_feature_description` table.
 *
 * A record is identified by the combination of the `did`, `index` and
 * `value` columns (see $id_column).
 */
class Data_feature_description extends MY_Database_Write_Model {

  public function __construct() {
    parent::__construct();

    // composite identifier first, then the table this model is bound to
    $this->id_column = array('did', 'index', 'value');
    $this->table = 'data_feature_description';
  }
}
?>
131 changes: 121 additions & 10 deletions openml_OS/models/api/v1/Api_data.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ function __construct() {
$this->load->model('Dataset_topic');
$this->load->model('Dataset_description');
$this->load->model('Data_feature');
$this->load->model('Data_feature_description');
$this->load->model('Data_feature_value');
$this->load->model('Data_quality');
$this->load->model('Feature_quality');
Expand Down Expand Up @@ -163,13 +164,87 @@ function bootstrap($format, $segments, $request_type, $user_id) {
return;
}

if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'add' && $request_type == 'post') {
$this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', true);
return;
}

if (count($segments) == 3 && $segments[0] == 'feature' && $segments[1] == 'ontology' && $segments[2] == 'remove' && $request_type == 'post') {
$this->data_feature_description($this->input->post('data_id'), $this->input->post('index'), $this->input->post('ontology'), 'ontology', false);
return;
}

if (count($segments) == 2 && $segments[0] == 'status' && $segments[1] == 'update') {
$this->status_update($this->input->post('data_id'), $this->input->post('status'));
return;
}

$this->returnError(100, $this->version);
}

/**
 * Adds a description to, or removes a description from, a single feature of a
 * dataset, then outputs the descriptions of that type that the feature has
 * afterwards (XML view 'data-feature-description').
 *
 * Errors reported through returnError():
 *   1100 - a mandatory field is missing, or an id is not a plain integer
 *   1105 - description longer than 256 characters
 *   1106 - description type is 'ontology' but the value is not a valid URL
 *   1101 - (add) the description is already attached to the feature
 *   1102 - (add) the insert failed
 *   1103 - (remove) the description does not exist
 *   1104 - (remove) caller is neither the uploader nor an administrator
 *
 * @param mixed  $data_id          id of the dataset
 * @param mixed  $feature_idx      index of the feature within the dataset
 * @param mixed  $description      the description value (for ontologies: a URL)
 * @param string $description_type kind of description, e.g. 'ontology'
 * @param bool   $do_add           true to add the description, false to remove it
 * @return bool|null false once an error has been returned, nothing on success
 */
private function data_feature_description($data_id, $feature_idx, $description, $description_type, $do_add) {
  // a field that was not provided can show up as false or as null; treat both as missing
  if ($data_id === false || $data_id === null
      || $feature_idx === false || $feature_idx === null
      || !is_string($description)) {
    $this->returnError(1100, $this->version);
    return false;
  }

  // both ids are concatenated into SQL fragments below, so accept digits only
  if (!is_scalar($data_id) || !is_scalar($feature_idx)
      || !ctype_digit((string) $data_id) || !ctype_digit((string) $feature_idx)) {
    $this->returnError(1100, $this->version);
    return false;
  }

  if (strlen($description) > 256) {
    $this->returnError(1105, $this->version);
    return false;
  }
  if ($description_type == 'ontology' && !filter_var($description, FILTER_VALIDATE_URL)) {
    $this->returnError(1106, $this->version);
    return false;
  }

  // Shared WHERE fragment selecting all descriptions of this type for the feature.
  // `index` is a reserved word in MySQL and must be quoted with backticks;
  // string values go through the database driver's escape() (adds the quotes).
  $where_feature = '`did` = ' . $data_id
                 . ' AND `index` = ' . $feature_idx
                 . ' AND `description_type` = ' . $this->db->escape($description_type);

  if ($do_add) {
    $descriptions = $this->Data_feature_description->getColumnWhere('value', $where_feature);
    if ($descriptions != false && in_array($description, $descriptions)) {
      $this->returnError(1101, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
      return false;
    }
    // todo discuss policy: who is allowed to add ontology to a feature?

    $description_data = array(
      'did' => $data_id,
      'index' => $feature_idx,
      'description_type' => $description_type,
      'value' => $description,
      'uploader' => $this->user_id,
      'date' => now()
    );

    $res = $this->Data_feature_description->insert($description_data);
    if ($res == false) {
      $this->returnError(1102, $this->version, 450, 'id=' . $data_id . '; description=' . $description);
      return false;
    }
  } else {
    $description_record = $this->Data_feature_description->getWhereSingle(
      $where_feature . ' AND `value` = ' . $this->db->escape($description)
    );
    if ($description_record == false) {
      $this->returnError(1103, $this->version);
      return false;
    }
    // todo discuss policy: who is allowed to remove ontology from a feature?
    // for now: only the original uploader or an administrator
    $is_admin = $this->ion_auth->is_admin($this->user_id);
    if ($description_record->uploader != $this->user_id && $is_admin == false) {
      $this->returnError(1104, $this->version);
      return false;
    }
    // the model's id columns are (did, index, value), in that order
    $this->Data_feature_description->delete(array($data_id, $feature_idx, $description));
  }

  // report the state after the change
  $descriptions = $this->Data_feature_description->getColumnWhere('value', $where_feature);
  $this->xmlContents(
    'data-feature-description',
    $this->version,
    array(
      'id' => $data_id,
      'description_type' => $description_type,
      'xml_tag_name' => 'feature_description' . '_' . ($do_add ? 'add' : 'remove'),
      'descriptions' => $descriptions)
  );
}

/**
*@OA\Post(
Expand Down Expand Up @@ -513,13 +588,7 @@ private function data_fork() {
$description_record->did = $new_data_id;
$description_record->version = "1";
$this->Dataset_description->insert($description_record);

// create a copy of the latest description
$description_record = $this->Dataset_description->getWhereSingle('did =' . $data_id, 'version DESC');
$description_record->did = $new_data_id;
$description_record->version = "1";
$this->Dataset_description->insert($description_record);


// update elastic search index.
try {
$this->elasticsearch->index('data', $new_data_id);
Expand Down Expand Up @@ -1449,8 +1518,9 @@ private function data_features($data_id) {
$this->returnError(273, $this->version);
return;
}

$dataset->features = $this->Data_feature->getWhere('did = "' . $dataset->did . '"');
// obtains possible values for a feature
$dataset->features_values = $this->Data_feature_value->getWhere('did = "' . $dataset->did . '"');
$index_values = array();
if ($dataset->features_values) {
Expand All @@ -1462,6 +1532,21 @@ private function data_features($data_id) {
}
}
$dataset->index_values = $index_values;

// obtains possible ontologies for a feature (for now: only ontologies)
$dataset->features_descriptions = $this->Data_feature_description->getWhere('did = "' . $dataset->did . '" AND description_type = "ontology"');
$index_ontologies = array();
if ($dataset->features_descriptions) {
foreach($dataset->features_descriptions as $val) {
if ($val->description_type == 'ontology') { // this is guaranteed
if (!isset($index_ontologies[$val->index])) {
$index_ontologies[$val->index] = array();
}
$index_ontologies[$val->index][] = $val->value;
}
}
}
$dataset->index_ontologies = $index_ontologies;

if ($data_processed->error && $dataset->features === false) {
$this->returnError(274, $this->version);
Expand Down Expand Up @@ -1642,6 +1727,14 @@ private function data_features_upload() {
} else {
$nominal_values = false;
}

//actual insert of the feature
if (array_key_exists('ontology', $feature)) {
$ontologies = $feature['ontology'];
unset($feature['ontology']);
} else {
$ontologies = false;
}

$result = $this->Data_feature->insert($feature);
if (!$result) {
Expand All @@ -1665,20 +1758,38 @@ private function data_features_upload() {
return;
}
}

// situation where we are trying to add nominal values to a non-nominal attribute
if ($feature['data_type'] != 'nominal') {
// only allowed for nominal values
$this->db->trans_rollback();
$this->returnError(449, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name']);
return;
}
} elseif ($feature['data_type'] == 'nominal') {
// required for nominal values.. missing so throw error
// nominal values now require this information.. since it is not there, throw the error
$this->db->trans_rollback();
$this->returnError(448, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name']);
return;
}

if ($ontologies) {
// check the nominal value property
foreach ($ontologies as $ontology) {
$data = array(
'did' => $did,
'index' => $ontology['index'],
'description_type' => 'ontology',
'value' => $value
);
$result = $this->Data_feature_description->insert($data);
if (!$result) {
$this->db->trans_rollback();
$this->returnError(450, $this->version, $this->openmlGeneralErrorCode, 'feature: ' . $feature['name'] . ', value: ' . $value);
return;
}
}
}

// NOTE: this is commented out because not all datasets have targets, or they can have multiple ones. Targets should also be set more carefully.
// if no specified attribute is the target, select the last one:
//if( $dataset->default_target_attribute == false && $feature->index > $current_index ) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php
// Renders the descriptions of one dataset feature.
// Expects: $xml_tag_name (root element name), $id (dataset id),
// $description_type (element name used for every description) and
// $descriptions (list of values, or false when there are none).
?>
<oml:<?php echo $xml_tag_name; ?> xmlns:oml="http://openml.org/openml">
  <oml:id><?php echo $id; ?></oml:id>
<?php if ($descriptions != false): foreach ($descriptions as $description): ?>
  <oml:<?php echo $description_type; ?>><?php echo htmlspecialchars($description); ?></oml:<?php echo $description_type; ?>>
<?php endforeach; endif; ?>
</oml:<?php echo $xml_tag_name; ?>>
4 changes: 4 additions & 0 deletions openml_OS/views/pages/api_new/v1/xml/data-features.tpl.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
<oml:index><?php echo $feature->index; ?></oml:index>
<oml:name><?php echo htmlspecialchars($feature->name); ?></oml:name>
<oml:data_type><?php echo $feature->data_type; ?></oml:data_type>
<?php if (is_array($index_ontologies)):
if (array_key_exists($feature->index, $index_ontologies)): foreach($index_ontologies[$feature->index] as $value): ?>
<oml:ontology><?php echo htmlspecialchars($value); ?></oml:ontology>
<?php endforeach; endif; endif; ?>
<?php if (array_key_exists($feature->index, $index_values)): foreach($index_values[$feature->index] as $value): ?>
<oml:nominal_value><?php echo htmlspecialchars($value); ?></oml:nominal_value>
<?php endforeach; endif; ?>
Expand Down
10 changes: 10 additions & 0 deletions openml_OS/views/pages/api_new/v1/xml/pre.php
Original file line number Diff line number Diff line change
Expand Up @@ -540,4 +540,14 @@
//openml.list.data.description
$this->apiErrors[1090] = 'Failed to find description versions for this dataset/Unknown dataset';


//openml.data.feature.description
// codes 1100-1106: adding / removing a description (e.g. an ontology URL) of a data feature
$this->apiErrors[1100] = 'Please provide mandatory POST fields';
$this->apiErrors[1101] = 'This description was already associated with this feature';
$this->apiErrors[1102] = 'Failure to write to the database';
$this->apiErrors[1103] = 'Could not find description in database';
// returned when the caller is neither the uploader of the description nor an administrator
$this->apiErrors[1104] = 'Only the uploader of the description or an administrator can remove it';
$this->apiErrors[1105] = 'Feature description too long';
$this->apiErrors[1106] = 'Feature description meant as ontology, but is not a valid URL';

?>
7 changes: 7 additions & 0 deletions openml_OS/views/pages/api_new/v1/xsd/openml.data.features.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<xs:element name="index" type="xs:integer"/>
<xs:element name="name" type="oml:basic_latin64"/> <!-- The name of the quality that is set -->
<xs:element name="data_type" type="oml:basic_latin64"/> <!-- The value of the quality -->
<!-- A feature may carry any number of ontologies; one element is written per ontology -->
<xs:element minOccurs="0" maxOccurs="unbounded" name="ontology" type="oml:ontology_url256"/> <!-- The IRI of the ontology -->
<xs:element minOccurs="0" maxOccurs="unbounded" name="nominal_value" type="oml:basic_latin256"/>
<xs:element minOccurs="0" name="is_target" type="xs:boolean"/>
<xs:element minOccurs="0" name="is_ignore" type="xs:boolean"/>
Expand All @@ -31,6 +32,12 @@
<xs:element minOccurs="0" name="ClassDistribution" type="oml:basic_latin16384"/>
</xs:sequence>
</xs:complexType>
<!-- URI of an ontology entry: an xs:anyURI between 5 and 256 characters long -->
<xs:simpleType name="ontology_url256">
  <xs:restriction base="xs:anyURI">
    <xs:minLength value="5"/>
    <xs:maxLength value="256"/>
  </xs:restriction>
</xs:simpleType>
<xs:simpleType name="basic_latin64">
<xs:restriction base="xs:string">
<xs:pattern value="\p{IsBasicLatin}*"/>
Expand Down