diff --git a/hepdata/modules/records/utils/submission.py b/hepdata/modules/records/utils/submission.py index d38fc77ea..094ff23d2 100644 --- a/hepdata/modules/records/utils/submission.py +++ b/hepdata/modules/records/utils/submission.py @@ -58,11 +58,7 @@ from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.exc import SQLAlchemyError import yaml - -try: - from yaml import CSafeLoader as Loader -except ImportError: #pragma: no cover - from yaml import SafeLoader as Loader #pragma: no cover +from yaml import CSafeLoader as Loader def construct_yaml_str(self, node): # Override the default string handling function @@ -70,8 +66,6 @@ def construct_yaml_str(self, node): return self.construct_scalar(node) Loader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str) -from urllib.error import URLError - logging.basicConfig() log = logging.getLogger(__name__) diff --git a/hepdata/modules/records/utils/yaml_utils.py b/hepdata/modules/records/utils/yaml_utils.py index 2707c984c..bab95280f 100644 --- a/hepdata/modules/records/utils/yaml_utils.py +++ b/hepdata/modules/records/utils/yaml_utils.py @@ -30,10 +30,7 @@ from hepdata.modules.records.utils.data_processing_utils import str_presenter import shutil import yaml -try: - from yaml import CSafeLoader as Loader, CSafeDumper as Dumper -except ImportError: #pragma: no cover - from yaml import SafeLoader as Loader, SafeDumper as Dumper #pragma: no cover +from yaml import CSafeLoader as Loader, CSafeDumper as Dumper import zipfile from datetime import datetime from dateutil.parser import parse diff --git a/hepdata/modules/records/views.py b/hepdata/modules/records/views.py index 731476894..d91822577 100644 --- a/hepdata/modules/records/views.py +++ b/hepdata/modules/records/views.py @@ -34,10 +34,7 @@ from flask_security.utils import verify_password from sqlalchemy import or_, func import yaml -try: - from yaml import CBaseLoader as Loader -except ImportError: # pragma: no cover - from yaml import BaseLoader as 
Loader # pragma: no cover +from yaml import CBaseLoader as Loader from hepdata.config import CFG_DATA_TYPE, CFG_PUB_TYPE, SITE_URL from hepdata.ext.elasticsearch.api import get_records_matching_field, get_count_for_collection, get_n_latest_records, \ diff --git a/hepdata/version.py b/hepdata/version.py index 12226ba53..b361fbf2e 100644 --- a/hepdata/version.py +++ b/hepdata/version.py @@ -28,4 +28,4 @@ and parsed by ``setup.py``. """ -__version__ = "0.9.4dev20210218" +__version__ = "0.9.4dev20210224" diff --git a/requirements.txt b/requirements.txt index 8513d0fc5..7fd4b1b3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,6 +40,7 @@ lxml==4.6.2 msgpack==0.6.2 psycopg2-binary==2.8.4 python-dateutil==2.8.1 +pyyaml==5.4.1 requests==2.23.0 requests-oauthlib==1.1.0 # Indirect ('invenio-oauthclient' problem) responses==0.10.9 diff --git a/tests/pyyaml_test.py b/tests/pyyaml_test.py new file mode 100644 index 000000000..0d6b3caf7 --- /dev/null +++ b/tests/pyyaml_test.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# This file is part of HEPData. +# Copyright (C) 2021 CERN. +# +# HEPData is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# HEPData is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HEPData; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ +import pytest +import yaml + + +def test_parse_trailing_tab_libyaml(): + """ + Check that PyYAML (with LibYAML) can parse a trailing tab character. + Currently this is only possible with LibYAML, not with pure-Python PyYAML. + + :return: + """ + + data = yaml.load('key: value\t', Loader=yaml.CSafeLoader) + assert data['key'] == 'value' + + +def test_parse_trailing_tab_pyyaml(): + """ + Latest PyYAML v5.4.1 (pure Python) currently has a bug parsing a trailing tab character. + https://github.com/yaml/pyyaml/issues/306 and https://github.com/yaml/pyyaml/issues/450 + + :return: + """ + + with pytest.raises(yaml.scanner.ScannerError): + yaml.load('key: value\t', Loader=yaml.SafeLoader) diff --git a/tests/records_test.py b/tests/records_test.py index c725eb161..34201c7be 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -124,7 +124,8 @@ def test_has_role(app): def test_data_processing(app): base_dir = os.path.dirname(os.path.realpath(__file__)) - data = yaml.safe_load(open(os.path.join(base_dir, 'test_data/data_table.yaml'), 'rt')) + with open(os.path.join(base_dir, 'test_data/data_table.yaml'), 'rt') as data_file: + data = yaml.load(data_file, Loader=yaml.CSafeLoader) assert ('independent_variables' in data) assert ('dependent_variables' in data)