From d6dbd2e33bb3c4e98e4b95f52c6ec9911a67f809 Mon Sep 17 00:00:00 2001 From: Alexander Chekunkov Date: Fri, 2 Dec 2016 14:05:33 +0000 Subject: [PATCH] backport msgpack check fix from python-scrapinghub https://github.com/scrapinghub/python-scrapinghub/pull/31 --- hubstorage/collectionsrt.py | 30 ++++++++++++++++++++++++++++++ hubstorage/resourcetype.py | 16 +++++++++++----- tests/test_collections.py | 32 ++++++++++++++++++++++++++++++++ tests/test_project.py | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 103 insertions(+), 7 deletions(-) diff --git a/hubstorage/collectionsrt.py b/hubstorage/collectionsrt.py index 8041863..8470b1c 100644 --- a/hubstorage/collectionsrt.py +++ b/hubstorage/collectionsrt.py @@ -1,13 +1,43 @@ import re + from requests.exceptions import HTTPError + from .resourcetype import DownloadableResource +from .serialization import MSGPACK_AVAILABLE from .utils import urlpathjoin +COLLECTIONS_MSGPACK_REGEX = re.compile( + r"""(v?c?s) # collection type + /\w+ # collection name + ( + /? # no key + | # OR + /(?P[^/]+)/? # item key + ) + $ + """, + re.VERBOSE) + class Collections(DownloadableResource): resource_type = 'collections' + def _allows_mpack(self, path=None): + """Check if request can be served with msgpack data. + + Collection scan and get requests for keys are able to return msgpack data. + + :param path: None, tuple or string + + """ + if not MSGPACK_AVAILABLE: + return False + path = urlpathjoin(path or '') + match = COLLECTIONS_MSGPACK_REGEX.match(path) + # count endpoint doesn't support msgpack + return bool(match and match.group('key') != 'count') + def get(self, _type, _name, _key=None, **params): try: r = self.apiget((_type, _name, _key), params=params) diff --git a/hubstorage/resourcetype.py b/hubstorage/resourcetype.py index bc81fdc..061963a 100644 --- a/hubstorage/resourcetype.py +++ b/hubstorage/resourcetype.py @@ -23,16 +23,22 @@ def __init__(self, client, key, auth=None): self.url = urlpathjoin(client.endpoint, self.key) def _allows_mpack(self, path=None): - """ Check if request can be served with msgpack data. + """Check if request can be served with msgpack data. - Currently, items, logs, collections and samples endpoints are able to + Currently, items, logs and samples endpoints are able to return msgpack data. However, /stats calls can only return JSON data for now. + + :param path: None, tuple or string + """ - if not MSGPACK_AVAILABLE or path == 'stats': + if not MSGPACK_AVAILABLE: return False - return self.resource_type in ('items', 'logs', - 'collections', 'samples') + path = urlpathjoin(path or '') + return ( + self.resource_type in ('items', 'logs', 'samples') and + not path.rstrip('/').endswith('stats') + ) @staticmethod def _enforce_msgpack(**kwargs): diff --git a/tests/test_collections.py b/tests/test_collections.py index faba2c4..ef37e58 100644 --- a/tests/test_collections.py +++ b/tests/test_collections.py @@ -2,8 +2,12 @@ Test Collections """ import random + +import pytest from six.moves import range from contextlib import closing + +from hubstorage import HubstorageClient from .hstestcase import HSTestCase from .testutil import failing_downloader @@ -121,3 +125,31 @@ def test_invalid_collection_name(self): self.assertRaises(ValueError, cols.new_store, '/foo') self.assertRaises(ValueError, cols.create_writer, 'invalidtype', 'n') self.assertRaises(ValueError, cols.create_writer, 's', 'foo-bar') + + +@pytest.mark.parametrize('msgpack_available', [True, False]) +@pytest.mark.parametrize('path,expected_result', [ + ('s/foo', True), + ('s/foo/', True), + (('s', 'foo'), True), + ('s/foo/bar', True), + ('s/foo/bar/', True), + (('s', 'foo', 'bar'), True), + ('vs/foo/bar/', True), + ('cs/foo/bar/', True), + ('vcs/foo/bar/', True), + ('s/foo/scan', True), + ('s/foo/bar/baz', False), + ('s/foo/count', False), + (('s', 'foo', 'count'), False), + ('x/foo', False), + (('x', 'foo'), False), + ('list', False), + (None, False), +]) +def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result): + monkeypatch.setattr( + 'hubstorage.collectionsrt.MSGPACK_AVAILABLE', msgpack_available) + hsclient = HubstorageClient() + collections = hsclient.get_project(2222000).collections + assert collections._allows_mpack(path) is (msgpack_available and expected_result) diff --git a/tests/test_project.py b/tests/test_project.py index 8d61c25..937225f 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2,12 +2,16 @@ Test Project """ import json -import six -from six.moves import range from random import randint, random + +import pytest +import six from requests.exceptions import HTTPError +from six.moves import range + from hubstorage import HubstorageClient from hubstorage.utils import millitime + from .hstestcase import HSTestCase from .testutil import failing_downloader @@ -252,3 +256,27 @@ def test_output_string(self): job.close_writers() items = self.hsclient.get_job(job.key).items.iter_json() self.assertEqual(type(next(items)), str) + + +@pytest.mark.parametrize('msgpack_available', [True, False]) +@pytest.mark.parametrize('path,expected_result', [ + (None, True), + ('33/1', True), + ('33/1/', True), + ((33, 1), True), + ('stats', False), + ('stats/', False), + ('33/1/stats', False), + ('33/1/stats/', False), + ((33, 1, 'stats'), False), +]) +def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result): + monkeypatch.setattr( + 'hubstorage.resourcetype.MSGPACK_AVAILABLE', msgpack_available) + hsclient = HubstorageClient() + job = hsclient.get_job('2222000/1/1') + for resource in [job.items, job.logs, job.samples]: + assert resource._allows_mpack(path) is (msgpack_available and expected_result) + assert job.requests._allows_mpack(path) is False + assert job.metadata._allows_mpack(path) is False + assert job.jobq._allows_mpack(path) is False