From d51459024a9f0ded23200f40da2cefa4aa36daa3 Mon Sep 17 00:00:00 2001 From: Fabio Bertinatto Date: Tue, 8 Mar 2016 11:37:07 -0300 Subject: [PATCH 1/3] Add Python 3 support --- README.rst | 4 ++-- hubstorage/batchuploader.py | 30 ++++++++++++++++++------------ hubstorage/collectionsrt.py | 5 ++--- hubstorage/frontier.py | 3 +-- hubstorage/project.py | 2 +- hubstorage/resourcetype.py | 17 +++++++++++------ hubstorage/serialization.py | 9 ++++----- hubstorage/utils.py | 17 ++++++++--------- requirements.txt | 3 ++- setup.py | 2 +- tests/hstestcase.py | 2 +- tests/test_activity.py | 9 +++++---- tests/test_batchuploader.py | 11 ++++++----- tests/test_client.py | 2 +- tests/test_collections.py | 11 ++++++----- tests/test_frontier.py | 2 +- tests/test_jobq.py | 10 ++++++---- tests/test_jobsmeta.py | 2 +- tests/test_project.py | 24 +++++++++++++----------- tests/test_retry.py | 6 +++--- tests/test_system.py | 5 +++-- tox.ini | 2 +- 22 files changed, 97 insertions(+), 81 deletions(-) diff --git a/README.rst b/README.rst index c0cd946..5b53b08 100644 --- a/README.rst +++ b/README.rst @@ -148,11 +148,11 @@ Jobq metadata fieldset is less detailed, than ``job.metadata``, but contains few Additional fields can be requested using the ``jobmeta`` parameter. If it used, then it's up to the user to list all the required fields, so only few default fields would be added except requested ones. ->>> metadata = project.jobq.list().next() +>>> metadata = next(project.jobq.list()) >>> metadata.get('spider', 'missing') u'foo' >>> jobs_metadata = project.jobq.list(jobmeta=['scheduled_by', ]) ->>> metadata = jobs_metadata.next() +>>> metadata = next(jobs_metadata) >>> metadata.get('scheduled_by', 'missing') u'John' >>> metadata.get('spider', 'missing') diff --git a/hubstorage/batchuploader.py b/hubstorage/batchuploader.py index 1337e8d..1536c57 100644 --- a/hubstorage/batchuploader.py +++ b/hubstorage/batchuploader.py @@ -3,12 +3,14 @@ import random import logging import warnings +import six +from six.moves import range +from six.moves.queue import Queue +from io import BytesIO from gzip import GzipFile from itertools import count import requests -from requests.compat import StringIO from collections import deque -from Queue import Queue from threading import Thread, Event from .utils import xauth, iterqueue from .serialization import jsonencode @@ -91,7 +93,7 @@ def _worker(self): continue # Delay once all writers are processed - if (ctr.next() % len(self._writers) == 0) and not self.closed: + if (next(ctr) % len(self._writers) == 0) and not self.closed: self._interruptable_sleep() # Get next writer to process @@ -125,12 +127,12 @@ def _checkpoint(self, w): 'content-encoding': w.content_encoding, }) w.offset += qiter.count - for _ in xrange(qiter.count): + for _ in range(qiter.count): q.task_done() if w.callback is not None: try: w.callback(response) - except Exception, e: + except Exception: logger.exception("Callback for %s failed", w.url) def _content_encode(self, qiter, w): @@ -148,12 +150,12 @@ def _tryupload(self, batch): Use polinomial backoff with 10 minutes maximum interval that accounts for ~30 hours of total retry time. - >>> sum(min(x**2, 600) for x in xrange(200)) / 3600 + >>> sum(min(x**2, 600) for x in range(200)) / 3600 30 """ url = batch['url'] offset = batch['offset'] - for retryn in xrange(self.worker_max_retries): + for retryn in range(self.worker_max_retries): emsg = '' try: r = self._upload(batch) @@ -229,7 +231,7 @@ def write(self, item): self.itemsq.put(data) if self.itemsq.full(): self.uploader.interrupt() - return self._nextid.next() + return next(self._nextid) def flush(self): self.flushme = True @@ -250,17 +252,21 @@ def __str__(self): def _encode_identity(iter): - data = StringIO() + data = BytesIO() for item in iter: + if isinstance(item, six.text_type): + item = item.encode('utf8') data.write(item) - data.write('\n') + data.write(b'\n') return data.getvalue() def _encode_gzip(iter): - data = StringIO() + data = BytesIO() with GzipFile(fileobj=data, mode='w') as gzo: for item in iter: + if isinstance(item, six.text_type): + item = item.encode('utf8') gzo.write(item) - gzo.write('\n') + gzo.write(b'\n') return data.getvalue() diff --git a/hubstorage/collectionsrt.py b/hubstorage/collectionsrt.py index 8f2b026..8815bde 100644 --- a/hubstorage/collectionsrt.py +++ b/hubstorage/collectionsrt.py @@ -11,7 +11,7 @@ class Collections(DownloadableResource): def get(self, _type, _name, _key=None, **params): try: r = self.apiget((_type, _name, _key), params=params) - return r if _key is None else r.next() + return r if _key is None else next(r) except HTTPError as exc: if exc.response.status_code == 404: raise KeyError(_key) @@ -78,8 +78,7 @@ def _batch(self, method, path, total_param, progress=None, **params): getparams = dict(params) try: while True: - r = self.apirequest(path, method=method, - params=getparams).next() + r = next(self.apirequest(path, method=method, params=getparams)) total += r[total_param] next = r.get('nextstart') if next is None: diff --git a/hubstorage/frontier.py b/hubstorage/frontier.py index 68321e8..6f74451 100644 --- a/hubstorage/frontier.py +++ b/hubstorage/frontier.py @@ -1,4 +1,3 @@ -import json from .resourcetype import ResourceType from .utils import urlpathjoin @@ -38,7 +37,7 @@ def _get_writer(self, frontier, slot): return writer def _writer_callback(self, response): - self.newcount += json.loads(response.content)["newcount"] + self.newcount += response.json()["newcount"] def close(self, block=True): for writer in self._writers.values(): diff --git a/hubstorage/project.py b/hubstorage/project.py index f9cacab..4c4a5ff 100644 --- a/hubstorage/project.py +++ b/hubstorage/project.py @@ -98,7 +98,7 @@ class Ids(ResourceType): def spider(self, spidername, **params): r = self.apiget(('spider', spidername), params=params) - return r.next() + return next(r) class Settings(MappingResourceType): diff --git a/hubstorage/resourcetype.py b/hubstorage/resourcetype.py index 7023fb2..5baddc3 100644 --- a/hubstorage/resourcetype.py +++ b/hubstorage/resourcetype.py @@ -1,3 +1,5 @@ +import six +from six.moves import range import logging, time, json, socket from collections import MutableMapping import requests.exceptions as rexc @@ -26,7 +28,10 @@ def _iter_lines(self, _path, **kwargs): r = self.client.request(**kwargs) - return r.iter_lines() + lines = r.iter_lines() + if six.PY3: + return (l.decode(r.encoding or 'utf8') for l in lines) + return lines def apirequest(self, _path=None, **kwargs): return jldecode(self._iter_lines(_path, **kwargs)) @@ -77,7 +82,7 @@ def iter_json(self, _path=None, requests_params=None, **apiparams): lastexc = None line = None offset = 0 - for attempt in xrange(self.MAX_RETRIES): + for attempt in range(self.MAX_RETRIES): self._add_resume_param(line, offset, apiparams) try: for line in self._iter_lines(_path=_path, params=apiparams, @@ -153,7 +158,7 @@ def get(self, _key, **params): return o def stats(self): - return self.apiget('stats').next() + return next(self.apiget('stats')) class MappingResourceType(ResourceType, MutableMapping): @@ -177,7 +182,7 @@ def _data(self): if self._cached is None: r = self.apiget() try: - self._cached = r.next() + self._cached = next(r) except StopIteration: self._cached = {} @@ -194,8 +199,8 @@ def save(self): if not self.ignore_fields: self.apipost(jl=self._data, is_idempotent=True) else: - self.apipost(jl=dict((k, v) for k, v in self._data.iteritems() - if k not in self.ignore_fields), + self.apipost(jl={k: v for k, v in six.iteritems(self._data) + if k not in self.ignore_fields}, is_idempotent=True) def __getitem__(self, key): diff --git a/hubstorage/serialization.py b/hubstorage/serialization.py index db359c4..3e6e6bc 100644 --- a/hubstorage/serialization.py +++ b/hubstorage/serialization.py @@ -1,3 +1,4 @@ +import six from json import dumps, loads from datetime import datetime @@ -6,7 +7,7 @@ def jlencode(iterable): - if isinstance(iterable, (dict, str, unicode)): + if isinstance(iterable, (dict, six.string_types)): iterable = [iterable] return u'\n'.join(jsonencode(o) for o in iterable) @@ -26,8 +27,6 @@ def jsondefault(o): u = delta.microseconds s = delta.seconds d = delta.days - millis = (u + (s + d * ADAYINSECONDS) * 1e6) / 1000 - return int(millis) + return (u + (s + d * ADAYINSECONDS) * 1e6) // 1000 else: - return str(o) - + return six.text_type(o) diff --git a/hubstorage/utils.py b/hubstorage/utils.py index b34dadb..5a08a83 100644 --- a/hubstorage/utils.py +++ b/hubstorage/utils.py @@ -1,5 +1,6 @@ +import six import time -from Queue import Empty +from six.moves.queue import Empty def urlpathjoin(*parts): @@ -35,12 +36,10 @@ def urlpathjoin(*parts): continue elif isinstance(p, tuple): p = urlpathjoin(*p) - elif isinstance(p, unicode): - p = p.encode('utf8') - elif not isinstance(p, str): - p = str(p) + elif not isinstance(p, six.text_type): + p = six.text_type(p) - url = p if url is None else '{0}/{1}'.format(url.rstrip('/'), p) + url = p if url is None else u'{0}/{1}'.format(url.rstrip(u'/'), p) return url @@ -81,9 +80,9 @@ class iterqueue(object): it exposes an attribute "count" with the number of messages read - >>> from Queue import Queue + >>> from six.moves.queue import Queue >>> q = Queue() - >>> for x in xrange(10): + >>> for x in range(10): ... q.put(x) >>> qiter = iterqueue(q) >>> list(qiter) @@ -91,7 +90,7 @@ class iterqueue(object): >>> qiter.count 10 - >>> for x in xrange(10): + >>> for x in range(10): ... q.put(x) >>> qiter = iterqueue(q, maxcount=4) >>> list(qiter) diff --git a/requirements.txt b/requirements.txt index 0e8deb5..8f7f75e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests>=1.0 -retrying>=1.3.3 \ No newline at end of file +retrying>=1.3.3 +six>=1.10.0 diff --git a/setup.py b/setup.py index e67af07..4550a48 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ platforms=['Any'], packages=find_packages(), package_data={'hubstorage': ['VERSION']}, - install_requires=['requests', 'retrying>=1.3.3'], + install_requires=['requests', 'retrying>=1.3.3', 'six>=1.10.0'], classifiers=['Development Status :: 4 - Beta', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', diff --git a/tests/hstestcase.py b/tests/hstestcase.py index d7036f5..a673c7c 100644 --- a/tests/hstestcase.py +++ b/tests/hstestcase.py @@ -39,7 +39,7 @@ def tearDownClass(cls): @classmethod def _remove_all_jobs(cls): project = cls.project - for k in project.settings.keys(): + for k in list(project.settings.keys()): if k != 'botgroups': del project.settings[k] project.settings.save() diff --git a/tests/test_activity.py b/tests/test_activity.py index 0189b3d..302cf0a 100644 --- a/tests/test_activity.py +++ b/tests/test_activity.py @@ -1,14 +1,15 @@ """ Test Activty """ -from hstestcase import HSTestCase +from .hstestcase import HSTestCase +from six.moves import range class ActivityTest(HSTestCase): def test_post_and_reverse_get(self): # make some sample data - orig_data = [{u'foo': 42, u'counter': i} for i in xrange(20)] + orig_data = [{u'foo': 42, u'counter': i} for i in range(20)] data1 = orig_data[:10] data2 = orig_data[10:] @@ -22,12 +23,12 @@ def test_post_and_reverse_get(self): self.assertEqual(orig_data[::-1], result) def test_filters(self): - self.project.activity.post({'c': i} for i in xrange(10)) + self.project.activity.post({'c': i} for i in range(10)) r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2)) self.assertEqual(r, [{'c': 9}, {'c': 8}]) def test_timestamp(self): self.project.activity.add({'foo': 'bar'}, baz='qux') - entry = self.project.activity.list(count=1, meta='_ts').next() + entry = next(self.project.activity.list(count=1, meta='_ts')) self.assertTrue(entry.pop('_ts', None)) self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'}) diff --git a/tests/test_batchuploader.py b/tests/test_batchuploader.py index 07d7dd3..77c5172 100644 --- a/tests/test_batchuploader.py +++ b/tests/test_batchuploader.py @@ -2,8 +2,9 @@ Test Project """ import time +from six.moves import range from collections import defaultdict -from hstestcase import HSTestCase +from .hstestcase import HSTestCase from hubstorage import ValueTooLarge @@ -18,7 +19,7 @@ def _job_and_writer(self, **writerargs): def test_writer_batchsize(self): job, w = self._job_and_writer(size=10) - for x in xrange(111): + for x in range(111): w.write({'x': x}) w.close() # this works only for small batches (previous size=10 and small data) @@ -47,19 +48,19 @@ def test_writer_maxitemsize(self): ValueTooLarge, 'Value exceeds max encoded size of 1048576 bytes:' ' \'{"b+\\.\\.\\.\'', - w.write, {'b'*(m/2): 'x'*(m/2)}) + w.write, {'b'*(m//2): 'x'*(m//2)}) def test_writer_contentencoding(self): for ce in ('identity', 'gzip'): job, w = self._job_and_writer(content_encoding=ce) - for x in xrange(111): + for x in range(111): w.write({'x': x}) w.close() self.assertEqual(job.items.stats()['totals']['input_values'], 111) def test_writer_interval(self): job, w = self._job_and_writer(size=1000, interval=1) - for x in xrange(111): + for x in range(111): w.write({'x': x}) if x == 50: time.sleep(2) diff --git a/tests/test_client.py b/tests/test_client.py index 6e1ac2a..00e3597 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,7 +1,7 @@ """ Test Client """ -from hstestcase import HSTestCase +from .hstestcase import HSTestCase from hubstorage.utils import millitime, apipoll class ClientTest(HSTestCase): diff --git a/tests/test_collections.py b/tests/test_collections.py index 1d2ab89..512d15a 100644 --- a/tests/test_collections.py +++ b/tests/test_collections.py @@ -2,9 +2,10 @@ Test Collections """ import random +from six.moves import range from contextlib import closing -from hstestcase import HSTestCase -from testutil import failing_downloader +from .hstestcase import HSTestCase +from .testutil import failing_downloader def _mkitem(): @@ -45,7 +46,7 @@ def post_scan_test(self): test_item = _mkitem() last_key = None with closing(col.create_writer()) as writer: - for i in xrange(20): + for i in range(20): test_item['_key'] = last_key = "post_scan_test%d" % i test_item['counter'] = i writer.write(test_item) @@ -66,7 +67,7 @@ def post_scan_test(self): self.assertEqual(len(result), 10) # bulk delete - col.delete('post_scan_test%d' % i for i in xrange(20)) + col.delete('post_scan_test%d' % i for i in range(20)) # test items removed (check first and last) self.assertRaises(KeyError, col.get, 'post_scan_test0') @@ -84,7 +85,7 @@ def test_data_download(self): col = self.project.collections.new_store(self.test_collection_name) items = [] with closing(col.create_writer()) as writer: - for i in xrange(20): + for i in range(20): test_item = _mkitem() test_item['_key'] = "test_data_download%d" % i test_item['counter'] = i diff --git a/tests/test_frontier.py b/tests/test_frontier.py index 50b3a7a..e781377 100644 --- a/tests/test_frontier.py +++ b/tests/test_frontier.py @@ -1,7 +1,7 @@ """ Test Frontier """ -from hstestcase import HSTestCase +from .hstestcase import HSTestCase class FrontierTest(HSTestCase): diff --git a/tests/test_jobq.py b/tests/test_jobq.py index e414cad..4f1e262 100644 --- a/tests/test_jobq.py +++ b/tests/test_jobq.py @@ -2,9 +2,11 @@ Test JobQ """ import os, unittest -from hstestcase import HSTestCase +import six +from six.moves import range from hubstorage.jobq import DuplicateJobError from hubstorage.utils import apipoll +from .hstestcase import HSTestCase EXCLUSIVE = os.environ.get('EXCLUSIVE_STORAGE') @@ -97,10 +99,10 @@ def test_startjob_with_extras(self): 'nil': None, } qj = jobq.push(self.spidername, **pushextras) - startextras = dict(('s_' + k, v) for k, v in pushextras.iteritems()) + startextras = dict(('s_' + k, v) for k, v in six.iteritems(pushextras)) nj = jobq.start(**startextras) self.assertEqual(qj['key'], nj['key']) - for k, v in dict(pushextras, **startextras).iteritems(): + for k, v in six.iteritems(dict(pushextras, **startextras)): if type(v) is float: self.assertAlmostEqual(nj.get(k), v) else: @@ -145,7 +147,7 @@ def test_summary_countstart(self): N = 6 jobq = self.project.jobq for state in ('pending', 'running', 'finished'): - for idx in xrange(N): + for idx in range(N): jobq.push(self.spidername, state=state, idx=idx) s1 = jobq.summary(state) diff --git a/tests/test_jobsmeta.py b/tests/test_jobsmeta.py index b2254c4..c2bfbe6 100644 --- a/tests/test_jobsmeta.py +++ b/tests/test_jobsmeta.py @@ -3,7 +3,7 @@ System tests for operations on stored job metadata """ -from hstestcase import HSTestCase +from .hstestcase import HSTestCase class JobsMetadataTest(HSTestCase): diff --git a/tests/test_project.py b/tests/test_project.py index b70b5c2..d90215a 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2,12 +2,14 @@ Test Project """ import json +import six +from six.moves import range from random import randint, random from requests.exceptions import HTTPError from hubstorage import HubstorageClient from hubstorage.utils import millitime -from hstestcase import HSTestCase -from testutil import failing_downloader +from .hstestcase import HSTestCase +from .testutil import failing_downloader class ProjectTest(HSTestCase): @@ -16,8 +18,8 @@ def test_projectid(self): p1 = self.hsclient.get_project(int(self.projectid)) p2 = self.hsclient.get_project(str(self.projectid)) self.assertEqual(p1.projectid, p2.projectid) - self.assertEqual(type(p1.projectid), str) - self.assertEqual(type(p2.projectid), str) + self.assertEqual(type(p1.projectid), six.text_type) + self.assertEqual(type(p2.projectid), six.text_type) self.assertRaises(AssertionError, self.hsclient.get_project, '111/3') def test_get_job_from_key(self): @@ -173,21 +175,21 @@ def test_requests(self): job.requests.close() rr = job.requests.list() - self.assertEqual(rr.next(), + self.assertEqual(next(rr), {u'status': 200, u'rs': 1337, u'url': u'http://test.com/', u'time': ts, u'duration': 5, u'method': u'GET'}) - self.assertEqual(rr.next(), + self.assertEqual(next(rr), {u'status': 400, u'parent': 0, u'rs': 0, u'url': u'http://test.com/2', u'time': ts + 1, u'duration': 1, u'method': u'POST'}) - self.assertEqual(rr.next(), + self.assertEqual(next(rr), {u'status': 400, u'fp': u'1234', u'parent': 0, u'rs': 0, u'url': u'http://test.com/3', u'time': ts + 2, u'duration': 1, u'method': u'PUT'}) - self.assertRaises(StopIteration, rr.next) + self.assertRaises(StopIteration, next, rr) def test_samples(self): # no samples stored @@ -208,9 +210,9 @@ def test_samples(self): samples = [] ts = millitime() count = int(j2.samples.batch_size * (random() + randint(1, 5))) - for _ in xrange(count): + for _ in range(count): ts += randint(1, 2**16) - row = [ts] + list(randint(0, 2**16) for _ in xrange(randint(0, 100))) + row = [ts] + list(randint(0, 2**16) for _ in range(randint(0, 100))) samples.append(row) j2.samples.write(row) j2.samples.flush() @@ -228,7 +230,7 @@ def test_jobsummary(self): def test_bulkdata(self): j = self.project.push_job(self.spidername, state='running') - for i in xrange(20): + for i in range(20): j.logs.info("log line %d" % i) j.items.write(dict(field1="item%d" % i)) j.requests.add("http://test.com/%d" % i, diff --git a/tests/test_retry.py b/tests/test_retry.py index 5a0f22a..91b7de4 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -1,9 +1,9 @@ """ Test Retry Policy """ -from httplib import BadStatusLine +from six.moves.http_client import BadStatusLine from requests import HTTPError, ConnectionError -from hstestcase import HSTestCase +from .hstestcase import HSTestCase from hubstorage import HubstorageClient import responses import json @@ -76,7 +76,7 @@ def request_callback(request): if attempts_count[0] <= 2: raise ConnectionError("Connection aborted.", BadStatusLine("''")) if attempts_count[0] == 3: - return (429, {}, {}) + return (429, {}, u'') else: resp_body = dict(job_metadata) return (200, {}, json.dumps(resp_body)) diff --git a/tests/test_system.py b/tests/test_system.py index 4b20d8e..c6b6e8e 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -1,6 +1,7 @@ import random +from six.moves import range from contextlib import closing -from hstestcase import HSTestCase +from .hstestcase import HSTestCase from hubstorage import HubstorageClient from hubstorage.utils import millitime @@ -79,7 +80,7 @@ def _run_scraper(self, jobkey, jobauth, close_reason=None): client = HubstorageClient(endpoint=self.endpoint) with closing(client) as scraperclient: job = scraperclient.get_job(jobkey, auth=jobauth) - for idx in xrange(self.MAGICN): + for idx in range(self.MAGICN): iid = job.items.write({'uuid': idx}) job.logs.debug('log debug %s' % idx, idx=idx) job.logs.info('log info %s' % idx, idx=idx) diff --git a/tox.ini b/tox.ini index 92e0476..36d3219 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py27, pypy +envlist = py27, pypy, py33, py34, py35 [testenv] deps = From 58461218c5b05a1e13954ff569f6fbad372b7c57 Mon Sep 17 00:00:00 2001 From: Fabio Bertinatto Date: Tue, 22 Mar 2016 16:14:30 -0300 Subject: [PATCH 2/3] Avoid shadowing builtin iter --- hubstorage/batchuploader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hubstorage/batchuploader.py b/hubstorage/batchuploader.py index 1536c57..b991541 100644 --- a/hubstorage/batchuploader.py +++ b/hubstorage/batchuploader.py @@ -251,9 +251,9 @@ def __str__(self): return self.url -def _encode_identity(iter): +def _encode_identity(iterable): data = BytesIO() - for item in iter: + for item in iterable: if isinstance(item, six.text_type): item = item.encode('utf8') data.write(item) @@ -261,10 +261,10 @@ def _encode_identity(iter): return data.getvalue() -def _encode_gzip(iter): +def _encode_gzip(iterable): data = BytesIO() with GzipFile(fileobj=data, mode='w') as gzo: - for item in iter: + for item in iterable: if isinstance(item, six.text_type): item = item.encode('utf8') gzo.write(item) From b579d3949f68f9aedb358ad69529f80ef723bfdf Mon Sep 17 00:00:00 2001 From: Fabio Bertinatto Date: Fri, 22 Apr 2016 14:51:31 -0300 Subject: [PATCH 3/3] Add test to make sure output is correctly encoded --- tests/test_project.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_project.py b/tests/test_project.py index d90215a..8d61c25 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -243,3 +243,12 @@ def test_bulkdata(self): with failing_downloader(resource): downloaded = list(resource.iter_values()) self.assertEqual(len(downloaded), 20) + + def test_output_string(self): + project = self.hsclient.get_project(self.projectid) + project.push_job(self.spidername) + job = self.start_job() + job.items.write({'foo': 'bar'}) + job.close_writers() + items = self.hsclient.get_job(job.key).items.iter_json() + self.assertEqual(type(next(items)), str)