From 509c3ad7490e1606ca829d3478c0833d2d32a214 Mon Sep 17 00:00:00 2001 From: Oliver Sherouse Date: Mon, 9 Apr 2018 10:52:46 -0400 Subject: [PATCH 1/4] renamed corpora to corpus, added deprecation warning --- .gitignore | 1 + quantgov/__init__.py | 9 ++------- quantgov/__main__.py | 6 +++--- quantgov/corpora/__init__.py | 9 ++++++++- quantgov/corpus/__init__.py | 9 +++++++++ quantgov/{corpora => corpus}/builtins.py | 0 quantgov/{corpora => corpus}/structures.py | 0 quantgov/{corpora => corpus}/utils.py | 0 tests/test_corpora.py | 8 ++++---- 9 files changed, 27 insertions(+), 15 deletions(-) create mode 100644 quantgov/corpus/__init__.py rename quantgov/{corpora => corpus}/builtins.py (100%) rename quantgov/{corpora => corpus}/structures.py (100%) rename quantgov/{corpora => corpus}/utils.py (100%) diff --git a/.gitignore b/.gitignore index 72364f9..d9d3190 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *$py.class +.pytest_cache # C extensions *.so diff --git a/quantgov/__init__.py b/quantgov/__init__.py index 9efd6ed..a39a47f 100644 --- a/quantgov/__init__.py +++ b/quantgov/__init__.py @@ -1,13 +1,8 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -__all__ = [ - 'corpora', - 'estimator', - 'project', - 'utils', -] -from .corpora.utils import load_driver +from . 
import corpora, corpus, estimator, project, utils +from .corpus.utils import load_driver __version__ = '0.4.0.dev' diff --git a/quantgov/__main__.py b/quantgov/__main__.py index 8eac757..1db7ad5 100644 --- a/quantgov/__main__.py +++ b/quantgov/__main__.py @@ -15,7 +15,7 @@ import requests import quantgov -import quantgov.corpora.builtins +import quantgov.corpus.builtins from pathlib import Path @@ -40,7 +40,7 @@ def parse_args(): # Corpus command corpus = subparsers.add_parser('corpus') corpus_subcommands = corpus.add_subparsers(dest='subcommand') - for command, builtin in quantgov.corpora.builtins.commands.items(): + for command, builtin in quantgov.corpus.builtins.commands.items(): subcommand = corpus_subcommands.add_parser( command, help=builtin.cli.help) subcommand.add_argument( @@ -161,7 +161,7 @@ def start_component(args): def run_corpus_builtin(args): driver = quantgov.load_driver(args.corpus) writer = csv.writer(args.outfile) - builtin = quantgov.corpora.builtins.commands[args.subcommand] + builtin = quantgov.corpus.builtins.commands[args.subcommand] func_args = {i: j for i, j in vars(args).items() if i not in {'command', 'subcommand', 'outfile', 'corpus'}} writer.writerow(driver.index_labels + builtin.get_columns(func_args)) diff --git a/quantgov/corpora/__init__.py b/quantgov/corpora/__init__.py index be0ec36..2e7d776 100644 --- a/quantgov/corpora/__init__.py +++ b/quantgov/corpora/__init__.py @@ -1,4 +1,6 @@ -from .structures import ( +import warnings + +from ..corpus import ( Document, CorpusStreamer, CorpusDriver, @@ -7,3 +9,8 @@ NamePatternCorpusDriver, IndexDriver ) + +warnings.warn( + ("quantgov.corpora has been moved to quantgov.corpus and will be removed" + " in a future version."), + DeprecationWarning) diff --git a/quantgov/corpus/__init__.py b/quantgov/corpus/__init__.py new file mode 100644 index 0000000..be0ec36 --- /dev/null +++ b/quantgov/corpus/__init__.py @@ -0,0 +1,9 @@ +from .structures import ( + Document, + CorpusStreamer, + 
CorpusDriver, + FlatFileCorpusDriver, + RecursiveDirectoryCorpusDriver, + NamePatternCorpusDriver, + IndexDriver +) diff --git a/quantgov/corpora/builtins.py b/quantgov/corpus/builtins.py similarity index 100% rename from quantgov/corpora/builtins.py rename to quantgov/corpus/builtins.py diff --git a/quantgov/corpora/structures.py b/quantgov/corpus/structures.py similarity index 100% rename from quantgov/corpora/structures.py rename to quantgov/corpus/structures.py diff --git a/quantgov/corpora/utils.py b/quantgov/corpus/utils.py similarity index 100% rename from quantgov/corpora/utils.py rename to quantgov/corpus/utils.py diff --git a/tests/test_corpora.py b/tests/test_corpora.py index 4eba0fc..f77a249 100644 --- a/tests/test_corpora.py +++ b/tests/test_corpora.py @@ -1,5 +1,5 @@ import pytest -import quantgov.corpora +import quantgov.corpus import subprocess from pathlib import Path @@ -8,7 +8,7 @@ def build_recursive_directory_corpus(directory): for path, text in (('a/1.txt', u'foo'), ('b/2.txt', u'bar')): directory.join(path).write_text(text, encoding='utf-8', ensure=True) - return quantgov.corpora.RecursiveDirectoryCorpusDriver( + return quantgov.corpus.RecursiveDirectoryCorpusDriver( directory=str(directory), index_labels=('letter', 'number')) @@ -16,7 +16,7 @@ def build_name_pattern_corpus(directory): for path, text in (('a_1.txt', u'foo'), ('b_2.txt', u'bar')): path = directory.join(path).write_text( text, encoding='utf-8', ensure=True) - return quantgov.corpora.NamePatternCorpusDriver( + return quantgov.corpus.NamePatternCorpusDriver( pattern=r'(?P<letter>[a-z])_(?P<number>\d)', directory=str(directory) ) @@ -35,7 +35,7 @@ def build_index_corpus(directory): with index_path.open('w', encoding='utf-8') as outf: outf.write(u'letter,number,path\n') outf.write(u'\n'.join(','.join(row) for row in rows)) - return quantgov.corpora.IndexDriver(str(index_path)) + return quantgov.corpus.IndexDriver(str(index_path)) BUILDERS = { From 63284c915c9cad54dffea4c5cc4aaef8aad14b14 Mon Sep
17 00:00:00 2001 From: Oliver Sherouse Date: Mon, 9 Apr 2018 11:29:55 -0400 Subject: [PATCH 2/4] quantgov\__init__.py --- quantgov/corpus/utils.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 quantgov/corpus/utils.py diff --git a/quantgov/corpus/utils.py b/quantgov/corpus/utils.py deleted file mode 100644 index 1bdd29f..0000000 --- a/quantgov/corpus/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -quantgov.corpora.utils - utility functions for the corpus submodule -""" -import sys - -from pathlib import Path - - -def load_driver(corpus): - corpus = Path(corpus) - if corpus.name == 'driver.py' or corpus.name == 'timestamp': - corpus = corpus.parent - sys.path.insert(0, str(corpus)) - from driver import driver - sys.path.pop(0) - return driver From 5a735e070396fabeda19b99470b2a6f50715d9b4 Mon Sep 17 00:00:00 2001 From: Oliver Sherouse Date: Mon, 9 Apr 2018 11:30:31 -0400 Subject: [PATCH 3/4] moved load_driver and set up for future forcing of full imports of submodules --- quantgov/utils.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/quantgov/utils.py b/quantgov/utils.py index befd39a..a0d1fe0 100644 --- a/quantgov/utils.py +++ b/quantgov/utils.py @@ -1,7 +1,22 @@ # TODO: Docstrings + import collections import concurrent.futures -import multiprocessing +import os +import sys + +from pathlib import Path + + +def load_driver(corpus): + corpus = Path(corpus) + if corpus.name == 'driver.py' or corpus.name == 'timestamp': + corpus = corpus.parent + sys.path.insert(0, str(corpus)) + from driver import driver + sys.path.pop(0) + return driver + _POOLS = { 'thread': concurrent.futures.ThreadPoolExecutor, @@ -26,7 +41,7 @@ def lazy_parallel(func, *iterables, **kwargs): worker = kwargs.get('worker', 'thread') max_workers = kwargs.get('max_workers') if max_workers is None: # Not in back-port - max_workers = (multiprocessing.cpu_count() or 1) + max_workers = (os.cpu_count() or 1) if worker == 'thread': 
max_workers *= 5 try: From 18232ac5c2ddacd115bfe8d40249944c698c7ad4 Mon Sep 17 00:00:00 2001 From: Oliver Sherouse Date: Mon, 9 Apr 2018 11:35:53 -0400 Subject: [PATCH 4/4] forgot to add new __init__! --- quantgov/__init__.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/quantgov/__init__.py b/quantgov/__init__.py index a39a47f..219ccfa 100644 --- a/quantgov/__init__.py +++ b/quantgov/__init__.py @@ -1,8 +1,17 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) +__all__ = [ + 'corpora', + 'corpus', + 'estimator', + 'project', + 'utils', +] -from . import corpora, corpus, estimator, project, utils -from .corpus.utils import load_driver + +from . import corpora # Backwards compatibility + +from .utils import load_driver __version__ = '0.4.0.dev'