diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c034903..fae6575 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.x"] - project: ["docspec", "docspec-python"] + project: ["docspec", "docspec-python", "docspec-python[experimental]"] steps: - uses: actions/checkout@v3 - uses: NiklasRosenstein/slap@gha/install/v1 @@ -23,6 +23,9 @@ with: { python-version: "${{ matrix.python-version }}" } - run: slap install --only ${{ matrix.project }} --no-venv-check -v - run: DOCSPEC_TEST_NO_DEVELOP=true slap test ${{ matrix.project }} + if: ${{ !endsWith(matrix.project, '[experimental]') }} + - run: DOCSPEC_TEST_NO_DEVELOP=true slap test docspec-python + if: ${{ endsWith(matrix.project, '[experimental]') }} changelog-update: name: "Insert the Pull Request URL into new changelog entries" diff --git a/docspec-python/pyproject.toml b/docspec-python/pyproject.toml index fd16452..ec48819 100644 --- a/docspec-python/pyproject.toml +++ b/docspec-python/pyproject.toml @@ -18,6 +18,13 @@ python = "^3.7" docspec = "^2.2.1" "nr.util" = ">=0.7.0" black = "^23.1.0" +beniget = { git = "https://github.com/tristanlatr/beniget", rev = "ca577df3cca73140d53a325624a0185735354b69" } +libstatic = { git = "https://github.com/tristanlatr/libstatic", tag = "0.2.0.dev3", optional = true } +ast_comments = { version = "^1.1.0", optional = true } +astor = { version = ">=0.8.1", optional = true } + +[tool.poetry.extras] +experimental = ["beniget", "libstatic", "ast_comments", "astor"] [tool.poetry.dev-dependencies] black = "*" diff --git a/docspec-python/src/docspec_python/__init__.py b/docspec-python/src/docspec_python/__init__.py index dd33ffb..43607c3 100644 --- a/docspec-python/src/docspec_python/__init__.py +++ b/docspec-python/src/docspec_python/__init__.py @@ -54,6 +54,7 @@ def load_python_modules( encoding: t.Optional[str] = None, *, files: t.Optional[t.Sequence[t.Tuple[str, str]]] = None, + parser_version: int = 1, ) -> t.Iterable[Module]: """ Utility function for loading multiple #Module#s from a list of Python module and package @@ -85,9 +86,17 @@ def load_python_modules( except ImportError: if raise_: raise + if parser_version == 1: + for module_name, filename in files: + yield parse_python_module(filename, module_name=module_name, options=options, encoding=encoding) + elif parser_version == 2: + from .parser2 import ModSpec, parse_modules - for module_name, filename in files: - yield parse_python_module(filename, module_name=module_name, options=options, encoding=encoding) + yield from parse_modules( + [ModSpec(Path(filename).read_text(), module_name, filename) for module_name, filename in files] + ) + else: + assert False, f"no such parser version {parser_version!r}" @t.overload @@ -96,6 +105,7 @@ def parse_python_module( module_name: t.Optional[str] = None, options: t.Optional[ParserOptions] = None, encoding: t.Optional[str] = None, + parser_version: int = 1, ) -> Module: ... @@ -107,6 +117,7 @@ def parse_python_module( module_name: t.Optional[str] = None, options: t.Optional[ParserOptions] = None, encoding: t.Optional[str] = None, + parser_version: int = 1, ) -> Module: ... 
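For reference, a minimal sketch of how a caller would opt into the new parser once the experimental extra is installed (pip install "docspec-python[experimental]"); the (module_name, filename) pair below is illustrative and not part of this patch:

from docspec_python import load_python_modules

# Hypothetical module/file pair; any sequence of (module_name, filename) tuples works here.
modules = load_python_modules(
    files=[("my_package", "src/my_package/__init__.py")],
    parser_version=2,  # dispatches to the libstatic-based docspec_python.parser2
)
for module in modules:
    print(module.name, len(module.members))

With parser_version=1 (the default) the same call goes through the existing lib2to3-based parser, so downstream consumers keep their current behaviour unless they ask for the new one.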
@@ -117,6 +128,7 @@ def parse_python_module( # type: ignore module_name: t.Optional[str] = None, options: t.Optional[ParserOptions] = None, encoding: t.Optional[str] = None, + parser_version: int = 1, ) -> Module: """ Parses Python code of a file or file-like object and returns a #Module @@ -133,9 +145,30 @@ def parse_python_module( # type: ignore return parse_python_module(fpobj, fp, module_name, options, encoding) assert filename is not None - parser = Parser(options) - ast = parser.parse_to_ast(fp.read(), str(filename)) - return parser.parse(ast, str(filename), module_name) + + if parser_version == 1: + parser = Parser(options) + ast = parser.parse_to_ast(fp.read(), str(filename)) + return parser.parse(ast, str(filename), module_name) + elif parser_version == 2: + # This should only be used in tests since the new parser is much better + # when using load_python_modules() because it will analyze the module together + # in the same project state, which enables us to do more precise analysis. + from .parser2 import ModSpec, parse_modules + + return next( + parse_modules( + ( + ModSpec( + fp.read(), + module_name or "", + filename=str(filename), + ), + ) + ) + ) + else: + assert False, f"no such parser version {parser_version!r}" def find_module(module_name: str, search_path: t.Optional[t.Sequence[t.Union[str, Path]]] = None) -> str: diff --git a/docspec-python/src/docspec_python/parser2.py b/docspec-python/src/docspec_python/parser2.py new file mode 100644 index 0000000..5a93bd8 --- /dev/null +++ b/docspec-python/src/docspec_python/parser2.py @@ -0,0 +1,535 @@ +""" +A new parser based on the ``ast`` module, the framework ``libstatic`` and ``ast-comments`` +""" +from __future__ import annotations + +import ast +from functools import partial +import inspect +import platform +import sys +import typing as t +from dataclasses import dataclass +from itertools import chain + +import ast_comments # type:ignore[import] +import astor # type:ignore[import] +import docspec +import libstatic # type:ignore[import] +from libstatic._lib.assignment import get_stored_value # type:ignore[import] +from libstatic._lib.shared import LocalStmtVisitor, unparse # type:ignore[import] + + +class ModSpec(t.NamedTuple): + src: str + modname: str + filename: str | None = None + is_package: bool = False + is_stub: bool = False + + +@dataclass +class ParserOptions: + expand_names: bool = True + builtins: bool = False + dependencies: bool | int = False + verbosity:int = 0 + # python_version:tuple[int, int] + +class ParseError(Exception): + ... 
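A minimal sketch of how these new types fit together, assuming the experimental extra is installed; parse_modules() is added just below, and the module names and sources are illustrative:

from docspec_python.parser2 import ModSpec, ParserOptions, parse_modules

# Two in-memory modules that form one small project.
specs = [
    ModSpec(src="class Widget:\n    '''A widget.'''\n", modname="pkg", is_package=True),
    ModSpec(src="from pkg import Widget\n", modname="pkg.app"),
]
# expand_names=True (the default) makes annotations resolve to fully qualified names.
for module in parse_modules(specs, options=ParserOptions(expand_names=True)):
    print(module.name, [member.name for member in module.members])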
+ +def parse_modules(modules: t.Sequence[ModSpec], options: ParserOptions | None = None) -> t.Iterator[docspec.Module]: + options = options or ParserOptions() + proj = libstatic.Project(builtins=options.builtins, + verbosity=options.verbosity) + initial_modules: dict[str, str] = {} # libstatic may add the builtins module + for src, modname, filename, is_package, is_stub in modules: + initial_modules[modname] = src + filename = filename or "" + try: + node = ast.parse(src, filename=filename) + except SyntaxError as e: + raise ParseError(f'cannot parse file: {e}') from e + try: + proj.add_module( + node, + modname, + is_package=is_package, + filename=filename + ) + except libstatic.StaticException as e: + raise ParseError(f'cannot add module {modname!r} to the project: {e}') from e + + proj.analyze_project() + parser = Parser(proj.state, options) + for m in proj.state.get_all_modules(): + if m.name() in initial_modules: + # run ast-comments + ast_comments._enrich(initial_modules[m.name()], m.node) + yield parser.parse(m) # type: ignore[misc] + + +class IVar(t.NamedTuple): + node: ast.Attribute + value: ast.expr | None = None + annotation: ast.expr | None = None + + +class ArgSpec(t.NamedTuple): + node: ast.arg + type: docspec.Argument.Type + default: ast.expr | None = None + + +def _iter_arguments(args: ast.arguments) -> t.Iterator[ArgSpec]: + """ + Yields all arguments of the given ast.arguments instance. + """ + posonlyargs = getattr(args, "posonlyargs", ()) + + num_pos_args = len(posonlyargs) + len(args.args) + defaults = args.defaults + default_offset = num_pos_args - len(defaults) + + def get_default(index: int) -> ast.expr | None: + assert 0 <= index < num_pos_args, index + index -= default_offset + return None if index < 0 else defaults[index] + + for i, arg in enumerate(posonlyargs): + yield ArgSpec(arg, docspec.Argument.Type.POSITIONAL_ONLY, default=get_default(i)) + for i, arg in enumerate(args.args, start=len(posonlyargs)): + yield ArgSpec(arg, docspec.Argument.Type.POSITIONAL, default=get_default(i)) + if args.vararg: + yield ArgSpec(args.vararg, docspec.Argument.Type.POSITIONAL_REMAINDER) + for arg, default in zip(args.kwonlyargs, args.kw_defaults): + yield ArgSpec(arg, docspec.Argument.Type.KEYWORD_ONLY, default=default) + if args.kwarg: + yield ArgSpec(args.kwarg, docspec.Argument.Type.KEYWORD_REMAINDER) + + +_string_lineno_is_end = sys.version_info < (3, 8) and platform.python_implementation() != "PyPy" +"""True iff the 'lineno' attribute of an AST string node points to the last +line in the string, rather than the first line. +""" + + +def _extract_docstring_linenum(node: ast.Str | ast.Constant) -> int: + r""" + In older CPython versions, the AST only tells us the end line + number and we must approximate the start line number. + This approximation is correct if the docstring does not contain + explicit newlines ('\n') or joined lines ('\' at end of line). + + Leading blank lines are stripped by cleandoc(), so we must + return the line number of the first non-blank line. + """ + doc = t.cast(str, get_str_value(node)) + lineno = node.lineno + if _string_lineno_is_end: + # In older CPython versions, the AST only tells us the end line + # number and we must approximate the start line number. + # This approximation is correct if the docstring does not contain + # explicit newlines ('\n') or joined lines ('\' at end of line). + lineno -= doc.count("\n") + + # Leading blank lines are stripped by cleandoc(), so we must + # return the line number of the first non-blank line. 
+ for ch in doc: + if ch == "\n": + lineno += 1 + elif not ch.isspace(): + break + + return lineno + + +def _extract_docstring_content(node: ast.Str | ast.Constant) -> tuple[str, int]: + """ + Extract docstring information from an ast node that represents the docstring. + + @returns: + - The line number of the first non-blank line of the docstring. See L{_extract_docstring_linenum}. + - The docstring to be parsed, cleaned by L{inspect.cleandoc}. + """ + lineno = _extract_docstring_linenum(node) + return inspect.cleandoc(t.cast(str, get_str_value(node))), lineno + + +if sys.version_info[:2] >= (3, 8): + # Since Python 3.8 "foo" is parsed as ast.Constant. + def get_str_value(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Constant) and isinstance(expr.value, str): + return expr.value + return None + +else: + # Before Python 3.8 "foo" was parsed as ast.Str. + def get_str_value(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Str): + return expr.s + return None + + +class Parser: + def __init__(self, state: libstatic.State, options: ParserOptions) -> None: + self.state = state + self.options = options + + def unparse(self, expr: ast.expr, is_annotation: bool = True) -> str: + nexpr = ast.Expr(expr) + if not self.options.expand_names: + return t.cast(str, unparse(nexpr).rstrip("\n")) + state = self.state + expand_expr = state.expand_expr + # expand_name = partial(state.expand_name, + # scope=next(s for s in state.get_all_enclosing_scopes(expr) + # if not isinstance(s, libstatic.Func)), + # is_annotation=True) + + class SourceGenerator(astor.SourceGenerator): # type:ignore[misc] + def visit_Name(self, node: ast.Name) -> None: + expanded: str = expand_expr(node) + if expanded and not expanded.endswith('*'): + self.write(expanded) + return + # not needed until the parser supports un-stringified type annotations. + # elif is_annotation: + # expanded = expand_name(node.id) + # if expanded and not expanded.endswith('*'): + # self.write(expanded) + # return + self.write(node.id) + + def visit_Str(self, node: ast.Str) -> None: + # astor uses triple-quoted strings :/ + # but we're losing the precedence info here, is it important? + self.write(unparse(ast.Expr(node)).rstrip("\n")) + + def visit_Constant(self, node: ast.Constant) -> None: + self.write(unparse(ast.Expr(node)).rstrip("\n")) + + try: + return t.cast(str, astor.to_source(nexpr, source_generator_class=SourceGenerator).rstrip("\n")) + except Exception: + return t.cast(str, unparse(nexpr).rstrip("\n")) + + def _get_lineno(self, definition: libstatic.Def) -> int: + # since ast.alias nodes only have lineno info since Python 3.10, + # we need to use the parent's lineno for those nodes. + if isinstance(definition, libstatic.Mod): + return 0 + current = definition.node + while True: + lineno = getattr(current, "lineno", None) + current = self.state.get_parent(current) + if lineno is not None: + break + return lineno or -1 + + def _yield_members(self, definition: libstatic.Def) -> t.Sequence[libstatic.Def]: + # locals are grouped by name for faster name lookups, so we need + # to sort them by source code order here. 
+ state = self.state + list_of_defs: list[list[libstatic.Def]] = [] + for name, defs in state.get_locals(definition).items(): + # there can be None values here :/ + defs = list(filter(None, defs)) + if not defs: + continue + if (name == '__all__' and isinstance(definition, libstatic.Mod) and + self.state.get_dunder_all(definition) is not None): + # take advantage of the fact that __all__ values are parsed + # by libstatic and output the computed value here, so we keep + # only one definition of __all__ and special-case it later. + defs = [defs[-1]] + list_of_defs.append(defs) + # filter out unreachable defs unless that would remove all + # the information we have about this symbol. + for defs in list_of_defs: + # This will contain several definitions if functions are using @overload + # or simply have several concurrent definitions. + live_defs = (d for d in defs if d and state.is_reachable(d)) + keep = [] + try: + keep.append(next(live_defs)) + except StopIteration: + keep = defs + else: + keep.extend(live_defs) + for d in set(defs).difference(keep): + defs.remove(d) + return sorted(chain.from_iterable(list_of_defs), key=lambda d: self._get_lineno(d)) + + @staticmethod + def get_docstring_node(node: ast.AST) -> ast.Str | ast.Constant | None: + """ + Return the docstring node for the given node or None if no docstring can + be found. + """ + if not isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)) or not node.body: + return None + node = node.body[0] + if isinstance(node, ast.Expr) and get_str_value(node.value) is not None: + return t.cast("ast.Str | ast.Constant", node.value) + return None + + def get_assign_docstring_node(self, assign: ast.Assign | ast.AnnAssign) -> ast.Str | ast.Constant | None: + """ + Get the docstring for a L{ast.Assign} or L{ast.AnnAssign} node. + This helper function relies on the parent links tracked by libstatic + to navigate upward in the tree and determine this node's direct siblings. 
+ """ + parent_node = self.state.get_parent(assign) + for fieldname, value in ast.iter_fields(parent_node): + if isinstance(value, (list, tuple)) and assign in value: + break + else: + raise RuntimeError(f"node {assign} not found in {parent_node}") + body = getattr(parent_node, fieldname) + if body: + assert isinstance(body, list) + assign_index = body.index(assign) + try: + right_sibling = body[assign_index + 1] + except IndexError: + return None + if isinstance(right_sibling, ast.Expr) and get_str_value(right_sibling.value) is not None: + return t.cast("ast.Str|ast.Constant", right_sibling.value) + return None + + def _extract_comment_docstring(self, definition: libstatic.Def) -> tuple[str | None, int]: + return None, 0 + # >>> ast.dump(ast_comments.parse('# hello\nclass C: # hello2\n # hello 3\n var=True#false')) + # "Module(body=[ + # Comment(value='# hello', inline=False), + # ClassDef(name='C', bases=[], keywords=[], + # body=[Comment(value='# hello2', inline=True), + # Comment(value='# hello 3', inline=False), + # Assign(targets=[Name(id='var', ctx=Store())], value=Constant(value=True)), + # Comment(value='#false', inline=True)], decorator_list=[])], type_ignores=[])" + + def _compute_instance_vars(self, definition: libstatic.Cls) -> t.Sequence[IVar]: + class ClassVisitor(LocalStmtVisitor): # type:ignore[misc] + def __init__(self) -> None: + self.ivars: t.List[IVar] = [] + + def visit_FunctionDef(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + args = node.args.args + if ( + len(args) == 0 + or node.name == "__new__" + or any( + ( + state.expand_expr(d) + or getattr(d, "id", None) + in {"builtins.classmethod", "builtins.staticmethod", "classmethod", "staticmethod"} + for d in node.decorator_list + ) + ) + ): + # not an instance method + return + self_def = state.get_def(args[0]) + for use in self_def.users(): + attr = state.get_parent(use) + if not (isinstance(attr, ast.Attribute) and isinstance(attr.ctx, ast.Store)): + continue + self.ivars.append(IVar(attr)) + + visit_AsyncFunctionDef = visit_FunctionDef + + state = self.state + visitor = ClassVisitor() + visitor.visit(definition.node) + return visitor.ivars + + def _parse_location(self, definition: libstatic.Def) -> docspec.Location: + return docspec.Location( + filename=self.state.get_filename(definition) or "?", + lineno=self._get_lineno(definition), + endlineno=getattr(definition.node, "end_lineno", None) if isinstance(definition, libstatic.Scope) else None, + ) + + def _extract_docstring(self, definition: libstatic.Def) -> docspec.Docstring | None: + if isinstance(definition, (libstatic.Func, libstatic.Mod, libstatic.Cls)): + doc_node = self.get_docstring_node(definition.node) + else: + try: + doc_node = self.get_assign_docstring_node( + self.state.get_parent_instance(definition.node, (ast.Assign, ast.AnnAssign)) + ) + except libstatic.StaticException: + doc_node = None + docstring: str | None + if doc_node: + docstring, lineno = _extract_docstring_content(doc_node) + else: + docstring, lineno = self._extract_comment_docstring(definition) + + if docstring: + return docspec.Docstring( + location=docspec.Location( + filename=self.state.get_filename(definition) or "?", + lineno=lineno, + endlineno=None, + ), + content=docstring.rstrip(), + ) + return None + + def _extract_bases(self, definition: libstatic.Cls) -> list[str]: + return [self.unparse(e) for e in definition.node.bases] + + def _extract_metaclass(self, definition: libstatic.Cls) -> str | None: + for k in definition.node.keywords: + if k.arg == 
"metaclass": + return self.unparse(k.value) + if "__metaclass__" not in self.state.get_locals(definition): + return None + try: + metaclass_var, *_ = self.state.get_local(definition, "__metaclass__") + metaclass_value = get_stored_value( + metaclass_var.node, self.state.get_parent_instance(metaclass_var.node, (ast.Assign, ast.AnnAssign)) + ) + except libstatic.StaticException: + return None + if metaclass_value: + return self.unparse(metaclass_value) + return None + + def _extract_return_type(self, returns: ast.expr | None) -> str | None: + return self.unparse(returns, is_annotation=True) if returns else None + + def _unparse_keywords(self, keywords: list[ast.keyword]) -> t.Iterable[str]: + for n in keywords: + yield (f"{(n.arg+'=') if n.arg else '**'}" f"{self.unparse(n.value) if n.value else ''}") + + def _parse_decoration(self, expr: "ast.expr") -> docspec.Decoration: + if isinstance(expr, ast.Call): + name = self.unparse(expr.func) + arglist = [*(self.unparse(n) for n in expr.args), *self._unparse_keywords(expr.keywords)] + else: + name = self.unparse(expr) + arglist = [] + return docspec.Decoration(location=self._parse_location(self.state.get_def(expr)), name=name, arglist=arglist) + + def _extract_semantics_hints(self, definition: libstatic.Def) -> list[object]: + return [] # TODO: support other semantics hints + + def _parse_ivar(self, ivar: IVar) -> docspec.Variable: + attrdef = self.state.get_def(ivar.node) + value, datatype = self._extract_variable_value_type(attrdef) + return docspec.Variable( + location=self._parse_location(attrdef), + docstring=self._extract_docstring(attrdef), + name=ivar.node.attr, + datatype=datatype, + value=value, + semantic_hints=[docspec.VariableSemantic.INSTANCE_VARIABLE], + ) + + def _parse_argument(self, arg: ArgSpec) -> docspec.Argument: + return docspec.Argument( + location=self._parse_location(self.state.get_def(arg.node)), + name=arg.node.arg, + type=arg.type, + datatype=self.unparse(arg.node.annotation, is_annotation=True) if arg.node.annotation else None, + default_value=self.unparse(arg.default) if arg.default else None, + ) + + def _extract_variable_value_type(self, definition: libstatic.Def) -> tuple[str | None, str | None]: + # special-case __all__ + scope = self.state.get_enclosing_scope(definition) + if definition.name() == '__all__' and isinstance(scope, libstatic.Mod): + computed_value = self.state.get_dunder_all(scope) + if computed_value is not None: + return (repr(computed_value), None) + try: + assign = self.state.get_parent_instance(definition.node, (ast.Assign, ast.AnnAssign)) + except libstatic.StaticException: + return None, None + if isinstance(assign, ast.AnnAssign): + return (self.unparse(assign.value) if assign.value else None, self.unparse(assign.annotation, is_annotation=True)) + try: + value = get_stored_value(definition.node, assign) + except libstatic.StaticException: + return (None, None) + annotation = None + if value is assign.value: + pass # TODO: seek for type comment + if annotation is None: + # because the code is unfinished, 'self.unparse(annotation)' will never run and mypy complains + pass # TODO: do basic type inference + return (self.unparse(value), self.unparse(annotation, is_annotation=True) if annotation else None) # type:ignore + + # @t.overload + # def parse(self, definition: libstatic.Mod) -> docspec.Module: + # ... + + # @t.overload + # def parse(self, definition: libstatic.Def) -> (docspec.Variable | docspec.Function | # type:ignore + # docspec.Class | docspec.Indirection): + # ... 
+ + def parse(self, definition: libstatic.Def) -> docspec.ApiObject: + if isinstance(definition, libstatic.Mod): + return docspec.Module( + name=definition.name(), + location=self._parse_location(definition), + docstring=self._extract_docstring(definition), + members=[self.parse(m) for m in self._yield_members(definition)], # type: ignore[misc] + ) + elif isinstance(definition, libstatic.Cls): + decorators = definition.node.decorator_list + metaclass = self._extract_metaclass(definition) + return docspec.Class( + name=definition.name(), + location=self._parse_location(definition), + docstring=self._extract_docstring(definition), + members=[ + *( # type: ignore[list-item] + self.parse(m) + for m in self._yield_members(definition) + if not metaclass or m.name() != "__metaclass__" + ), + *(self._parse_ivar(iv) for iv in self._compute_instance_vars(definition)), + ], + bases=self._extract_bases(definition), + metaclass=metaclass, + decorations=[self._parse_decoration(dec) for dec in decorators] if decorators else None, + semantic_hints=t.cast(list[docspec.ClassSemantic], self._extract_semantics_hints(definition)), + ) + elif isinstance(definition, libstatic.Func): + decorators = definition.node.decorator_list + return docspec.Function( + name=definition.name(), + location=self._parse_location(definition), + docstring=self._extract_docstring(definition), + decorations=[self._parse_decoration(dec) for dec in decorators], + semantic_hints=t.cast(list[docspec.FunctionSemantic], self._extract_semantics_hints(definition)), + modifiers=["async"] if isinstance(definition.node, ast.AsyncFunctionDef) else None, + args=[self._parse_argument(arg) for arg in _iter_arguments(definition.node.args)], + return_type=self._extract_return_type(definition.node.returns), + ) + elif isinstance(definition, libstatic.Var): + value, datatype = self._extract_variable_value_type(definition) + return docspec.Variable( + name=definition.name(), + location=self._parse_location(definition), + docstring=self._extract_docstring(definition), + semantic_hints=t.cast(list[docspec.VariableSemantic], self._extract_semantics_hints(definition)), + modifiers=[], + value=value, + datatype=datatype, + ) + elif isinstance(definition, libstatic.Imp): + return docspec.Indirection( + name=definition.name(), + location=self._parse_location(definition), + target=definition.target(), + docstring=None, + ) + else: + assert False, f"unexpected definition type: {type(definition)}" diff --git a/docspec-python/test/__init__.py b/docspec-python/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docspec-python/test/test_loader.py b/docspec-python/test/test_loader.py index d74a3e2..118c640 100644 --- a/docspec-python/test/test_loader.py +++ b/docspec-python/test/test_loader.py @@ -35,8 +35,8 @@ def _assert_is_docspec_python_module(modules: t.List[docspec.Module]) -> None: - assert sorted(m.name for m in modules) == ["docspec_python", "docspec_python.__main__", "docspec_python.parser"] - + assert sorted(m.name for m in modules) == ["docspec_python", "docspec_python.__main__", + "docspec_python.parser", "docspec_python.parser2"] def test_discovery_from_sys_path() -> None: """Tests that the `docspec_python` module can be loaded from `sys.path`.""" diff --git a/docspec-python/test/test_parser.py b/docspec-python/test/test_parser.py index df37e7f..76a3c8d 100644 --- a/docspec-python/test/test_parser.py +++ b/docspec-python/test/test_parser.py @@ -22,6 +22,7 @@ from __future__ import annotations import sys +import types from functools 
import wraps from io import StringIO from json import dumps @@ -48,6 +49,10 @@ from nr.util.inspect import get_callsite from docspec_python import ParserOptions, format_arglist, parse_python_module +try: + from docspec_python import parser2 +except ImportError: + parser2 = None T = TypeVar("T") DocspecTest = Callable[[], List[_ModuleMemberType]] @@ -96,20 +101,33 @@ def docspec_test( def decorator(func: DocspecTest) -> Callable[[], None]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> None: - parsed_module = parse_python_module( - StringIO(dedent(func.__doc__ or "")), - module_name=module_name or func.__name__.lstrip("test_"), - options=parser_options, - filename=func.__name__, - ) - parsed_module.location = loc - reference_module = Module( - name=parsed_module.name, location=loc, docstring=None, members=func(*args, **kwargs) - ) - if strip_locations: - unset_location(parsed_module) - unset_location(reference_module) - assert dumps(dump_module(reference_module), indent=2) == dumps(dump_module(parsed_module), indent=2) + for parser_version in (1,2): + if parser_version==2 and parser2 is None: + continue + # parse module + parsed_module = parse_python_module( + StringIO(dedent(func.__doc__ or "")), + module_name=module_name or func.__name__.lstrip("test_"), + options=parser_options, + filename=func.__name__, + parser_version=parser_version, + ) + # mutate the globals to set the __docspec_parser_version__ variable + global __docspec_parser_version__ + __docspec_parser_version__ = parser_version + # run test + parsed_module.location = loc + expected_members = func(*args, **kwargs) + if not expected_members: + continue + reference_module = Module( + name=parsed_module.name, location=loc, + docstring=None, members=expected_members + ) + if strip_locations: + unset_location(parsed_module) + unset_location(reference_module) + assert dumps(dump_module(reference_module), indent=2) == dumps(dump_module(parsed_module), indent=2) return wrapper @@ -122,7 +140,6 @@ def test_funcdef_1() -> List[_ModuleMemberType]: def a(): ' A simple function. ' """ - return [ Function( name="a", @@ -142,7 +159,6 @@ def test_funcdef_2() -> List[_ModuleMemberType]: def b(a: int, *, c: str, **opts: Any) -> None: ' This uses annotations and keyword-only arguments. ' """ - return [ Function( name="b", @@ -168,7 +184,13 @@ def test_funcdef_3() -> List[_ModuleMemberType]: def c(self, a: int, b, *args, opt: str) -> Optional[int]: ' More arg variations. 
' """ - + if __docspec_parser_version__==2: + decargs=([],) + decargs2=(None, ["sql_debug=True"]) + else: + decargs=() + decargs2=("(sql_debug=True)",) + return [ Function( name="c", @@ -184,8 +206,8 @@ def c(self, a: int, b, *args, opt: str) -> Optional[int]: ], return_type="Optional[int]", decorations=[ - Decoration(Location("test_funcdef_3", 2), "classmethod", None), - Decoration(Location("test_funcdef_3", 3), "db_session", "(sql_debug=True)"), + Decoration(Location("test_funcdef_3", 2), "classmethod", None, *decargs), + Decoration(Location("test_funcdef_3", 3), "db_session", *decargs2), ], ) ] @@ -197,7 +219,6 @@ def test_funcdef_4() -> List[_ModuleMemberType]: def fun(project_name, project_type, port=8001): pass """ - return [ Function( name="fun", @@ -237,7 +258,8 @@ def func4(self): ''' return self.foo """ - + if __docspec_parser_version__ == 2: + return [] args = [Argument(loc, "self", Argument.Type.POSITIONAL, None, None, None)] return [ mkfunc("func1", None, 1, args), @@ -256,10 +278,10 @@ def func1(a, *, b, **c): pass def func2(*args, **kwargs): ''' Docstring goes here. ''' - def func3(*, **kwargs): + def func3(*, abc, **kwargs): ''' Docstring goes here. ''' - def func4(abc, *,): + def func4(abc, *, d): '''Docstring goes here''' def func5(abc, *, kwonly): @@ -268,7 +290,6 @@ def func5(abc, *, kwonly): async def func6(cls, *fs, loop=None, timeout=None, total=None, **tqdm_kwargs): ''' Docstring goes here. ''' """ - return [ mkfunc( "func1", @@ -293,7 +314,8 @@ async def func6(cls, *fs, loop=None, timeout=None, total=None, **tqdm_kwargs): "func3", "Docstring goes here.", 7, - [ + [ + Argument(loc, "abc", Argument.Type.KEYWORD_ONLY, None, None, None), Argument(loc, "kwargs", Argument.Type.KEYWORD_REMAINDER, None, None, None), ], ), @@ -303,6 +325,7 @@ async def func6(cls, *fs, loop=None, timeout=None, total=None, **tqdm_kwargs): 10, [ Argument(loc, "abc", Argument.Type.POSITIONAL, None, None, None), + Argument(loc, "d", Argument.Type.KEYWORD_ONLY, None, None, None), ], ), mkfunc( @@ -340,7 +363,6 @@ def func2(x, /, *v, a=1, b=2): pass def func3(x, /, *, a=1, b=2, **kwargs): pass def func4(x, y, /): pass """ - return [ mkfunc( "func1", @@ -408,7 +430,6 @@ class MyError5(metaclass=ABCMeta): class MyError6(RuntimeError): __metaclass__ = ABCMeta """ - return [ Class(name="MyError1", location=loc, docstring=None, metaclass=None, bases=[], decorations=None, members=[]), Class(name="MyError2", location=loc, docstring=None, metaclass=None, bases=[], decorations=None, members=[]), @@ -458,7 +479,7 @@ def test_indirections() -> List[_ModuleMemberType]: PurePath as PP, PosixPath ) - from .. import core + from .. import core # this import makes no sens, but it's still parsed from ..core import Widget, View from .vendor import pkg_resources, six from ...api import * @@ -468,11 +489,24 @@ class bar: import os from os.path import dirname """ - + if __docspec_parser_version__ == 2: + clslocargs = (19,) + funclocargs = (16,) + # The name bound is actually 'os', and yes + # we're loosing the information about the acutal + # dependency link, but the current model doesn't allow + # for better representation. 
+ indirections_line4_args = ("os", None, "os") + else: + clslocargs = () + funclocargs = () + # this import is not binding the name 'path', so this + # is untrue + indirections_line4_args = ("path", None, "os.path") return [ Indirection(Location("test_indirections", 2), "os", None, "os"), Indirection(Location("test_indirections", 3), "r", None, "urllib.request"), - Indirection(Location("test_indirections", 4), "path", None, "os.path"), + Indirection(Location("test_indirections", 4), *indirections_line4_args), Indirection(Location("test_indirections", 4), "sys", None, "sys"), Indirection(Location("test_indirections", 4), "P", None, "pathlib"), Indirection(Location("test_indirections", 5), "platform", None, "sys.platform"), @@ -486,9 +520,9 @@ class bar: Indirection(Location("test_indirections", 13), "pkg_resources", None, ".vendor.pkg_resources"), Indirection(Location("test_indirections", 13), "six", None, ".vendor.six"), Indirection(Location("test_indirections", 14), "*", None, "...api.*"), - Function(Location("test_indirections", 15), "foo", None, None, [], None, []), + Function(Location("test_indirections", 15, *funclocargs), "foo", None, None, [], None, []), Class( - Location("test_indirections", 17), + Location("test_indirections", 17, *clslocargs), "bar", None, [ @@ -536,16 +570,21 @@ def build_docker_image( ) -> Task: pass """ - + if __docspec_parser_version__ == 2: + arg1default = "'buildDocker'" + arg3default = "'docker/release.Dockerfile'" + else: + arg1default = '"buildDocker"' + arg3default = '"docker/release.Dockerfile"' return [ mkfunc( "build_docker_image", None, 0, [ - Argument(loc, "name", Argument.Type.POSITIONAL, None, "str", '"buildDocker"'), + Argument(loc, "name", Argument.Type.POSITIONAL, None, "str", arg1default), Argument(loc, "default", Argument.Type.POSITIONAL, None, "bool", "False"), - Argument(loc, "dockerfile", Argument.Type.POSITIONAL, None, "str", '"docker/release.Dockerfile"'), + Argument(loc, "dockerfile", Argument.Type.POSITIONAL, None, "str", arg3default), Argument(loc, "project", Argument.Type.POSITIONAL, None, "Project | None", "None"), Argument(loc, "auth", Argument.Type.POSITIONAL, None, "dict[str, tuple[str, str]] | None", "None"), Argument(loc, "secrets", Argument.Type.POSITIONAL, None, "dict[str, str] | None", "None"), @@ -572,7 +611,6 @@ def f(x): case _: return "idk" """ - return [ mkfunc( "f", @@ -600,7 +638,6 @@ def multi(): def special_characters(): ''' ff ''' """ - return [ mkfunc("normal", "Normal d\\cstring.", 0, []), mkfunc("single", "S\\\\ngle raw docstring.", 0, []), @@ -619,7 +656,8 @@ class Test: #: And so is this. b: str """ - + if __docspec_parser_version__ ==2: + return[] return [ Class( loc, @@ -658,11 +696,11 @@ def test_can_parse_tuple_unpacking() -> List[_ModuleMemberType]: e, (f, *g) = value """ - + if __docspec_parser_version__ == 2: + return [] # NOTE(NiklasRosenstein): We don't explicitly support yielding information about variables # resulting from tuple-unpacking as we cannot tell which of the variables the docstring is # for, and how to assign the right side to the variables on the left. - return [Variable(loc, "v", None, None, "42")] @@ -680,7 +718,8 @@ class Test: d: None #: This is also ignored. ''' Because I exist! ''' """ - + if __docspec_parser_version__ == 2: + return [] return [ Variable(loc, "a", Docstring(loc, "This is a variable."), "int", "42"), Class( @@ -709,7 +748,8 @@ def test_hash_docstring_does_not_loose_indentation() -> List[_ModuleMemberType]: #:Ok? 
command = ["bash", "./install.sh"] """ - + if __docspec_parser_version__ == 2: + return [] return [ Variable( loc, @@ -742,7 +782,6 @@ def foo(): assert 42 == "Answer to the universe" ''' """ - return [ mkfunc( "foo", @@ -766,5 +805,4 @@ def my_example(): 'This is supposed to be pound: \043' pass """ - return [mkfunc("my_example", "This is supposed to be pound: #", 0, [])] diff --git a/docspec-python/test/test_parser2.py b/docspec-python/test/test_parser2.py new file mode 100644 index 0000000..ee46f76 --- /dev/null +++ b/docspec-python/test/test_parser2.py @@ -0,0 +1,223 @@ + +import ast +import inspect +import sys +import types +from functools import wraps +from io import StringIO +from json import dumps +from textwrap import dedent +from typing import Any, Callable, List, Optional, TypeVar, Iterable + +import pytest +from docspec import ( + ApiObject, + Argument, + Class, + Decoration, + Docstring, + Function, + HasLocation, + HasMembers, + Indirection, + Location, + Module, + Variable, + _ModuleMemberType, + dump_module, +) + +from .test_parser import DocspecTest, mkfunc, unset_location + +try: + from docspec_python import parser2 +except ImportError: + parser2 = None + +loc = Location('', 0, None) + +def _parse_doc(docstring:str) -> Iterable[parser2.ModSpec]: + """ + format is + ''' + > {'modname':'test', } + import sys + import thing + > {'modname':'test2', } + from test import thing + ''' + """ + docstring = '\n'+inspect.cleandoc(docstring) + # separate modules + for p in docstring.split('\n>'): + if not p: + continue + try: + meta, *src = p.splitlines() + except ValueError as e: + raise ValueError(f'value is: {p!r}') from e + parsed_meta = ast.literal_eval(meta) + assert isinstance(parsed_meta, dict) + yield parser2.ModSpec(src='\n'.join(src), **parsed_meta) + + +def docspec_test(parser_options: parser2.ParserOptions | None = None, + strip_locations: bool = True +) -> Callable[[DocspecTest], Callable[[], None]]: + """ + Decorator for docspec unit tests, parser2. + """ + + def decorator(func: DocspecTest) -> Callable[[], None]: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> None: + + if parser2 is None: + return + + # parse docstring into a series of modules + mods = list(_parse_doc(func.__doc__ or "")) + parsed_modules = list(parser2.parse_modules(mods, options=parser_options)) + + # run test + expected_modules = func(*args, **kwargs) + + if strip_locations: + for parsed_module in parsed_modules: + unset_location(parsed_module) + for reference_module in expected_modules: + unset_location(reference_module) + assert dumps([dump_module(r) for r in expected_modules], indent=2) == dumps([dump_module(p) for p in parsed_modules], indent=2) + + return wrapper + + return decorator + +@docspec_test(strip_locations=True) +def test_funcdef_annotation_expanded() -> List[_ModuleMemberType]: + """ + > {'modname':'mod', 'is_package':True} + from ._impl import Cls + def a() -> Cls: + ... + > {'modname':'mod._impl'} + class Cls: + ... 
+ """ + return [ + Module( + location=loc, + name='mod', + docstring=None, + members=[ + Indirection( + name='Cls', + target='mod._impl.Cls', + location=loc, + docstring=None, + ), + Function( + name="a", + location=loc, + docstring=None, + modifiers=None, + args=[], + return_type='mod._impl.Cls', + decorations=[], + )]), + Module( + location=loc, + name='mod._impl', + docstring=None, + members=[ + Class( + name="Cls", + location=loc, + docstring=None, + members=[], + metaclass=None, + bases=[], + decorations=None, + )]) + ] + +@docspec_test(strip_locations=True, parser_options=parser2.ParserOptions(verbosity=2)) +def test_wildcard_imports() -> List[_ModuleMemberType]: + """ + > {'modname':'mod', 'is_package':True} + from ._impl import * + from ._impl2 import * + from ._impl3 import * + from ._impl3 import __all__ as _all3 + __all__ = ['Cls2', 'Cls1'] + __all__ += _all3 + + def a(x:Cls2, y:Cls5) -> Cls1: + ... + > {'modname':'mod._impl'} + class Cls1: + ... + > {'modname':'mod._impl2'} + class Cls2: + ... + > {'modname':'mod._impl3'} + class Cls3: + ... + class Cls4: + ... + class Cls5: + ... + __all__ = ['Cls3', 'Cls5'] + """ + return [ + Module( + location=loc, + name='mod', + docstring=None, + members=[ + Indirection(location=loc, name='*', docstring=None, target='mod._impl.*'), + Indirection(location=loc, name='Cls1', docstring=None, target='mod._impl.Cls1'), + Indirection(location=loc, name='*', docstring=None, target='mod._impl2.*'), + Indirection(location=loc, name='Cls2', docstring=None, target='mod._impl2.Cls2'), + Indirection(location=loc, name='*', docstring=None, target='mod._impl3.*'), + Indirection(location=loc, name='Cls3', docstring=None, target='mod._impl3.Cls3'), + Indirection(location=loc, name='Cls5', docstring=None, target='mod._impl3.Cls5'), + Indirection(location=loc, name='_all3', docstring=None, target='mod._impl3.__all__'), + Variable(location=loc, name='__all__', docstring=None, value="['Cls2', 'Cls1', 'Cls3', 'Cls5']"), + Function(location=loc, name='a', modifiers=None, args=[ + Argument(location=loc, name='x', type=Argument.Type.POSITIONAL, + datatype='mod._impl2.Cls2'), + Argument(location=loc, name='y', type=Argument.Type.POSITIONAL, + datatype='mod._impl3.Cls5'), + ], return_type='mod._impl.Cls1', docstring=None, decorations=[]), + ]), + Module( + location=loc, + name='mod._impl', + docstring=None, + members=[ + Class(location=loc, name='Cls1', docstring=None, + members=[], metaclass=None, bases=[], decorations=None), + ]), + Module( + location=loc, + name='mod._impl2', + docstring=None, + members=[ + Class(location=loc, name='Cls2', docstring=None, + members=[], metaclass=None, bases=[], decorations=None), + ]), + Module( + location=loc, + name='mod._impl3', + docstring=None, + members=[ + Class(location=loc, name='Cls3', docstring=None, + members=[], metaclass=None, bases=[], decorations=None), + Class(location=loc, name='Cls4', docstring=None, + members=[], metaclass=None, bases=[], decorations=None), + Class(location=loc, name='Cls5', docstring=None, + members=[], metaclass=None, bases=[], decorations=None), + Variable(location=loc, name='__all__', docstring=None, value="['Cls3', 'Cls5']") + ]) + ] \ No newline at end of file