diff --git a/document_services.py b/document_services.py index 39b50531..c4c0afdd 100644 --- a/document_services.py +++ b/document_services.py @@ -1,44 +1,69 @@ import json import re import sys -from os import path +from os import path, listdir from abc import ABC, abstractmethod import traceback as tb +from subprocess import Popen, PIPE from typing import Any, Iterable -HAS_WARNED = False +WARNCOUNT: dict[str, int] = {} CURRENT_SERVICE: str | None = None def warn(msg: str, *context: Any): - """Prints a warning message to the console and sets the global HAS_WARNED variable to True. + """Prints a warning message to the console and increments the global WARNCOUNT variable. :param str msg: The warning message to print. + :param Any *context: Any contextual info to be passed along. """ - global HAS_WARNED - WARN = "\033[43m\033[30mWARN:\033[0m " - WARN_TAB = " \033[43m\033[33m|\033[0m " + WARN = "\033[0m\033[43m\033[30mWARN:\033[0m " + WARN_TAB = "\033[0m \033[43m\033[33m|\033[0m " + WARN_TAB_LAST = "\033[0m \033[43m\033[33m\033[58;5;0m\033[4m|\033[0m " - HAS_WARNED = True + WARNCOUNT[msg] = (WARNCOUNT.get(msg) or 0) + 1 stack = tb.extract_stack() stack_str = " -> ".join([f"\033[34m{frame.name}\033[0m" for frame in stack if frame != stack[-1]]) service_msg: str if CURRENT_SERVICE is None: - service_msg = "outside any service" + service_msg = "" else: - service_msg = f"in service \033[36m{CURRENT_SERVICE}\033[0m" + service_msg = f"in service \033[36m{CURRENT_SERVICE}\033[0m " + + context = tuple(f"{c}".strip() for c in context) + context_formatted = [f"\n{c}\033[0m".replace('\n', f"\n\033[0m{WARN_TAB} \033[2m") for c in context] + if len(context_formatted) > 0: + context_formatted[-1] = context_formatted[-1].replace(WARN_TAB, WARN_TAB_LAST) print( - f"{WARN}\033[31m{msg}\033[0m {service_msg} ({stack_str})", - f"\n{WARN_TAB} " if len(context) > 0 else "", - *[f"\033[2m{c}\033[0m".replace('\n', '\n\033[0m' + WARN_TAB + " \033[2m") for c in context], + f"{WARN}\033[31m{msg}\033[0m {service_msg}({stack_str})", + *context_formatted, file=sys.stderr, sep="" ) -def convert_php_type_to_normal_type(param_type: str) -> str: +def convert_php_type_to_normal_type(php_type: str) -> tuple[str, bool]: + CONVERSIONS = { + "int": "int", + "string": "String", + "bool": "bool", + } + + if php_type[0] == '?': + nullable = True + php_type = php_type[1:] + else: + nullable = False + + if php_type in CONVERSIONS.keys(): + return CONVERSIONS[php_type], nullable + else: + warn("unrecognized php datatype", php_type) + return php_type, nullable + +def convert_moodle_type_to_normal_type(param_type: str) -> str: CONVERSIONS = { "PARAM_INT": "int", "PARAM_TEXT": "String", @@ -86,7 +111,7 @@ def parse_nullable(inpot: str) -> bool | None: elif inpot == 'NULL_ALLOWED': return True else: - warn(f"found weird value for nullable: {inpot}") + warn("found weird value for nullable", inpot) return None class PHPNameResolution: @@ -189,7 +214,7 @@ def resolve(self) -> PHPExpression: meth_matches: list[str] = re.findall(meth_pattern, new_file_content, re.DOTALL) if len(meth_matches) == 0: - warn(f"couldn't find {self} inside {self.fp}") + warn("Missing class member", f"couldn't find {self} inside {self.fp}") return PHPConstant('null') elif len(meth_matches) > 1: raise Exception(f"Found multiple definitions for {self} inside {self.fp}") @@ -214,7 +239,7 @@ def getcases(cls, classname: str) -> dict[str, str]: fp = f"lbplanner/classes/enums/{classname}.php" if not path.exists(fp): - warn(f"Couldn't find enum file {fp}") + warn("Couldn't find enum file", fp) return {} with open(fp, "r") as f: matches: list[list[str]] = re.findall(fullbody_pattern, f.read(), re.DOTALL) @@ -223,7 +248,7 @@ def getcases(cls, classname: str) -> dict[str, str]: cases = cls.getcases(matches[0][0]) body = matches[0][1] else: - warn(f"couldn't parse enum {classname}", matches) + warn("couldn't parse enum", f"name: {classname}", matches) matches2: list[str] = re.findall(casepattern, body) for match in matches2: @@ -246,7 +271,7 @@ def __init__(self, classname: str, casename: str, fp: str): def resolve(self) -> PHPString: cases = self.getcases(self.classname) if self.casename not in cases.keys(): - warn(f"enum member {self.classname}::{self.casename} not found", cases) + warn("enum member not found", f"{self.classname}::{self.casename}", cases) return PHPStringLiteral("?") val = cases[self.casename] @@ -334,7 +359,7 @@ def toIR(self) -> 'IRElement': return IRValue(None, None, nullable=True, description="", required=True) assert isinstance(self.parameters[0], PHPConstant) assert isinstance(self.parameters[1], PHPString) - typ = convert_php_type_to_normal_type(self.parameters[0].name) + typ = convert_moodle_type_to_normal_type(self.parameters[0].name) desc = self.parameters[1].get_value() required = True @@ -369,7 +394,7 @@ def toIR(self) -> 'IRElement': return IRValue(typ, default_value=default, nullable=nullable, description=desc, required=required) case _: - warn(f"unkown constructor name: {self.name}") + warn("unkown constructor name", self.name) return IRValue(None, None, nullable=True) class PHPConstant(PHPExpression): @@ -386,12 +411,15 @@ def __str__(self) -> str: class SlotsDict: @property def __dict__(self): - slots = tuple() + return {name: self.__getattribute__(name) for name in self.get_slots()} + def get_slots(self): + slots = tuple() for cls in self.__class__.__mro__: if cls != SlotsDict and issubclass(cls, SlotsDict): slots = cls.__slots__ + slots - return {name: self.__getattribute__(name) for name in slots} + + return slots class FunctionInfo(SlotsDict): __slots__ = ('name', 'group', 'capabilities', 'description', 'path') @@ -415,6 +443,52 @@ def __init__(self, self.parameters = parameters self.returns = returns +class DocString_TypeDescPair(SlotsDict): + __slots__ = ('typ', 'description') + typ: str + description: str + + def __init__(self, typ: str, description: str): + self.typ = typ + self.description = description + +class DocString(SlotsDict): + __slots__ = ('description', 'params', 'returns', 'subpackage', 'copyright') + description: str + params: dict[str, DocString_TypeDescPair] + returns: DocString_TypeDescPair | None + subpackage: str | None + copyright: tuple[int, str] | None + + def __init__( + self, + desc: str, + params: dict[str, DocString_TypeDescPair], + returns: DocString_TypeDescPair | None, + subpackage: str | None, + copyright: tuple[int, str] | None, + ): + self.description = desc + self.params = params + self.returns = returns + self.subpackage = subpackage + self.copyright = copyright + +class ExtractedAPIFunction(SlotsDict): + __slots__ = ('docstring', 'name', 'params', 'returns', 'body') + docstring: DocString + name: str + params: dict[str, str] + returns: str + body: str + + def __init__(self, docstring: DocString, name: str, params: dict[str, str], returns: str, body: str): + self.docstring = docstring + self.name = name + self.params = params + self.returns = returns + self.body = body + class IRElement(SlotsDict, ABC): __slots__ = ('description', 'required', 'type') @@ -664,15 +738,18 @@ def parse_string(code: str) -> tuple[int, PHPStringLiteral]: result.append(c) def extract_function_info(file_content: str) -> list[FunctionInfo]: - function_info = [] + function_infos = [] - # Removing comments, PHP tags, and definitions + # Removing line comments, PHP tags, and definitions clean_content = re.sub(r"//.*|<\?php|defined\(.*\)\s*\|\|\s*die\(\);", "", file_content) # Splitting the content based on function definition blocks # https://regex101.com/r/qyzYks functions = re.findall(r"'(local_lbplanner_(\w+?)_(\w+))' => \[(.*?)\],", clean_content, re.DOTALL) + # to make sure we never accidentally duplicate descriptions + existing_descriptions = [] + for function in functions: func_dict = {} @@ -690,8 +767,8 @@ def extract_function_info(file_content: str) -> list[FunctionInfo]: func_dict["capabilities"] = [cap.strip() for cap in capabilities.group(1).split(',') if len(cap) > 0] # Extracting description - description = re.search(r"'description' => '(.*?)'", function[3]) - func_dict["description"] = description.group(1) if description else None + description = re.search(r"'description' => '([^\n]*)'", function[3]) + func_dict["description"] = description.group(1).replace('\\\'', '\'') if description else None # Extracting and adjusting path classpath = re.search(r"'classpath' => 'local/(.*?)'", function[3]) @@ -699,42 +776,190 @@ def extract_function_info(file_content: str) -> list[FunctionInfo]: # Only adding to the list if all information is present if all(value is not None for value in func_dict.values()): - function_info.append(FunctionInfo(**func_dict)) + finfo = FunctionInfo(**func_dict) + function_infos.append(finfo) + + if finfo.description in existing_descriptions: + warn("duplicated API function description", finfo.description) + else: + existing_descriptions.append(finfo.description) else: - warn(f"Could not gather all info for {func_dict["name"]}", func_dict) + warn("Could not gather all info for API function", func_dict["name"], func_dict) - if len(function_info) == 0: + if len(function_infos) == 0: warn("Couldn't find any functions!") - return function_info + # double-checking using the services list below + services_function_block = re.search(r"\$services = \[.*?'functions' => \[(['a-z_,\s]+)\]", clean_content, re.DOTALL) + if services_function_block is None: + warn("Couldn't find $services") + else: + services_functions = re.findall(r"'local_lbplanner_([a-z]+)_([a-z_]+)'", services_function_block[1]) + + function_infos_copy = function_infos.copy() + for function in services_functions: + # Extracting function name and group + func_name = function[1] + func_group = function[0] + + found = False + for functioninfo in function_infos_copy: + if functioninfo.name == func_name and functioninfo.group == func_group: + function_infos_copy.remove(functioninfo) + found = True + break + + if not found: + warn("Couldn't find service function in $functions", f"{func_group}_{func_name}") + + for functioninfo in function_infos_copy: + # The ones left here are not in services_function. + warn("Couldn't find service function in $services", f"{functioninfo.group}_{functioninfo.name}") + + # double-checking using existing files + function_infos_copy = function_infos.copy() + searchdir = './lbplanner/services' + for subdir in listdir(searchdir): + dirpath = path.join(searchdir, subdir) + if not path.isdir(dirpath): + warn('found file in services folder', subdir) + continue + + for filename in listdir('lbplanner/services/' + subdir): + if path.isdir(filename): + warn('found directory in folder', filename, dirpath) + continue + if not filename.endswith('.php'): + warn('found non-php file in folder', filename, dirpath) + continue + + for functioninfo in function_infos_copy: + if functioninfo.name == filename[:-4] and functioninfo.group == subdir: + function_infos_copy.remove(functioninfo) + found = True + break + + if not found: + warn("Couldn't find service function in $function", f"{func_group}_{func_name}") + for functioninfo in function_infos_copy: + # The ones left here are not in the dirs. + warn("Couldn't find file in services folder", f"{functioninfo.group}/{functioninfo.name}.php") -def extract_php_functions(php_code: str, name: str) -> tuple[str | None, str | None]: + return function_infos + + +def extract_api_functions(php_code: str, name: str) -> tuple[ExtractedAPIFunction | None, ExtractedAPIFunction | None, ExtractedAPIFunction | None]: # Regular expression to match the function names and bodies # https://regex101.com/r/9GtIMA - pattern = re.compile(r"(public static function (\w+_(?:returns|parameters))\W[^{}]*?{[^{}]+?})", re.DOTALL) + pattern = re.compile( + r"(?P /\*\*\n(?: \*[^\n]*\n)*? \*/)\s*public static function (?P\w+(?:_returns|_parameters|))\(\s*(?P(?:\??(?:int|string|bool) \$[a-z]*(?:,\s+)?)*)\)(?:: (?P[a-z_]+))? (?P{.+?^ }$)", + re.DOTALL | re.MULTILINE + ) # Find all matches in the PHP code matches: list[tuple[str, str]] = pattern.findall(php_code) parameters_function = None returns_function = None + main_function = None for match in matches: # Extract function name - function_name = match[1] - - if function_name.endswith("_parameters"): - parameters_function = match[0] - elif function_name.endswith("_returns"): - returns_function = match[0] + if len(match) == 4: + func_docstring, func_name, func_params, func_body = match + func_returns = None + elif len(match) == 5: + func_docstring, func_name, func_params, func_returns, func_body = match + else: + raise Exception("unreachable") + + function_packed = ExtractedAPIFunction( + parse_docstring(func_docstring), + func_name, + parse_php_function_parameters(func_params), + func_returns, + func_body + ) + + if func_name.endswith("_parameters"): + parameters_function = function_packed + elif func_name.endswith("_returns"): + returns_function = function_packed + else: + main_function = function_packed if parameters_function is None: - warn(f"Couldn't find parameters function in {name}") + warn("Couldn't find parameters function", name, php_code) if returns_function is None: - warn(f"Couldn't find returns function in {name}") + warn("Couldn't find returns function", name, php_code) + if main_function is None: + warn("Couldn't find main function", name, php_code) + + return parameters_function, main_function, returns_function + +def extract_main_api_docstring(inpot: str) -> DocString: + return parse_docstring(inpot[inpot.find('/**'):inpot.find('*/')]) + +def parse_docstring(inpot: str) -> DocString: + desc_a = [] + params: dict[str, DocString_TypeDescPair] = {} + returns: DocString_TypeDescPair | None = None + subpackage: str | None = None + copyright: tuple[int, str] | None = None + isdesc = True + for line in inpot.splitlines(): + strippedline = line[line.find('*') + 1:].strip() + if strippedline in ('*', ' ', ''): + continue # empty line, ignore + elif strippedline == '/': + break # last line, ignore + elif strippedline.startswith('NOTE:'): + isdesc = False + continue # internal notes, ignore + elif strippedline.startswith('@'): + isdesc = False + splitline = strippedline.split(' ') + match splitline[0]: + case '@param': + if splitline[2] in params: + warn("specified @param twice in docstring", splitline[2]) + params[splitline[2]] = DocString_TypeDescPair(splitline[1], " ".join(splitline[3:])) + case '@return': + if returns is not None: + warn("specified @returns twice in docstring") + returns = DocString_TypeDescPair(splitline[1], " ".join(splitline[2:])) + case '@package': + if splitline[1] != "local_lbplanner": + warn("found @package with invalid value instead of local_lbplanner", splitline[1]) + case '@subpackage': + subpackage = " ".join(splitline[1:]) + case '@copyright': + copyright = int(splitline[1]), " ".join(splitline[2:]) + case '@throws' | '@see' | '@link' | '@license': + pass + case unknown: + warn("unknown @-rule", unknown, line) + elif isdesc: + desc_a.append(strippedline) + + desc = " ".join(desc_a) + + # remove trailing period + if desc.endswith('.'): + desc = desc[:-1] + + return DocString(desc, params, returns, subpackage, copyright) + +def parse_php_function_parameters(inpot: str) -> dict[str, str]: + """ "int $a, string $b" → {"a": "int", "b": "string"} """ + # https://regex101.com/r/zfWGKi + matches = re.findall(r"(\??[a-z]+)\s+\$([a-z_]+)", inpot) + out = {} + for match in matches: + out[match[1]] = match[0] - return parameters_function, returns_function + return out def find_import(nr: PHPNameResolution, symbol: str) -> str | None: @@ -771,10 +996,10 @@ def makepath(p: str, symbol: str): fp_l.append(fallback) if len(fp_l) > 1: - warn(f"found potential import collision for {symbol} using [{nr}]") + warn("found potential import collision", f"{symbol} in [{nr}]") return None elif len(fp_l) == 0: - warn(f"couldn't find symbol: {symbol} using [{nr}]") + warn("couldn't find symbol", f"{symbol} in [{nr}]") return None else: return fp_l[0] @@ -814,7 +1039,7 @@ def parse_function(input_text: str, nr: PHPNameResolution) -> IRElement | None: return topelement -def main(): +def main() -> None: global CURRENT_SERVICE with open("lbplanner/db/services.php", "r") as file: content = file.read() @@ -829,22 +1054,104 @@ def main(): with open(info.path, "r") as func_file: func_content = func_file.read() - imports = extract_imports(func_content) - params_func, returns_func = extract_php_functions(func_content, info.path) - - if returns_func is None or params_func is None: - continue - returns = parse_function(returns_func, imports) + imports = extract_imports(func_content) + params_func, main_func, returns_func = extract_api_functions(func_content, info.path) + main_docstring = extract_main_api_docstring(func_content) - params = parse_function(params_func, imports) + if returns_func is None or params_func is None: + continue - complete_info.append(FunctionInfoEx(info, params, returns)) + returns = parse_function(returns_func.body, imports) + + params = parse_function(params_func.body, imports) + + complete_info.append(FunctionInfoEx(info, params, returns)) + + if main_func is not None: + # checking function descriptions + if main_func.docstring.description != info.description or main_docstring.description != info.description: + warn( + "non-matching API function descriptions", + f"func docstring: {main_func.docstring.description}", + f"class docstring: {main_docstring.description}", + f"service description: {info.description}", + ) + + # checking parameters + all_param_names = set() + params_moodleset: dict[str, tuple[str, bool]] = {} + if isinstance(params, IRObject): + for name, param in params.fields.items(): + if isinstance(param, IRValue): + params_moodleset[name] = param.type, param.nullable + all_param_names.add(name) + else: + warn("parameters' IRObject contains non-IRValue", param, params) + elif params is not None: + warn("parameters function does not return IRObject", params) + + params_docstringset: dict[str, tuple[str, bool]] = {} + for name, docpair in main_func.docstring.params.items(): + name = name[1:] # removing dollar sign + params_docstringset[name] = convert_php_type_to_normal_type(docpair.typ) + all_param_names.add(name) + + params_phpset: dict[str, tuple[str, bool]] = {} + for name, typ in main_func.params.items(): + params_phpset[name] = convert_php_type_to_normal_type(typ) + + for name in all_param_names: + if not ( + name in params_moodleset.keys() + and name in params_docstringset.keys() + and name in params_phpset.keys() + ): + warn( + "API call parameter not found in all parameter lists", + f"moodle: {params_moodleset}", + f"docstring: {params_docstringset}", + f"php: {params_phpset}", + ) + elif not (params_moodleset[name] == params_docstringset[name] == params_phpset[name]): + warn( + "API call parameter not the same type in all parameter lists", + name, + f"moodle: {params_moodleset[name]}", + f"docstring: {params_docstringset[name]}", + f"php: {params_phpset[name]}", + ) + + # checking copyright + if main_docstring.copyright is None: + warn("missing copyright notice") + else: + with Popen(["git", "log", "-1", '--pretty=format:%as', info.path], stdout=PIPE) as p: + lastmodificationyear = int(p.communicate()[0].decode('utf-8').split('-')[0]) + if main_docstring.copyright[0] != lastmodificationyear: + warn( + "incorrect copyright year", + f"expected: {lastmodificationyear}", + f"got: {main_docstring.copyright[0]}" + ) + if main_docstring.copyright[1] != "necodeIT": + warn( + "incorrect copyright name", + "expected: necodeIT", + f"got: {main_docstring.copyright[1]}" + ) + + # checking subpackage + expected_subpackage = 'services_' + path.basename(path.dirname(info.path)) + if expected_subpackage != main_docstring.subpackage: + warn( + "incorrect subpackage", + f"expected: {expected_subpackage}", + f"got: {main_docstring.subpackage}" + ) CURRENT_SERVICE = None - # TODO: intermediary step - data = json.dumps(complete_info, default=lambda x: x.__dict__) if sys.argv[1] == "-": @@ -866,7 +1173,11 @@ def main(): with open(f"{sys.argv[1]}/script.js", "w") as f: f.write(script) - if HAS_WARNED: + if len(WARNCOUNT) > 0: + total_warns = sum(count for count in WARNCOUNT.values()) + print(f"printed \033[33m{total_warns}\033[0m warnings in total (\033[33m{total_warns / len(infos):.2f}\033[0m per \033[36mservice\033[0m)", file=sys.stderr) + for msg, count in WARNCOUNT.items(): + print(f"\033[33m{count:02d}\033[0mx \033[31m{msg}\033[0m") sys.exit(1) diff --git a/lbplanner/db/services.php b/lbplanner/db/services.php index 3a5bc590..d350a7b5 100644 --- a/lbplanner/db/services.php +++ b/lbplanner/db/services.php @@ -438,7 +438,6 @@ 'local_lbplanner_plan_update_plan', 'local_lbplanner_notifications_get_all_notifications', 'local_lbplanner_notifications_update_notification', - 'local_lbplanner_plan_get_access', 'local_lbplanner_plan_update_access', 'local_lbplanner_user_delete_user', 'local_lbplanner_plan_accept_invite',