diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00e8ffca07..6a1c4ae477 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,10 +6,9 @@ on: [push, pull_request] permissions: read-all jobs: - # Note: UI related linter tests will run in the gui job. lint: - name: Linters (pylint, pycodestyle) - + # Note: UI-related linters will run in the UI job(s). + name: Linters and Static Analysis runs-on: ubuntu-20.04 steps: @@ -19,9 +18,18 @@ jobs: python-version: '3.8' - name: Install dependencies run: | - pip install $(grep -iE "pylint|pycodestyle" analyzer/requirements_py/dev/requirements.txt) - - name: Run tests - run: make pylint pycodestyle + pip install $(grep -iE \ + "mypy|pycodestyle|pylint|types" \ + analyzer/requirements_py/dev/requirements.txt) \ + $(grep -iE \ + "mypy|pycodestyle|pylint|types" \ + codechecker_common/requirements_py/dev/requirements.txt) + - name: Run pylint & pycodestyle + run: make -k pycodestyle pylint + + - name: Run codechecker_common mypy tests + working-directory: codechecker_common + run: make mypy tools: name: Tools (report-converter, etc.) @@ -120,9 +128,8 @@ jobs: run: | pip install -r requirements_py/dev/requirements.txt - - name: Run mypy tests - working-directory: codechecker_common/tests - run: make mypy + - name: Run unit tests + run: make test_common web: name: Web diff --git a/.gitignore b/.gitignore index b59c2b93c7..fb07577bc4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ .DS_Store -*.py[cod] +### Python ### +# Byte-compiled / optimized / DLL files. 
+__pycache__/ +*.py[cdo] build build_dist diff --git a/Makefile b/Makefile index 3edbfd76ec..a1f8475f76 100644 --- a/Makefile +++ b/Makefile @@ -184,9 +184,15 @@ pycodestyle: pycodestyle_in_env: $(ACTIVATE_DEV_VENV) && $(PYCODE_CMD) -test: test_analyzer test_web +test: test_common test_analyzer test_web -test_in_env: test_analyzer_in_env test_web_in_env +test_in_env: test_common_in_env test_analyzer_in_env test_web_in_env + +test_common: + BUILD_DIR=$(BUILD_DIR) $(MAKE) -C $(CC_COMMON) test + +test_common_in_env: + $(MAKE) -C $(CC_COMMON) test_in_env test_analyzer: BUILD_DIR=$(BUILD_DIR) $(MAKE) -C $(CC_ANALYZER) test diff --git a/codechecker_common/Makefile b/codechecker_common/Makefile new file mode 100644 index 0000000000..5975871dc1 --- /dev/null +++ b/codechecker_common/Makefile @@ -0,0 +1 @@ +include tests/Makefile diff --git a/codechecker_common/compatibility/multiprocessing.py b/codechecker_common/compatibility/multiprocessing.py index 14ef7ebebe..49c967654a 100644 --- a/codechecker_common/compatibility/multiprocessing.py +++ b/codechecker_common/compatibility/multiprocessing.py @@ -13,8 +13,7 @@ # pylint: disable=no-name-in-module # pylint: disable=unused-import if sys.platform in ["darwin", "win32"]: - from multiprocess import Pool # type: ignore - from multiprocess import cpu_count + from multiprocess import Pool, Process, cpu_count else: - from concurrent.futures import ProcessPoolExecutor as Pool # type: ignore - from multiprocessing import cpu_count + from concurrent.futures import ProcessPoolExecutor as Pool + from multiprocessing import Process, cpu_count diff --git a/codechecker_common/configuration_access.py b/codechecker_common/configuration_access.py new file mode 100644 index 0000000000..e00f587f50 --- /dev/null +++ b/codechecker_common/configuration_access.py @@ -0,0 +1,1208 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM 
Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Handles the retrieval and access to a configuration structure loaded from +a file. +""" +from copy import deepcopy +from functools import partial +from enum import Enum, auto as Enumerator +from pathlib import Path, PosixPath, PurePosixPath +from typing import cast, Any, Callable, Dict, List, Optional, Tuple, Type, \ + Union + +from .logger import get_logger +from .util import load_json + + +LOG = get_logger("system") + +_K_CONFIGURATION = "__configuration" +_K_DATA_SLICE = "__data_slice" +_K_FILE_PATH = "__file_path" +_K_OPTION = "__option" +_K_OPTIONS = "__options" +_K_REF = "__ref" +_K_SCHEMA = "__schema" + +OptionDict = Dict[str, "OptionBase"] + + +class AccessError(Exception): + pass + + +class BackingDataError(AccessError): + def __init__(self, option: "OptionBase", cfg: "Configuration"): + super().__init__( + f"'{option._basename}' ('{option._path}') not found in " + f"configuration file '{cfg._file_path}'") + + +class BackingDataIndexError(AccessError, IndexError): + def __init__(self, option: "OptionBase", cfg: "Configuration", + index: int, count: int): + super().__init__( + f"list index {index} out of range for '{option._basename}' " + f"('{option._path}'), only {count} elements exist in " + f"configuration file '{cfg._file_path}'") + + +def _get_children_options(parent_path: str, options: OptionDict) \ + -> OptionDict: + if not parent_path.endswith('/'): + parent_path = parent_path + '/' + keys = set(map(lambda p: p[len(parent_path):].split('/', maxsplit=1)[0], + filter(lambda p: p.startswith(parent_path), options))) + opts = [options[parent_path + k] for k in keys if k != ''] + children = {o._basename: o for o in opts} + return children + + +def _step_into_child(option: "OptionBase", name: str): + try: + child_option = option._get_children()[name] + except 
KeyError: + # pylint: disable=raise-missing-from + raise AttributeError( + f"'{option._path}' option has no attribute '{name}'") + + try: + access = getattr(child_option, "_access") + return access + except AttributeError: + # pylint: disable=raise-missing-from + raise AttributeError( + f"'{child_option._path}' option can not be accessed") + + +class OptionBase: + def __init__(self, + schema: "Schema", + name: Optional[str], + path: str, + description: Optional[str] = None): + """ + Instantiates a new Option (base class) which designates, under a + user-facing `name`, an element accessible in a configuration + dictionary, as specified by `path`. ``/`` is the root of the + configuration dictionary, and each "directory" is a named key in a + sub-dictionary. + + The `Option` class hierarchy implement accessor classes, which deal + with a type-safe and semantically correct reading of the specified + values, but do not own or store the actual value of the option. + An underlying storage object (almost certainly a `dict`) is always + required during actual value access. + + For example, ``/max_run_count`` denotes the child of the top-level + `dict`, whereas ``/keepalive/enabled`` is a child of a sub-tree. + """ + self._schema_ = schema + self._name_ = name + self._description_ = description + self._path_ = path + + @property + def _name(self) -> str: + return self._name_ if self._name_ is not None else f"<{self._path}>" + + @property + def _description(self) -> Optional[str]: + return self._description_ + + @property + def _path(self) -> str: + return self._path_ + + @property + def _basename(self) -> str: + return PurePosixPath(self._path).name + + def _get_children(self) -> OptionDict: + """ + Returns the options that are registered as children of the current + Option in the schema. 
+ """ + raise NotImplementedError(f"{str(type(self))} can not have children") + + class _Access: + """ + The abstract base class to represent an ongoing access into a loaded + `Configuration`, an established `Schema`. + + When using the member ``.`` (dot) operator on a `Configuration`, + instances of this `_Access` class are created, allowing the client + code to continue descending into potential inner attributes. + + This base class does nothing, apart from storing references to the + core objects it was originally instantiated with. + """ + + def __init__(self, option: "OptionBase", cfg: "Configuration", + data_slice): + # This code is frightening at first, but, unfortunately, the usual + # 'self.member' syntax must be side-stepped such that + # __getattr__ and __setattr__ can be implemented in a + # user-friendly way. + object.__setattr__(self, _K_OPTION, option) + object.__setattr__(self, _K_CONFIGURATION, cfg) + object.__setattr__(self, _K_DATA_SLICE, data_slice) + + def _get(self) -> Any: + raise NotImplementedError( + f"{str(type(object.__getattribute__(self, _K_OPTION)))} " + "can not be get!") + + def _set(self, _value: Any) -> Any: + raise NotImplementedError( + f"{str(type(object.__getattribute__(self, _K_OPTION)))} " + "can not be set!") + + def _access(self, _cfg: "Configuration", _data_slice: Dict[str, Any]) \ + -> Any: + raise NotImplementedError(f"{str(type(self))} can not be accessed") + + +class OptionDirectory(OptionBase): + """ + Represents a collection group of options, corresponding to the "directory" + concept in filesystems. A directory may only contain sub-options and + metadata, and has no value unto itself. 
+ """ + + def __init__(self, + schema: "Schema", + name: Optional[str], + path: str, + description: Optional[str] = None): + super().__init__(schema=schema, name=name, path=path, + description=description) + + def add_option(self, name: Optional[str], path: str, *args, **kwargs): + """ + Adds an option with the given name and sub-path, relative to the + current directory. + + See `Schema.add_option()` for details. + """ + if not path.startswith("./"): + raise ValueError("'path' must be relative to the OptionDirectory") + + # MyPy has a bug for this forwarding idiom, see + # http://github.com/python/mypy/issues/6799. + return self._schema_.add_option(name=name, + path=path, + parent=self, + *args, **kwargs) # type: ignore + + def _get_children(self) -> OptionDict: + return _get_children_options(self._path, self._schema_.options) + + class _Access(OptionBase._Access): + """ + Allows accessing, as attributes, the first-level children of + "directories" (option groups). + """ + + def __init__(self, option: OptionBase, cfg: "Configuration", + data_slice: Dict[str, Any]): + if option._basename != '': + try: + data_slice = data_slice[option._basename] + except KeyError: + data_slice[option._basename] = {} + data_slice = data_slice[option._basename] + + super().__init__(option=option, cfg=cfg, data_slice=data_slice) + + def __dir__(self): + """ + Allows ``dir(...)`` to list the available children options' names. + """ + return sorted(set( + cast(List[Any], dir(super())) + + cast(List[Any], list(self.__dict__.keys())) + + cast(List[Any], list(object.__getattribute__(self, _K_OPTION) + ._get_children().keys())) + )) + + def __getattr__(self, name: str): + """ + Continues the accessing descent of the `Configuration` using the + object member access ``.`` (dot) operator. 
+ """ + opt, cfg, ds = \ + object.__getattribute__(self, _K_OPTION), \ + object.__getattribute__(self, _K_CONFIGURATION), \ + object.__getattribute__(self, _K_DATA_SLICE) + access_ctor = _step_into_child(opt, name) + return access_ctor(cfg, ds)._get() + + def __setattr__(self, name: str, value: Any): + """ + Allows setting an attribute in the `Configuration` using the member + access and set syntax of the ``.`` (dot) operator. + + ``X.foo = 5`` corresponds to ``X.__setattr__('foo', 5)``, and, + thus, this method must be implemented here, for the + `OptionDirectory`. + """ + opt, cfg, ds = \ + object.__getattribute__(self, _K_OPTION), \ + object.__getattribute__(self, _K_CONFIGURATION), \ + object.__getattribute__(self, _K_DATA_SLICE) + access_ctor = _step_into_child(opt, name) + return access_ctor(cfg, ds)._set(value) + + def _get(self): + """An access into a directory allows continuing to subelements.""" + return self + + def _set(self, _value: Any): + option = object.__getattribute__(self, _K_OPTION) + raise NotImplementedError( + f"'{option._basename}' ('{option._path}') directory can not " + "be set directly!") + + def _access(self, cfg: "Configuration", data_slice: Dict[str, Any]) \ + -> Any: + return OptionDirectory._Access(self, cfg, data_slice) + + +class OptionDirectoryList(OptionDirectory): + """ + Represents a special kind of OptionDirectory that acts as a "template" for + inner ``Option``s. + In a group, multiple copies of the same inner structure may exist, and + each instance is accessible in practice by specifying a numeric index of + the instance. + + Registered under the abstract path + ``/authentication/method_ldap/authorities[]/``, this type takes care of + requiring an index to access the children instances of this directory. 
+ """ + + def __init__(self, + schema: "Schema", + name: Optional[str], + path: str, + description: Optional[str] = None): + super().__init__(schema=schema, name=name, path=path, + description=description) + + @property + def _basename(self) -> str: + return PurePosixPath(self._path).name.replace("[]", '', 1) + + class _Access(OptionDirectory._Access): + """ + Allows accessing, as if members of a list, the first-level children of + "directories" (option groups). + """ + def __init__(self, option: OptionBase, cfg: "Configuration", + data_slice: Dict[str, Any]): + super().__init__(option=option, cfg=cfg, data_slice=data_slice) + + def __dir__(self): + return sorted(set( + cast(List[Any], dir(OptionBase._Access)) + + cast(List[Any], list(self.__dict__.keys())) + )) + + def __getattr__(self, _name: str): + raise NotImplementedError("Accessing an array of schema elements " + "must use the subscript operator []") + + def __setattr__(self, _name: str, _value: Any): + raise NotImplementedError("Accessing an array of schema elements " + "must use the subscript operator []") + + def __len__(self) -> int: + """Returns the number of child elements in the option list.""" + return len(self._data_slice) + + def __getitem__(self, index: int): + """ + Continues the accessing descent of the `Configuration` using the + object indexing ``[]`` operator. + """ + opt, cfg, ds = \ + object.__getattribute__(self, _K_OPTION), \ + object.__getattribute__(self, _K_CONFIGURATION), \ + object.__getattribute__(self, _K_DATA_SLICE) + try: + # Wrap the reference to the data of the single element into a + # pseudo-directory structure that contains the data as-if + # it was not the child of a list at all. + elem_slice = {opt._basename: ds[index]} + except IndexError: + # pylint: disable=raise-missing-from + raise BackingDataIndexError(opt, cfg, index, len(ds)) + + # The indexed element of a directory list is a single directory. 
+ return OptionDirectory._Access(opt, cfg, elem_slice)._get() + + def __setitem__(self, index: int, _value: Any): + option = object.__getattribute__(self, _K_OPTION) + raise NotImplementedError( + f"'{option._basename}' ('{option._path}') array elements can " + "not be set directly!") + + def _get(self): + """An access into a directory allows continuing to subelements.""" + return self + + def _set(self, _value: Any): + option = object.__getattribute__(self, _K_OPTION) + raise NotImplementedError( + f"'{option._basename}' ('{option._path}') directory can not " + "be set directly!") + + def _access(self, cfg: "Configuration", data_slice: Dict[str, Any]) \ + -> Any: + return OptionDirectoryList._Access(self, cfg, data_slice) + + +class InvalidOptionValueError(ValueError): + def __init__(self, option: OptionBase, value: Any): + super().__init__(f"invalid value {str(value)} passed to option " + f"'{option._basename}' ('{option._path}')") + + +def _log_validation_failure_custom_message(message: str, option: OptionBase, + _value: Any): + LOG.error("Option '%s' ('%s'): %s", + option._basename, option._path, message) + + +class ReadOnlyOptionError(AccessError): + def __init__(self, option: OptionBase): + super().__init__(f"option '{option._basename}' ('{option._path}') is " + "read-only!") + + +class RaiseIfUnset: + """ + Tag type to indicate that accessing `Option`'s `default()` should + ``raise`` if the value is not defined in the `Configuration` structure. + """ + + +class UnsetError(AccessError, KeyError): + def __init__(self, option: OptionBase): + super().__init__(f"option '{option._basename}' ('{option._path}') is " + "not set, and no suitable default value exists!") + + +class Option(OptionBase): + """ + `Option`s encapsulate the access to leaf nodes of the configuration + file, and return or assign their values in a raw form directly to the + data backing memory. 
+ + Note that an `Option` can still represent a complete `list` or `dict`, but + using such means that client code accesses the collection as a single + entity, without the configuration access layer associating further + semantics to individual elements. + """ + + def __init__(self, + schema: "Schema", + name: Optional[str], + path: str, + description: Optional[str] = None, + default: Union[RaiseIfUnset, + None, + Any, + Callable[[], Any]] = RaiseIfUnset, + read_only: bool = True, + secret: bool = False, + validation_predicate: Optional[ + Callable[[Any], bool] + ] = None, + validation_fail_action: Optional[ + Union[Callable[[OptionBase, Any], None], + str] + ] = None, + supports_update: bool = True, + update_callback: Optional[ + Callable[[OptionBase, str, Any, Any], None] + ] = None, + ): + """ + Initialises an `Option`, setting up its behaviour. + + Accessing an `Option` for reading, when done through a `Configuration` + structure, will return the value in the `Configuration`'s memory, + unless the `Option` is not mapped. + In that case, the `default` value is returned for reads, which may be + a concrete value, a factory function returning a concrete value, + `None`, or the special tag type `RaiseIfUnset`. + If the default `default` choice, `RaiseIfUnset`, is used, then the + unmappedness of the `Option` will raise the `UnsetError` to client + code; otherwise, the appropriate default object, or the result of the + factory function, is returned. + + If `read_only` is set to `False`, the option will be assignable with + the usual ``__setattr__`` syntax. + Note, that setting an `Option` only changes its value **IN MEMORY**, + mutating the `Configuration` data structure, but **NOT** the file in + storage. + Note also, that Python does not support the verification of read-only + status or a method for "const correctness" as thoroughly as other + languages, such as C or C++. 
+ If the `Option` corresponds to a complex (but from the purview of the + `configuration_access` library, unmanaged) data structure, such as a + `list`, `read_only` will **NOT PREVENT** client code from calling + mutators such as ``append()`` on the loaded entity. + + If the option is set to be `secret`, the values are not printed to + the output during `LOG` messages from this library. + The setting does not affect any other behaviour. + + When accessing the value, setting a new value, or updating via a + reload, if set, the `validation_predicate` function is executed, which + is expected to return `False` if the value is invalid based on some + domain-specific criteria. + If the validation fails, either the `str` message in + `validation_fail_action` (or a default, if `None`) is logged, or, if + `validation_fail_action` is a function, that callback is executed. + + After the callback has returned, invalid values will be handled in the + following way: + - Reading or setting an invalid value will result in a + `InvalidOptionValueError` being raised. + - Updating to an invalid value will keep the old value intact. + + By default, `Option`s support hot reloading, see + `Configuration.reload()`. + Following a reload, the value of the `Option` will be reflecting the + changes ingested from the backing storage file. + If `update_callback` is set to some function, it will be executed, + passing both the old and the new value of the `Option`. + Set `supports_update` to `False` to disable support for hot reloads. + If disabled, changes to the underlying value will be detected and + reported, but the reading accesses will still return the old value. 
+ """ + super().__init__(schema=schema, name=name, path=path, + description=description) + + self._default = default + self._read_only = read_only + self._secret = secret + + self._reload_update = supports_update + self._update_callback = update_callback + + self._validator = validation_predicate + self._fail_callback: Optional[Callable[[OptionBase, Any], None]] = None + if validation_fail_action is None: + self._fail_callback = _log_validation_failure + elif isinstance(validation_fail_action, str): + self._fail_callback = partial( + _log_validation_failure_custom_message, + validation_fail_action) + else: + self._fail_callback = validation_fail_action + + if self._default != RaiseIfUnset and self._validator is not None \ + and not self._validator(self.default): + raise ValueError(f"Default value '{str(self.default)}' for " + f"option '{self._basename}' ('{self._path}') is " + "invalid according to the validation predicate " + "and should not be used!") + + def _get_children(self) -> OptionDict: + raise NotImplementedError("'Option' is a leaf node.") + + class _Access(OptionBase._Access): + """ + Allows retrieving and setting the value of a leaf configuration option. 
+ """ + + def __init__(self, option: OptionBase, cfg: "Configuration", + data_slice: Dict[str, Any]): + if not isinstance(data_slice, dict): + raise TypeError("data captured in an access to a scalar " + "must offer reference semantics!") + + super().__init__(option=option, cfg=cfg, data_slice=data_slice) + + def _get(self): + opt, cfg, ds = \ + cast(Option, object.__getattribute__(self, _K_OPTION)), \ + object.__getattribute__(self, _K_CONFIGURATION), \ + object.__getattribute__(self, _K_DATA_SLICE) + try: + value = ds[opt._basename] + except KeyError: + if opt.has_default: + return opt.default + raise UnsetError(opt) from BackingDataError(opt, cfg) + + if not opt.run_validation(value): + opt.run_validation_failure_action(value) + raise InvalidOptionValueError(opt, value) + + return value + + def _set(self, value: Any): + opt, ds = \ + cast(Option, object.__getattribute__(self, _K_OPTION)), \ + object.__getattribute__(self, _K_DATA_SLICE) + + if opt.is_read_only: + raise ReadOnlyOptionError(opt) + + if not opt.run_validation(value): + opt.run_validation_failure_action(value) + raise InvalidOptionValueError(opt, value) + + ds[opt._basename] = value + + def _access(self, cfg: "Configuration", data_slice: Dict[str, Any]) \ + -> Any: + return Option._Access(self, cfg, data_slice) + + @property + def has_default(self) -> bool: + return self._default != RaiseIfUnset + + @property + def default(self) -> Optional[Any]: + """Explicitly returns the default value for this `Option`.""" + if not self.has_default: + raise UnsetError(self) + if callable(self._default): + return self._default() + return self._default + + @property + def is_read_only(self) -> bool: + return self._read_only + + @property + def is_secret(self) -> bool: + return self._secret + + @property + def is_updatable(self) -> bool: + return self._reload_update + + def run_validation(self, value: Any) -> bool: + """ + Executes the validation function of the `Option` and returns whether + the provided `value` is 
valid. + + This method does not execute the "validation failure callback", see + `run_validation_failure_action()` for that. + """ + return not self._validator or self._validator(value) + + def run_validation_failure_action(self, value: Any): + """ + Executes the `Option`'s validation failure callback action with the + given `value` as the parameter. + """ + if self._fail_callback: + return self._fail_callback(self, value) + return None + + def run_update_callback(self, path: str, old_value: Any, value: Any): + """ + Executes the `Option`'s `update_callback` action with the specified + values. + """ + if self._update_callback: + return self._update_callback(self, path, old_value, value) + return None + + +def _value_or_secret(option: Option, value: Any) -> str: + return "(secret!)" if option.is_secret else f"'{value}'" + + +def _log_validation_failure(option: OptionBase, value: Any) -> None: + LOG.error("Invalid value %s passed to option '%s' ('%s')", + '?' if not isinstance(option, Option) + else _value_or_secret(option, value), + option._basename, option._path) + + +class Schema: + """ + A schema is a collection of `Option` objects, which allow checked, + semantic access to a configuration data structure. + This object is a set of proxies, essentially a glorified sack of pointer + to data members. + The actual configuration values are NOT stored in this object, + see `Configuration`. + """ + def __init__(self): + # This code is frightening at first, but, unfortunately, the usual + # 'self.member' syntax must be side-stepped such that __getattr__ and + # __setattr__ can be implemented in a user-friendly way. 
+ object.__setattr__(self, _K_OPTIONS, { + '/': OptionDirectory( + schema=self, + name=None, + path='/', + description="") + }) + + @property + def options(self) -> OptionDict: + return object.__getattribute__(self, _K_OPTIONS) + + @property + def root(self) -> OptionDirectory: + return cast(OptionDirectory, self.options['/']) + + def add_option(self, + name: Optional[str], + path: str, + parent: Optional[OptionDirectory] = None, + **kwargs) -> OptionBase: + """ + Registers an `Option` in the current `Schema`. + + The apparent path of the to-be-created `Option` determines its type: + - paths ending in ``"[]/"`` denote an `OptionDirectoryList`, which + is a numbered list of `OptionDirectory`s, containing multiple + instances of `Option`s. + - paths ending in ``'/'`` denote an `OptionDirectory`. + - everything else denotes an `Option`, which is a leaf value. + + If path begins with ``"./"``, the `parent` parameter should be set, + and path is understood relative to the parent. + By default, the `parent` is the same as the `Root` of the schema. + + Additional keyword arguments are forwarded to the `Option` constructor. 
+ """ + if path == '/': + raise ValueError("The '/' root of the Option structure is " + "hard-coded and can not be manually added as " + "an option!") + if not path.startswith(('/', "./")): + raise ValueError( + f"Path '{path}' must be a proper relative or absolute " + "POSIX-y path") + if path.endswith("[]"): + raise ValueError( + f"Path '{path}' designating an indexable sequence must use " + "the directory syntax, and end with \"[]/\"") + if ' ' in path: + raise ValueError(f"Path '{path}' must not contain spaces") + + clazz: Type[OptionBase] = Option + if path.endswith('/'): + clazz = OptionDirectory + path = path.rstrip('/') + if path.endswith("[]"): + clazz = OptionDirectoryList + + if path.startswith("./"): + if parent is None: + parent = self.root + path = parent._path.rstrip('/') + '/' + path.replace("./", '', 1) + + options = object.__getattribute__(self, _K_OPTIONS) + for parent_path in map(str, reversed(PurePosixPath(path).parents)): + if parent_path not in options: + raise KeyError(f"Parent OptionDirectory-like '{parent_path}' " + "is not registered") + + opt = clazz(self, name, path, **kwargs) + if opt._path in options: + raise KeyError(f"Option '{opt._path}' is already registered!") + options[opt._path] = opt + return opt + + +def _get_config_json(file_path: Path) -> Dict[str, Any]: + LOG.debug("Reading configuration file '%s'...", file_path) + config_dict = load_json(str(file_path), None) + if config_dict is None: + raise ValueError( + f"Configuration file '{str(file_path)}' was invalid JSON. 
" + "The log output contains more information.") + LOG.debug("Loaded configuration file '%s'.", file_path) + + return config_dict + + +class ConfigurationUpdateFailureReason(Enum): + UPDATE_UNSUPPORTED = Enumerator() + VERIFICATION_FAILED = Enumerator() + LIST_ELEMENT_ONLY_PARTIALLY_UPDATED = Enumerator() + + +class Configuration: + """ + `Configuration` contains the memory-backed data structure loaded from a + configuration file, and allows access to it through an established + Schema. + """ + + ValidationResult = List[Tuple[str, Option]] + UpdateResult = Tuple[List[Tuple[str, OptionBase, Any]], + List[Tuple[str, OptionBase, + ConfigurationUpdateFailureReason]] + ] + + def __init__(self, schema: Schema, configuration: Dict[str, Any], + file_path: Path): + """ + Initialise a new `Configuration` collection. + + The collection copies and takes ownership of the `configuration` data + structure. + """ + object.__setattr__(self, _K_CONFIGURATION, deepcopy(configuration)) + object.__setattr__(self, _K_FILE_PATH, file_path) + object.__setattr__(self, _K_SCHEMA, schema) + + @classmethod + def from_file(cls, schema: Schema, configuration_file: Path): + """ + Initialise a new `Configuration` collection from the contents of a + file. + + :param configuration_file: The configuration file to be read and + parsed. This file *MUST* exist to initialise this instance. + The file *MUST* be in JSON format, currently this is the only one + supported. + """ + return cls(schema, + _get_config_json(configuration_file), + configuration_file) + + @classmethod + def from_memory(cls, schema: Schema, config_dict: Dict[str, Any]): + """ + Initialise a new `Configuration` collection from the contents of a + data structure in memory. + + The ceated data structure is deep-copied and does not alias + the parameter. 
+ """ + return cls(schema, config_dict, + PosixPath(f"/mem/@{hex(id(config_dict))}")) + + def __dir__(self): + """ + Allows ``dir(...)`` to list the top-level children of the + `Configuration`. + """ + return sorted(set( + cast(List[Any], dir(super())) + + cast(List[Any], list(self.__dict__.keys())) + + cast(List[Any], list(self._schema.root._get_children().keys())) + )) + + @property + def _file_path(self) -> Optional[Path]: + """Returns the file path from which the `Configuration` was loaded.""" + path = object.__getattribute__(self, _K_FILE_PATH) + return path if not str(path).startswith("/mem/@") else None + + @property + def _schema(self) -> Schema: + """Returns the `Schema` used as the schema of the `Configuration`.""" + return object.__getattribute__(self, _K_SCHEMA) + + def __getattr__(self, name: str) -> OptionBase: + """ + Starts the accessing descent of the `Configuration` using the object + member access ``.`` (dot) operator. + """ + data: dict = object.__getattribute__(self, _K_CONFIGURATION) + return getattr(self._schema.root._access(self, data), name) + + def __setattr__(self, name: str, value: Any): + """ + Helper method that makes configuration options settable through + assigning a member accessed via the ``.`` (dot) operator. + """ + data: dict = object.__getattribute__(self, _K_CONFIGURATION) + return setattr(self._schema.root._access(self, data), name, value) + + def _validate(self, + execute_validation_failure_callbacks: bool = True + ) -> ValidationResult: + """ + Checks all `Option`s in the current `Configuration` for their validity, + as specified by `Option.validation_predicate`, and returns the list of + those that failed. + + If `execute_validation_failure_callbacks` is `True`, the + "validation failure action" callbacks will be called for each failing + `Option`, fully simulating the normal behaviour of reading an `Option` + with an invalid value. + + Despite, this function **never** raises the `InvalidOptionValueError`. 
+ """ + failed_options: List[Tuple[str, Option]] = [] + + def _traverse(path: PurePosixPath, + opt: OptionBase, + data_slice: dict): + if isinstance(opt, OptionDirectoryList): + try: + list_slice = data_slice[opt._basename] + except KeyError: + return + + for i, e in enumerate(list_slice): + child_path = path / str(i) + _traverse(child_path, + OptionDirectory(opt._schema_, + opt._name, + opt._path, + opt._description), + # Construct a top-level data slice for one + # element of the list. + {f"{opt._basename}[]": e}) + return + if isinstance(opt, OptionDirectory): + try: + directory_slice = data_slice[opt._basename] + except KeyError: + directory_slice = {} + + for child in opt._get_children().values(): + child_path = path / child._basename + try: + # Construct a data slice for the recursing child. + # This method only reads the values, so it is not a + # problem that references are not bound here. + child_slice = {child._basename: + directory_slice[child._basename]} + except KeyError: + child_slice = {} + _traverse(child_path, child, child_slice) + return + if isinstance(opt, Option): + try: + value = data_slice[opt._basename] + except KeyError: + if not opt.has_default: + if execute_validation_failure_callbacks: + opt.run_validation_failure_action(None) + failed_options.append((str(path), opt)) + + # If there is a default, Option's constructor took care of + # ensuring that the default passes validation. 
+ return + + if not opt.run_validation(value): + failed_options.append((str(path), opt)) + if execute_validation_failure_callbacks: + opt.run_validation_failure_action(value) + return + + raise NotImplementedError( + f"Unhandled Option type: {str(type(opt))}") + + root = cast(OptionDirectory, self._schema.root) + _traverse(PurePosixPath(root._path), root, + {root._basename: + object.__getattribute__(self, _K_CONFIGURATION)}) + return failed_options + + def _update(self) -> UpdateResult: + """ + Updates the `Configuration` automatically from the last used backing + file, as available in `file_path`, if it was loaded from one. + Otherwise, it does nothing. + """ + return self._update_from_file(self._file_path) if self._file_path \ + else ([], []) + + def _update_from_file(self, configuration_file: Path) -> UpdateResult: + """ + Updates the `Configuration` from the specified `configuration_file`, + and sets `file_path` to point at this new file instead. + """ + LOG.info("Start updating configuration from file '%s'...", + configuration_file) + ret = self._update_from_memory(_get_config_json(configuration_file)) + object.__setattr__(self, _K_FILE_PATH, configuration_file) + return ret + + def _update_from_memory(self, config_dict: Dict[str, Any]) \ + -> UpdateResult: + """ + Updates the `Configuration` with the contents of the specified + `config_dict`. + + During an update, the `config_dict` structure is walked recursively, + according to the `Schema` of the `Configuration`. + For each changed value that does not match the corresponding value in + the currently loaded configuration data structure, the change is + validated according to `Option.validation_predicate`. + If the validation succeeds, the change is "merged" into the data owned + by `self`; otherwise, the old value is kept intact and the `Option` is + returned as a failing reload, + see `ConfigurationUpdateFailureReason.VERIFICATION_FAILED`. 
+ + If `Option.update_callback` is set for a successfully updated + `Option`, it is fired accordingly passing the `Option` instance, + the old, and the new value. + + Updates are only carried out for `Option`s that have their + `Option.supports_update` flag set to `True`. + Otherwise, if a non-updatable value still in fact changed when + differentiating the `self._configuration` and `config_dict`, this + is logged, but the old value is kept intact. + + Returns the `Option`s that changed value, and that failed to update. + """ + LOG.info("Start updating configuration ...") + unsuccessful_updates: List[Tuple[ + str, OptionBase, ConfigurationUpdateFailureReason]] = [] + + def _traverse(path: PurePosixPath, + opt: OptionBase, + self_data_slice: dict, + new_data_slice: dict) -> Tuple[ + bool, + List[Tuple[str, OptionBase, Any]], + List[Tuple[Option, str, Any, Any]] + ]: + updated_options: List[Tuple[str, OptionBase, Any]] = [] + update_callbacks_to_run: List[Tuple[Option, str, Any, Any]] = [] + if isinstance(opt, OptionDirectoryList): + success = True + # Updating lists is more convoluted than fixed trees. + # Unfortunately, lists do not have a key beyond an index, so + # identifying changes in configuration would be more involved, + # and impossible in the general case, as new list elements + # could be added BEFORE existing ones, removed all over the + # place, and "swapping" two elements shows up as potentially + # all fields changing in both affected elements. + # + # This poses a problem if an inner OptionDirectory tree can + # only partially update. + # Such would result in, e.g., if the "username" is missing + # from an authentication-like configuration where such is + # invalid result in keeping the old "username" in the run-time + # but using the new "password", which would clearly botch the + # sanity of the running process. 
+ # + # Instead, we will collect the result values in a separate + # list, and save the modified element only if it was fully + # updateable. + # In every other case, the already in-memory value will be + # kept active. + list_slice_result: List[Dict[Any, Any]] = [] + try: + list_slice_self = self_data_slice[_K_REF][opt._basename] + except KeyError: + list_slice_self = [] + + try: + list_slice_new = new_data_slice[_K_REF][opt._basename] + except KeyError: + list_slice_new = [] + + if len(list_slice_self) != len(list_slice_new): + LOG.warning("Length of list option '%s' ('%s') changed " + "from %d to %d.", + opt._name, path, + len(list_slice_self), len(list_slice_new)) + + success = True + for idx in range(0, min(len(list_slice_self), + len(list_slice_new))): + child_path = path / str(idx) + child_old_value = deepcopy(list_slice_self[idx]) + + # Construct top-level data slices for the current and new + # data to simulate directory access in the recursion. + directory_slice_self = {_K_REF: {f"{opt._basename}[]": + list_slice_self[idx]}} + directory_slice_new = {_K_REF: {f"{opt._basename}[]": + list_slice_new[idx]}} + + child_success, child_updates, child_callbacks = \ + _traverse(child_path, + OptionDirectory(opt._schema_, + opt._name, + opt._path, + opt._description), + directory_slice_self, + directory_slice_new) + + if child_success: + # If the update was successful, "directory_slice_self" + # will have the updated data patched in. + list_slice_result.append( + directory_slice_self[_K_REF][ + f"{opt._basename}[]"]) + updated_options.extend(child_updates) + update_callbacks_to_run.extend(child_callbacks) + else: + success = False + unsuccessful_updates.append(( + str(child_path), opt, + ConfigurationUpdateFailureReason. + LIST_ELEMENT_ONLY_PARTIALLY_UPDATED)) + LOG.error("Failed to update a configuration option " + "in an element of the list option " + "'%s' ('%s') at index %d. 
" + "The entire list element will retain its " + "**OLD VALUE**!", + opt._name, path, idx) + list_slice_result.append(child_old_value) + + self_data_slice[_K_REF][opt._basename] = list_slice_result + return success, updated_options, update_callbacks_to_run + if isinstance(opt, OptionDirectory): + try: + directory_slice_self = \ + self_data_slice[_K_REF][opt._basename] + except KeyError: + directory_slice_self = {} + + try: + directory_slice_new = \ + new_data_slice[_K_REF][opt._basename] + except KeyError: + directory_slice_new = {} + + success = True + for child in opt._get_children().values(): + child_path = path / child._basename + + # Construct data slices for the children by binding the + # entire parent collection behind a reference, as the + # child Option handling will usually **WRITE** into this + # data structure. + child_slice_self = {_K_REF: directory_slice_self} + child_slice_new = {_K_REF: directory_slice_new} + + child_success, child_updates, child_callbacks = \ + _traverse(child_path, child, + child_slice_self, child_slice_new) + success = success and child_success + updated_options.extend(child_updates) + update_callbacks_to_run.extend(child_callbacks) + + # It might be that an entire OptionDirectory tree happens to + # be added with the update(), and was not present originally. + # In that case, directory_slice_self was a local dict() + # literal, not part of the full configuration tree, so it has + # to be added now. 
+ self_data_slice[_K_REF][opt._basename] = directory_slice_self + return success, updated_options, update_callbacks_to_run + if isinstance(opt, Option): + old_value, new_value = None, None + try: + old_value = self_data_slice[_K_REF][opt._basename] + old_value_exists = True + except KeyError: + old_value_exists = opt.has_default + if old_value_exists: + old_value = opt.default + + try: + new_value = new_data_slice[_K_REF][opt._basename] + new_value_exists = True + except KeyError: + new_value_exists = opt.has_default + if new_value_exists: + new_value = opt.default + + if (not old_value_exists and not new_value_exists) \ + or old_value == new_value: + # There are no changes to the value (either explicitly, + # or the values were matching the defaults). + return True, [], [] + + if not new_value_exists: + # The value is gone from the new configuration object, and + # "lack of value" would default to throwing. + LOG.error("Value of configuration option '%s' ('%s') " + "missing from updated configuration, but " + "it is invalid to not have this value set!", + opt._name, path) + LOG.info("Configuration option '%s' ('%s') will keep its " + "**OLD VALUE**: %s", + opt._name, path, + _value_or_secret(opt, old_value)) + unsuccessful_updates.append( + (str(path), opt, + ConfigurationUpdateFailureReason.VERIFICATION_FAILED)) + return False, [], [] + + if not opt.is_updatable: + LOG.error("Value of configuration option '%s' ('%s') " + "observed change from %s to %s, " + "but it does NOT support hot (online) changes. 
" + "You will need to re-run CodeChecker for the " + "change to have an effect!", + opt._name, path, + _value_or_secret(opt, old_value), + _value_or_secret(opt, new_value)) + LOG.info("Configuration option '%s' ('%s') will keep its " + "**OLD VALUE**: %s", + opt._name, path, + _value_or_secret(opt, old_value)) + unsuccessful_updates.append( + (str(path), opt, + ConfigurationUpdateFailureReason.UPDATE_UNSUPPORTED)) + return False, [], [] + + if not opt.run_validation(new_value): + opt.run_validation_failure_action(new_value) + LOG.error("Value of configuration option '%s' ('%s') " + "observed change from %s to %s, " + "but the new value is invalid!", + opt._name, path, + _value_or_secret(opt, old_value), + _value_or_secret(opt, new_value)) + LOG.info("Configuration option '%s' ('%s') will keep its " + "**OLD VALUE**: %s", + opt._name, path, + _value_or_secret(opt, old_value)) + unsuccessful_updates.append( + (str(path), opt, + ConfigurationUpdateFailureReason.VERIFICATION_FAILED)) + return False, [], [] + + LOG.warning("Value of configuration option '%s' ('%s') " + "changed from %s to %s!", + opt._name, path, + _value_or_secret(opt, old_value), + _value_or_secret(opt, new_value)) + self_data_slice[_K_REF][opt._basename] = new_value + return True, [(str(path), opt, new_value)], \ + [(opt, str(path), old_value, new_value)] + + raise NotImplementedError( + f"Unhandled Option type: {str(type(opt))}") + + root = cast(OptionDirectory, self._schema.root) + _, updated_options, update_callbacks_to_run = \ + _traverse(PurePosixPath(root._path), root, + # Craft data slices that encapsulate the parent + # "directory" such that the changes to the in-memory + # configuration can be done through the usual reference + # semantics. 
+ {_K_REF: { + root._basename: + object.__getattribute__(self, _K_CONFIGURATION)}}, + {_K_REF: {root._basename: config_dict}}) + + for opt, path, old, new in update_callbacks_to_run: + opt.run_update_callback(path, old, new) + + LOG.info("Done updating configuration file.") + return updated_options, unsuccessful_updates diff --git a/codechecker_common/pytest.ini b/codechecker_common/pytest.ini new file mode 100644 index 0000000000..4512a33003 --- /dev/null +++ b/codechecker_common/pytest.ini @@ -0,0 +1,4 @@ +[pytest] + +addopts = + --verbose diff --git a/codechecker_common/requirements_py/dev/requirements.txt b/codechecker_common/requirements_py/dev/requirements.txt index 7f2d6b5d78..1ddd3ecfbb 100644 --- a/codechecker_common/requirements_py/dev/requirements.txt +++ b/codechecker_common/requirements_py/dev/requirements.txt @@ -4,3 +4,4 @@ mypy==1.7.1 PyYAML==6.0.1 types-PyYAML==6.0.12.12 setuptools==70.2.0 +pytest==7.3.1 diff --git a/codechecker_common/tests/Makefile b/codechecker_common/tests/Makefile index 984afc9d68..4931a75902 100644 --- a/codechecker_common/tests/Makefile +++ b/codechecker_common/tests/Makefile @@ -2,7 +2,8 @@ CURRENT_DIR = ${CURDIR} # Root of the repository. -REPO_ROOT ?= $(CURRENT_DIR)/../.. +REPO_ROOT ?= $(CURRENT_DIR)/.. +REPO_ROOT_ENV_VAR ?= REPO_ROOT=$(REPO_ROOT) MYPY_CMD = mypy --ignore-missing-imports $(REPO_ROOT)/codechecker_common @@ -11,3 +12,17 @@ mypy: mypy_in_env: venv_dev $(ACTIVATE_DEV_VENV) && $(MYPY_CMD) + +test: test_unit + +test_in_env: test_unit_in_env + +# pytest test runner configuration options. 
+PYTESTCFG = -c pytest.ini ${EXTRA_PYTEST_ARGS} +UNIT_TEST_CMD = $(REPO_ROOT_ENV_VAR) pytest $(PYTESTCFG) tests/unit + +test_unit: + $(UNIT_TEST_CMD) + +test_unit_in_env: venv_dev + $(ACTIVATE_DEV_VENV) && $(UNIT_TEST_CMD) diff --git a/codechecker_common/tests/unit/__init__.py b/codechecker_common/tests/unit/__init__.py new file mode 100644 index 0000000000..3697aa9467 --- /dev/null +++ b/codechecker_common/tests/unit/__init__.py @@ -0,0 +1,20 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Setup python modules for the unit tests. +""" + + +import os +import sys + +REPO_ROOT = os.path.abspath(os.environ['REPO_ROOT']) +PKG_ROOT = os.path.join(REPO_ROOT, 'build', 'CodeChecker') + +sys.path.append(REPO_ROOT) +sys.path.append(os.path.join(PKG_ROOT, 'lib', 'python3')) diff --git a/codechecker_common/tests/unit/test_configuration_access.py b/codechecker_common/tests/unit/test_configuration_access.py new file mode 100644 index 0000000000..ee99869fda --- /dev/null +++ b/codechecker_common/tests/unit/test_configuration_access.py @@ -0,0 +1,352 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +from typing import cast + +import unittest + +from codechecker_common import configuration_access as cfg + + +class ConfigurationAccessTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.SimpleSchema = cfg.Schema() + cls.SimpleSchema.add_option("default", "/default", + supports_update=False) + cls.SimpleSchema.add_option("writable", "/writable", read_only=False) + cls.SimpleSchema.add_option("with_default_1", "/with_default_1", + default=42, read_only=False) + cls.SimpleSchema.add_option("with_default_2", "/with_default_2", + default=lambda *_: 159, read_only=False) + + cls.SimpleSchema.add_option("validated", "/validated", + validation_predicate=lambda v: v != 1, + read_only=False, + default=0) + + cls.ValidationCallbackTriggered = None + + def _validation_callback(_option, value): + cls.ValidationCallbackTriggered = value + + cls.SimpleSchema.add_option( + "validated_with_callback", "/validated_with_callback", + validation_predicate=lambda v: v != 4, + validation_fail_action=_validation_callback, + read_only=False, + default=0) + + cls.ComplexSchema = cfg.Schema() + privilege_dir = cast( + cfg.OptionDirectory, + cls.ComplexSchema.add_option("privileges", "/privileges/")) + privilege_dir.add_option("normal_privilege", "./normal_privilege") + config_dir = cast( + cfg.OptionDirectoryList, + cls.ComplexSchema.add_option("configs", "/configs[]/")) + config_dir.add_option("username", "./username", + validation_predicate=lambda v: v != "") + config_dir.add_option("is_admin", "./is_admin", default=False, + validation_predicate=lambda v: type(v) is bool) + + cls.QuadraticSchema = cfg.Schema() + cls.QuadraticSchema.add_option("users", "/Users[]/") + cls.QuadraticSchema.add_option("username", "/Users[]/username", + validation_predicate=lambda v: v != "") + cls.QuadraticSchema.add_option("user_privileges", + 
"/Users[]/Privileges[]/") + cls.QuadraticSchema.add_option("user_privilege_code", + "/Users[]/Privileges[]/ID", + validation_predicate=lambda v: + type(v) is int) + cls.QuadraticSchema.add_option("user_privilege_scope", + "/Users[]/Privileges[]/Scope", + validation_predicate=lambda v: + type(v) is list) + + def test_option_disallows_invalid_default(self): + s = cfg.Schema() + with self.assertRaises(ValueError): + s.add_option("invalid", "/invalid", + default=42, + validation_predicate=lambda v: v != 42) + + def test_simple_options(self): + c = cfg.Configuration.from_memory(self.SimpleSchema, {}) + + with self.assertRaises(cfg.UnsetError): + self.assertEqual(c.default, -1) + with self.assertRaises(cfg.ReadOnlyOptionError): + c.default = 8 + + with self.assertRaises(cfg.UnsetError): + self.assertEqual(c.writable, -1) + + c.writable = -2 + self.assertEqual(c.writable, -2) + + self.assertEqual(c.with_default_1, 42) + self.assertEqual(c.with_default_2, 159) + + c.with_default_1 = 16 + self.assertEqual(c.with_default_1, 16) + self.assertEqual(c.with_default_2, 159) + + c.with_default_2 = 42 + self.assertEqual(c.with_default_1, 16) + self.assertEqual(c.with_default_2, 42) + + self.assertEqual(c.validated, 0) + with self.assertRaises(cfg.InvalidOptionValueError): + c.validated = 1 + self.assertEqual(c.validated, 0) + c.validated = 2 + self.assertEqual(c.validated, 2) + + def test_simple_options_with_existing_info(self): + c = cfg.Configuration.from_memory( + self.SimpleSchema, + { + "default": 1, + "writable": 2, + "with_default_1": 999, + "with_default_2": -42, + "validated": 8 + }) + + self.assertEqual(c.default, 1) + self.assertEqual(c.writable, 2) + self.assertEqual(c.with_default_1, 999) + self.assertEqual(c.with_default_2, -42) + self.assertEqual(c.validated, 8) + + c = cfg.Configuration.from_memory( + self.SimpleSchema, + { + "validated": 1 + }) + + with self.assertRaises(cfg.InvalidOptionValueError): + self.assertEqual(c.validated, 1) + + def 
test_validation_callback(self): + c = cfg.Configuration.from_memory(self.SimpleSchema, {}) + + with self.assertRaises(cfg.InvalidOptionValueError): + c.validated_with_callback = 4 + self.assertEqual(self.ValidationCallbackTriggered, 4) + + def test_early_validation(self): + c = cfg.Configuration.from_memory( + self.SimpleSchema, + { + # default is missing + "writable": 42, + # with_default_1 is missing + "with_default_2": -42, + # validated is failing + "validated": 1, + # validated_with_callback is failing + "validated_with_callback": 4, + }) + + fails = c._validate() + self.assertEqual(self.ValidationCallbackTriggered, 4) + self.assertEqual(len(fails), 3) + self.assertSetEqual( + {"/default", "/validated", "/validated_with_callback"}, + set(dict(fails).keys())) + + # Missing entire keys that are "sub-directories" of values. + c = cfg.Configuration.from_memory(self.ComplexSchema, {}) + + fails = c._validate() + self.assertEqual(len(fails), 1) + self.assertSetEqual( + {"/privileges/normal_privilege"}, + set(dict(fails).keys())) + + c = cfg.Configuration.from_memory( + self.ComplexSchema, + { + "privileges": { + "normal_privilege": 1 + }, + "configs": [ + { + "username": "", + "is_admin": False + }, + { + "username": "admin", + "is_admin": True + }, + { + "username": "user", + "is_admin": 3.14 + } + ] + }) + + fails = c._validate() + self.assertEqual(len(fails), 2) + self.assertSetEqual( + {"/configs/0/username", "/configs/2/is_admin"}, + set(dict(fails).keys())) + + c = cfg.Configuration.from_memory( + self.QuadraticSchema, + { + "Users": [ + { + "username": "root", + "Privileges": [ + { + "ID": 0, + "Scope": "" + } + ] + }, + { + "username": "admin", + "Privileges": [ + { + "ID": 42, + "Scope": ["normal"] + }, + { + "ID": 43, + "Scope": ["secret", "confidential"] + } + ] + }, + { + "username": "user", + "Privileges": [ + { + "ID": None, + "Scope": ["nothing"] + } + ] + } + ] + }) + + fails = c._validate() + self.assertEqual(len(fails), 2) + self.assertSetEqual( + 
{"/Users/0/Privileges/0/Scope", "/Users/2/Privileges/0/ID"}, + set(dict(fails).keys())) + + def test_update(self): + c1 = {"default": 1} + c2 = {"default": 2} + c = cfg.Configuration.from_memory(self.SimpleSchema, c1) + changes, fails = c._update_from_memory(c2) + + self.assertEqual(len(changes), 0) + self.assertEqual(len(fails), 1) + self.assertDictEqual( + {"/default": + cfg.ConfigurationUpdateFailureReason.UPDATE_UNSUPPORTED}, + {path: reason for path, _, reason in fails}) + + self.assertEqual(c.default, 1) + + c1 = { + "Users": [ + { + "username": "root", + "Privileges": [ + { + "ID": 0, + "Scope": [""] + } + ] + }, + { + "username": "admin", + "Privileges": [ + { + "ID": 42, + "Scope": ["normal"] + }, + { + "ID": 43, + "Scope": ["secret", "confidential"] + } + ] + }, + { + "username": "user", + "Privileges": [ + { + "ID": 1337, + "Scope": ["nothing"] + } + ] + } + ] + } + c2 = { + "Users": [ + { + "username": "__root", + "Privileges": [ + { + "ID": 1, + "Scope": [""] + } + ] + }, + { + "username": "__admin", + "Privileges": [ + { + "ID": 42, + "Scope": ["normal"] + }, + { + "ID": 43, + "Scope": ["secret", "confidential"] + } + ] + }, + { + "username": "?", # This change is dropped... + "Privileges": [ + { + "ID": None, # ... because this is invalid. + "Scope": ["nothing"] + } + ] + } + ] + } + c = cfg.Configuration.from_memory(self.QuadraticSchema, c1) + changes, fails = c._update_from_memory(c2) + + self.assertEqual(len(changes), 3) + self.assertSetEqual( + {"/Users/0/username", "/Users/0/Privileges/0/ID", + "/Users/1/username"}, + {path for path, _, _ in changes}) + + self.assertEqual(len(fails), 3) + self.assertDictEqual( + {"/Users/2/Privileges/0/ID": + cfg.ConfigurationUpdateFailureReason.VERIFICATION_FAILED, + "/Users/2/Privileges/0": cfg.ConfigurationUpdateFailureReason. + LIST_ELEMENT_ONLY_PARTIALLY_UPDATED, + "/Users/2": cfg.ConfigurationUpdateFailureReason. 
+ LIST_ELEMENT_ONLY_PARTIALLY_UPDATED}, + {path: reason for path, _, reason in fails}) + + self.assertEqual(cast(cfg.OptionDirectoryList._Access, c.Users)[2] + .username, "user") diff --git a/codechecker_common/util.py b/codechecker_common/util.py index e389b8d1a0..49f6f58f39 100644 --- a/codechecker_common/util.py +++ b/codechecker_common/util.py @@ -8,9 +8,12 @@ """ Util module. """ +import datetime +import hashlib import itertools import json import os +import random from typing import TextIO import portalocker @@ -90,9 +93,11 @@ def load_json(path: str, default=None, lock=False, display_warning=True): def get_linef(fp: TextIO, line_no: int) -> str: - """'fp' should be (readable) file object. - Return the line content at line_no or an empty line - if there is less lines than line_no. + """ + `fp` should be (readable) file object. + + Return the line content at `line_no` or an empty line if there are less + lines than `line_no`. """ fp.seek(0) for line in fp: @@ -112,3 +117,19 @@ def path_for_fake_root(full_path: str, root_path: str = '/') -> str: def strtobool(value: str) -> bool: """Parse a string value to a boolean.""" return value.lower() in ('y', 'yes', 't', 'true', 'on', '1') + + +def generate_random_token(num_bytes: int = 32) -> str: + """ + Returns a random-generated string usable as a token with `num_bytes` + hexadecimal characters in the output. 
+ """ + prefix = str(os.getpid()).encode() + suffix = str(datetime.datetime.now()).encode() + + hash_value = ''.join( + [hashlib.sha256(prefix + os.urandom(num_bytes * 2) + suffix) + .hexdigest() + for _ in range(0, -(num_bytes // -64))]) + idx = random.randrange(0, len(hash_value) - num_bytes + 1) + return hash_value[idx:(idx + num_bytes)] diff --git a/docs/web/authentication.md b/docs/web/authentication.md index e7a0f2cf1e..2b2b9d27c6 100644 --- a/docs/web/authentication.md +++ b/docs/web/authentication.md @@ -15,7 +15,7 @@ Table of Contents * [PAM authentication](#pam-authentication) * [LDAP authentication](#ldap-authentication) * [Configuration options](#configuration-options) - * Membership in custom groups with [regex_groups](#regex_groups-authentication) + * [Membership in custom groups with RegEx groups](#regex_groups-authentication) * [Client-side configuration](#client-side-configuration) * [Web-browser client](#web-browser-client) * [Command-line client](#command-line-client) @@ -38,32 +38,25 @@ is handled. * `enabled` - Setting this to `false` disables privileged access - - * `realm_name` + Setting this to `false` disables privileged access. - The name to show for web-browser viewers' pop-up login window via - *HTTP Authenticate* - - * `realm_error` - - The error message shown in the browser when the user fails to authenticate - * `logins_until_cleanup` After this many login attempts made towards the server, it will perform an automatic cleanup of old, expired sessions. + This option can be changed and reloaded without server restart by using the `--reload` option of CodeChecker server command. - + * `session_lifetime` (in seconds) The lifetime of the session sets that after this many seconds - since last session access the session is permanently invalidated. + since last session access the session is permanently invalidated, and the + user is logged out. 
This option can be changed and reloaded without server restart by using the `--reload` option of CodeChecker server command. - + * `refresh_time` (in seconds) Refresh time of the local session objects. We use local session @@ -76,9 +69,6 @@ is handled. This option can be changed and reloaded without server restart by using the `--reload` option of CodeChecker server command. -If the server is shut down, every session is **immediately** invalidated. The -running sessions are only stored in the server's memory, they are not written -to storage. Every authentication method is its own JSON object in this section. Every authentication method has its own `enabled` key which dictates whether it is @@ -185,8 +175,8 @@ servers as it can elongate the authentication process. * `tls_require_cert` - If set to `never`, skip verification of certificate in LDAPS connections - (!!! INSECURE !!!). + If set to `never`, skip verification of certificate in LDAPS connections. + **Setting this makes the authentication process INSECURE!** * `username` @@ -199,35 +189,39 @@ servers as it can elongate the authentication process. * `referrals` - Microsoft Active Directory by returns referrals (search continuations). + Microsoft Active Directory by default returns referrals + (search continuations). LDAPv3 does not specify which credentials should be used by the clients when chasing these referrals and will be tried as an anonymous access by - the libldap library which might fail. Will be disabled by default. + the `libldap` library, which might fail. + Will be disabled by default. * `deref` - Configure how the alias dereferencing is done in libldap (valid values: - `always`, `never`). + Configure how the alias dereferencing is done in `libldap` (valid values: + `"always"`, `"never"`). * `accountBase` - Root tree containing all the user accounts. + Root tree containing all user accounts. * `accountScope` - Scope of the search performed. Accepted values are: base, one, subtree. 
+ Scope of the search performed. + Accepted values are: `"base"`, `"one"`, `"subtree"`. * `accountPattern` - The special `$USN$` token in the query is replaced to the *username* at - login. Query pattern used to search for a user account. Must be a valid - LDAP query expression. + Query pattern used to search for a user account. + Must be a valid LDAP query expression. + The special `$USN$` token in the query is replaced with the *username* at + login. Example configuration: `(&(objectClass=person)(sAMAccountName=$USN$))` * `user_dn_postfix_preference` - User DN postfix preference value can be used to select out one prefered + User DN postfix preference value can be used to select out one preferred user DN if multiple DN entries are found by the LDAP search. The configured value will be matched and the first matching will be used. If only one DN was found this postfix matching will not be used. @@ -238,7 +232,7 @@ servers as it can elongate the authentication process. * `groupBase` - Root tree containing all the groups. + Root tree containing all groups. * `groupPattern` @@ -292,13 +286,13 @@ servers as it can elongate the authentication process. } ~~~ -## Membership in custom groups with regex_groups +## Membership in custom groups with RegEx groups Many regular expressions can be listed to define a group. Please note that the regular expressions are searched in the whole username string, so they should be properly anchored if you want to match only in the beginning or in the end. Regular expression matching follows the rules of Python's -[re.search()](https://docs.python.org/3/library/re.html). +[`re.search()`](https://docs.python.org/3/library/re.html). 
 The following example will create a group named `everybody` that contains
 every user regardless of the authentication method, and a group named `admins`
diff --git a/docs/web/db_schema_guide.md b/docs/web/db_schema_guide.md
index bb35946169..29723c5788 100644
--- a/docs/web/db_schema_guide.md
+++ b/docs/web/db_schema_guide.md
@@ -39,7 +39,7 @@ according to your database configuration.
 
 ### **Step 3**: Use alembic to autogenerate migration scripts
 
-`alembic --name config_db revision --autogenerate -m "Change description"`
+`PYTHONPATH="build/CodeChecker/lib/python3" alembic --name config_db revision --autogenerate -m "Change description"`
 
 ### **Step 4**: Check the generated scripts
 
@@ -81,7 +81,7 @@ according to your database configuration.
 
 #### **Step 2**: Generating migration scripts using autogenerate
 
-`alembic --name run_db revision --autogenerate -m "Change description"`
+`PYTHONPATH="build/CodeChecker/lib/python3" alembic --name run_db revision --autogenerate -m "Change description"`
 
 #### **Step 3**: Check the generated scripts
 
diff --git a/docs/web/server_config.md b/docs/web/server_config.md
index add9bddcb7..8ac0781d32 100644
--- a/docs/web/server_config.md
+++ b/docs/web/server_config.md
@@ -1,11 +1,12 @@
 CodeChecker server configuration
 ====================================
 
-The server's configuration is stored in the server's *workspace* folder, in
-`server_config.json`. This file is created, at the first start of the server,
-using the package's installed `config/server_config.json` as a template.
+The server's configuration is stored in the server's *workspace* directory, in
+**`server_config.json`**.
+This file is created, at the first start of the server, using the package's
+installed `config/server_config.json` as a template.
 
-> **NOTICE!** `session_config.json` file has been deprecated.
+> **NOTICE!** The _`session_config.json`_ file has been deprecated.
 
 Table of Contents
 =================
@@ -57,17 +58,19 @@ The `limit` section controls limitation of analysis statistics.
 The `failure_zip_size` section of the `limit` controls the maximum size of
 uploadable failure zips in *bytes*.
 
-*Default value*: 52428800 bytes = 50 MB
+*Default value*: 52 428 800 bytes (50 MiB).
 
 #### Size of the compilation database
 
 The `compilation_database_size` section of the `limit` controls the maximum
 size of uploadable compilation database file in *bytes*.
 
-*Default value*: 104857600 bytes = 100 MB
+*Default value*: 104 857 600 bytes (100 MiB).
 
 ### Keepalive
 
-Linux has built-in support for keepalive. When using a CodeChecker server
-with `Docker Swarm` it is recommended to use the following settings:
+Linux has built-in support for keep-alive of the network sockets.
+When using a CodeChecker server with _Docker Swarm_ it is recommended to use
+the following settings:
+
 ```json
 {
   "keepalive": {
@@ -89,19 +92,19 @@ For more information about this problem can be found here:
 https://github.com/moby/moby/issues/31208#issuecomment-303905737
 
 #### Idle time
-The interval between the last data packet sent (simple ACKs are not considered
-data) and the first keepalive probe.
+The interval (in seconds) between the last data packet sent (simple `ACK`s are
+not considered data) and the first keepalive probe.
 
 By default the server will use the value from your host configured by the
-`net.ipv4.tcp_keepalive_time` parameter. This value can be overriden by the
+`net.ipv4.tcp_keepalive_time` parameter. This value can be overridden by the
 `idle` key in the server configuration file.
 
 #### Interval time
-The interval between subsequential keepalive probes, regardless of what the
-connection has exchanged in the meantime.
+The interval (in seconds) between subsequent keepalive probes, regardless of
+what the connection has exchanged in the meantime.
By default the server will use the value from your host configured by the -`net.ipv4.tcp_keepalive_intvl` parameter. This value can be overriden by the +`net.ipv4.tcp_keepalive_intvl` parameter. This value can be overridden by the `interval` key in the server configuration file. #### Probes @@ -109,9 +112,9 @@ The number of unacknowledged probes to send before considering the connection dead and notifying the application layer. By default the server will use the value from your host configured by the -`net.ipv4.tcp_keepalive_probes` parameter. This value can be overriden by the +`net.ipv4.tcp_keepalive_probes` parameter. This value can be overridden by the `max_probe` key in the server configuration file. ## Authentication For authentication configuration options and which options can be reloaded see -the [Authentication](authentication.md) documentation. +the [Authentication](authentication.md) documentation. \ No newline at end of file diff --git a/docs/web/user_guide.md b/docs/web/user_guide.md index 846599b76a..a3cf42acdd 100644 --- a/docs/web/user_guide.md +++ b/docs/web/user_guide.md @@ -429,7 +429,7 @@ usage: CodeChecker store [-h] [-t {plist}] [-n NAME] [--tag TAG] [--verbose {info,debug,debug_analyzer}] [file/folder [file/folder ...]] -Store the results from one or more 'codechecker-analyze' result files in a +Store the results from one or more 'CodeChecker analyze' result files in a database. positional arguments: diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py index 58e7f307a9..bd70cc7deb 100644 --- a/web/client/codechecker_client/cmd/store.py +++ b/web/client/codechecker_client/cmd/store.py @@ -9,8 +9,6 @@ 'CodeChecker store' parses a list of analysis results and stores them in the database. 
""" - - import argparse import base64 import functools @@ -66,7 +64,7 @@ def assemble_blame_info(_, __) -> int: LOG = logger.get_logger('system') -MAX_UPLOAD_SIZE = 1 * 1024 * 1024 * 1024 # 1GiB +MAX_UPLOAD_SIZE = 1 * 1024 * 1024 * 1024 # 1 GiB. AnalyzerResultFileReports = Dict[str, List[Report]] @@ -141,11 +139,11 @@ def sizeof_fmt(num, suffix='B'): Source: https://stackoverflow.com/questions/1094841/ reusable-library-to-get-human-readable-version-of-file-size """ - for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: + for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi', 'Ri']: if abs(num) < 1024.0: return f"{num:3.1f}{unit}{suffix}" num /= 1024.0 - return f"{num:.1f}Yi{suffix}" + return f"{num:.1f}Qi{suffix}" def get_file_content_hash(file_path): @@ -170,7 +168,7 @@ def get_argparser_ctor_args(): # Description is shown when the command's help is queried directly 'description': """ -Store the results from one or more 'codechecker-analyze' result files in a +Store the results from one or more 'CodeChecker analyze' result files in a database.""", # Epilogue is shown after the arguments when the help is queried diff --git a/web/server/codechecker_server/api/authentication.py b/web/server/codechecker_server/api/authentication.py index 1430ad9fd6..1906a71893 100644 --- a/web/server/codechecker_server/api/authentication.py +++ b/web/server/codechecker_server/api/authentication.py @@ -28,7 +28,7 @@ from ..permissions import handler_from_scope_params as make_handler, \ require_manager, require_permission from ..server import permissions -from ..session_manager import generate_session_token +from ..session_manager import generate_random_token, SESSION_TOKEN_LENGTH LOG = get_logger('server') @@ -40,8 +40,12 @@ class ThriftAuthHandler: Handle Thrift authentication requests. 
""" - def __init__(self, manager, auth_session, config_database): - self.__manager = manager + def __init__(self, + _configuration_manager, + session_manager, + auth_session, + config_database): + self.__session_manager = session_manager self.__auth_session = auth_session self.__config_db = config_database @@ -58,7 +62,7 @@ def __require_privilaged_access(self): def __has_permission(self, permission) -> bool: """ True if the current user has given permission rights. """ - if self.__manager.is_enabled and not self.__auth_session: + if self.__session_manager.is_enabled and not self.__auth_session: return False return self.hasPermission(permission, None) @@ -86,7 +90,7 @@ def checkAPIVersion(self): def getAuthParameters(self): alive = self.__auth_session.is_alive if self.__auth_session \ else False - return HandshakeInformation(self.__manager.is_enabled, alive) + return HandshakeInformation(self.__session_manager.is_enabled, alive) @timeit def getLoggedInUser(self): @@ -134,7 +138,7 @@ def getAccessControl(self): else: product_permissions[endpoint].user[name].append(perm) - default_superuser = self.__manager.default_superuser_name + default_superuser = self.__session_manager.default_superuser_name if default_superuser: global_permissions.user[default_superuser].append("SUPERUSER") @@ -153,7 +157,7 @@ def performLogin(self, auth_method, auth_string): user_name, _ = auth_string.split(':', 1) LOG.debug("'%s' logging in...", user_name) - session = self.__manager.create_session(auth_string) + session = self.__session_manager.create_session(auth_string) if session: LOG.info("'%s' logged in.", user_name) @@ -180,7 +184,7 @@ def destroySession(self): if self.__auth_session: token = self.__auth_session.token - is_logged_out = self.__manager.invalidate(token) + is_logged_out = self.__session_manager.invalidate(token) if is_logged_out: LOG.info("'%s' logged out.", user_name) return is_logged_out @@ -363,7 +367,7 @@ def newToken(self, description): """ 
self.__require_privilaged_access() with DBSession(self.__config_db) as session: - token = generate_session_token() + token = generate_random_token(SESSION_TOKEN_LENGTH) user = self.getLoggedInUser() groups = ';'.join(self.__auth_session.groups) session_token = Session(token, user, groups, description, False) @@ -402,7 +406,7 @@ def removeToken(self, token): "database.") # Invalidate the local session by token. - self.__manager.invalidate_local_session(token) + self.__session_manager.invalidate_local_session(token) LOG.info("Personal access token '%s...' has been removed by '%s'.", token[:5], self.getLoggedInUser()) diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 87ab4e2a52..1452081929 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -105,9 +105,9 @@ def __exit__(self, *args): def unzip(b64zip: str, output_dir: str) -> int: """ - This function unzips the base64 encoded zip file. This zip is extracted - to a temporary directory and the ZIP is then deleted. The function returns - the size of the extracted decompressed zip file. + This function unzips a Base64 encoded and ZLib-compressed ZIP file. This + ZIP is extracted to a temporary directory and the ZIP is then deleted. + The function returns the size of the extracted decompressed ZIP file. """ if len(b64zip) == 0: return 0 @@ -126,11 +126,10 @@ def unzip(b64zip: str, output_dir: str) -> int: import traceback traceback.print_exc() raise - return 0 def get_file_content(file_path: str) -> bytes: - """Return the file content for the given filepath. 
""" + """Return the file content for the given filepath.""" with open(file_path, 'rb') as f: return f.read() @@ -276,8 +275,8 @@ def __init__( self.__get_report_limit_for_product() @property - def __manager(self): - return self.__report_server._manager + def __configuration_manager(self): + return self.__report_server._configuration_manager @property def __config_database(self): @@ -299,7 +298,7 @@ def __check_run_limit(self): """ Checks the maximum allowed of uploadable runs for the current product. """ - max_run_count = self.__manager.get_max_run_count() + max_run_count = self.__configuration_manager.max_run_count with DBSession(self.__config_database) as session: product = session.query(Product).get(self.__product.id) @@ -1392,7 +1391,6 @@ def finish_checker_run( if not run: return False - run.mark_finished() run.duration = self.__duration return True diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py index c98cbc71c0..b2cffbb16c 100644 --- a/web/server/codechecker_server/api/report_server.py +++ b/web/server/codechecker_server/api/report_server.py @@ -21,7 +21,7 @@ from copy import deepcopy from collections import OrderedDict, defaultdict, namedtuple -from datetime import datetime, timedelta +from datetime import datetime from typing import Any, Dict, List, Optional, Set, Tuple import sqlalchemy @@ -1081,8 +1081,7 @@ def check_remove_runs_lock(session, run_ids): expired yet. If so, the run cannot be deleted, as someone is assumed to be storing into it. 
""" - locks_expired_at = datetime.now() - timedelta( - seconds=db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE) + locks_expired_at = datetime.now() - db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE run_locks = session.query(RunLock.name) \ .filter(RunLock.locked_at >= locks_expired_at) @@ -1377,7 +1376,8 @@ class ThriftRequestHandler: """ def __init__(self, - manager, + configuration_manager, + session_manager, Session, product, auth_session, @@ -1390,7 +1390,8 @@ def __init__(self, raise ValueError("Cannot initialize request handler without " "a product to serve.") - self._manager = manager + self._configuration_manager = configuration_manager + self._session_manager = session_manager self._product = product self._auth_session = auth_session self._config_database = config_database @@ -3938,7 +3939,8 @@ def massStoreRun(self, name, tag, version, b64zip, force, def allowsStoringAnalysisStatistics(self): self.__require_store() - return bool(self._manager.get_analysis_statistics_dir()) + return self._configuration_manager.store.analysis_statistics_dir \ + is not None @exc_to_thrift_reqfail @timeit @@ -3948,13 +3950,14 @@ def getAnalysisStatisticsLimits(self): cfg = {} # Get the limit of failure zip size. - failure_zip_size = self._manager.get_failure_zip_size() + failure_zip_size = self._configuration_manager.store.limit \ + .failure_zip_size if failure_zip_size: cfg[ttypes.StoreLimitKind.FAILURE_ZIP_SIZE] = failure_zip_size # Get the limit of compilation database size. 
compilation_database_size = \ - self._manager.get_compilation_database_size() + self._configuration_manager.store.limit.compilation_database_size if compilation_database_size: cfg[ttypes.StoreLimitKind.COMPILATION_DATABASE_SIZE] = \ compilation_database_size @@ -3966,7 +3969,8 @@ def getAnalysisStatisticsLimits(self): def storeAnalysisStatistics(self, run_name, b64zip): self.__require_store() - report_dir_store = self._manager.get_analysis_statistics_dir() + report_dir_store = self._configuration_manager.store \ + .analysis_statistics_dir if report_dir_store: try: product_dir = os.path.join(report_dir_store, diff --git a/web/server/codechecker_server/auth/cc_ldap.py b/web/server/codechecker_server/auth/cc_ldap.py index e126109026..ec8a30fcf8 100644 --- a/web/server/codechecker_server/auth/cc_ldap.py +++ b/web/server/codechecker_server/auth/cc_ldap.py @@ -95,13 +95,15 @@ import ldap from ldap.dn import escape_dn_chars +from codechecker_common import configuration_access from codechecker_common.logger import get_logger LOG = get_logger('server') def bytes_to_str(data): - """ Converts the given data to text. + """ + Converts the given data to text. The LDAP protocol states that some fields be encoded in UTF-8. Attribute values, on the other hand, MAY contain any type of data, including text. @@ -224,17 +226,18 @@ def __init__(self, ldap_config, who=None, cred=None): None if initialization failed. 
""" - ldap_server = ldap_config.get('connection_url') - if ldap_server is None: - LOG.error('Server address is missing from the configuration') + try: + ldap_server = ldap_config.connection_url + except configuration_access.UnsetError: + LOG.error("LDAP server address is missing from the configuration!") self.connection = None return - referrals = ldap_config.get('referrals', False) + referrals = ldap_config.referrals ldap.set_option(ldap.OPT_REFERRALS, 1 if referrals else 0) - deref = ldap_config.get('deref', ldap.DEREF_ALWAYS) - if deref == 'never': + deref = ldap_config.deref + if deref == "never": deref = ldap.DEREF_NEVER else: deref = ldap.DEREF_ALWAYS @@ -243,9 +246,9 @@ def __init__(self, ldap_config, who=None, cred=None): ldap.protocol_version = ldap.VERSION3 - # Verify certificate in LDAPS connections - tls_require_cert = ldap_config.get('tls_require_cert', '') - if tls_require_cert.lower() == 'never': + # Verify the server certificate in LDAPS connections. + tls_require_cert = ldap_config.tls_require_cert + if tls_require_cert.lower() == "never": LOG.debug("Insecure LDAPS connection because of " "tls_require_cert=='never'") ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER) @@ -309,7 +312,7 @@ def get_ldap_query_scope(scope_form_config): return ldap.SCOPE_SUBTREE -def auth_user(ldap_config, username=None, credentials=None): +def auth_user(ldap_config, username=None, credentials=None) -> bool: """ Authenticate a user. 
""" @@ -318,12 +321,12 @@ def auth_user(ldap_config, username=None, credentials=None): ' authentication.') return False - account_base = ldap_config.get('accountBase') + account_base = ldap_config.accountBase if account_base is None: LOG.warning('Account base needs to be configured to query users') return False - account_pattern = ldap_config.get('accountPattern') + account_pattern = ldap_config.accountPattern if account_pattern is None: LOG.warning('No account pattern is defined to search for users.') LOG.warning('Please configure one.') @@ -332,21 +335,21 @@ def auth_user(ldap_config, username=None, credentials=None): username = escape_dn_chars(username) account_pattern = account_pattern.replace('$USN$', username) - account_scope = ldap_config.get('accountScope', '') + account_scope = ldap_config.accountScope account_scope = get_ldap_query_scope(account_scope) - service_user = ldap_config.get('username') - service_cred = ldap_config.get('password') + service_user = ldap_config.username + service_cred = ldap_config.password - # Service user is not configured try to authenticate - # with the given username and credentials. if not service_user: + # Service user is not configured. Try to authenticate with the login + # username and credentials. service_user = username service_cred = credentials LOG.debug("Creating SERVICE connection...") - user_dn_postfix_preference = ldap_config.get('user_dn_postfix_preference') + user_dn_postfix_preference = ldap_config.user_dn_postfix_preference with LDAPConnection(ldap_config, service_user, service_cred) as connection: if connection is None: @@ -382,12 +385,12 @@ def get_groups(ldap_config, username, credentials): Get the LDAP groups for a given user. 
""" - account_base = ldap_config.get('accountBase') + account_base = ldap_config.accountBase if account_base is None: LOG.error('Account base needs to be configured to query users') return False - account_pattern = ldap_config.get('accountPattern') + account_pattern = ldap_config.accountPattern if account_pattern is None: LOG.error('No account pattern is defined to search for users.') LOG.error('Please configure one.') @@ -395,16 +398,16 @@ def get_groups(ldap_config, username, credentials): account_pattern = account_pattern.replace('$USN$', username) - account_scope = ldap_config.get('accountScope', '') + account_scope = ldap_config.accountScope account_scope = get_ldap_query_scope(account_scope) - service_user = ldap_config.get('username') - service_cred = ldap_config.get('password') + service_user = ldap_config.username + service_cred = ldap_config.password if not service_user: service_user = username service_cred = credentials - user_dn_postfix_preference = ldap_config.get('user_dn_postfix_preference') + user_dn_postfix_preference = ldap_config.user_dn_postfix_preference LOG.debug("creating LDAP connection. 
service user %s", service_user) with LDAPConnection(ldap_config, service_user, service_cred) as connection: @@ -419,7 +422,7 @@ def get_groups(ldap_config, username, credentials): account_scope, user_dn_postfix_preference) - group_pattern = ldap_config.get('groupPattern') + group_pattern = ldap_config.groupPattern if user_dn and not group_pattern: LOG.debug("User '%s' found but there is no group_pattern" " to check LDAP for group membership.", @@ -429,16 +432,16 @@ def get_groups(ldap_config, username, credentials): LOG.debug('Checking for group membership %s', user_dn) - group_scope = ldap_config.get('groupScope', '') + group_scope = ldap_config.groupScope group_scope = get_ldap_query_scope(group_scope) - group_base = ldap_config.get('groupBase') + group_base = ldap_config.groupBase if group_base is None: LOG.error('Group base needs to be configured to' 'query ldap groups.') return [] - group_name_attr = ldap_config.get('groupNameAttr') + group_name_attr = ldap_config.groupNameAttr if group_name_attr is None: LOG.error('groupNameAttr needs to be configured to' 'query ldap groups.' @@ -446,9 +449,9 @@ def get_groups(ldap_config, username, credentials): 'attribute of the group.') return [] - # Remove non ascii characters. + # Remove non-ASCII characters. 
group_name_attr = \ - group_name_attr.encode('ascii', 'ignore').decode('utf-8') + group_name_attr.encode("ascii", "ignore").decode("utf-8") attr_list = [group_name_attr] LOG.debug("Performing LDAP search for group: %s Group Name Attr: %s", diff --git a/web/server/codechecker_server/auth/cc_pam.py b/web/server/codechecker_server/auth/cc_pam.py index bbb7e05608..c9473bfe82 100644 --- a/web/server/codechecker_server/auth/cc_pam.py +++ b/web/server/codechecker_server/auth/cc_pam.py @@ -24,16 +24,17 @@ """ -import pam import grp +import pam import pwd +from typing import List from codechecker_common.logger import get_logger LOG = get_logger('server') -def auth_user(pam_config, username, password): +def auth_user(pam_config, username, password) -> bool: """ Authenticate user with PAM. """ @@ -43,25 +44,22 @@ def auth_user(pam_config, username, password): auth = pam.pam() if auth.authenticate(username, password): - allowed_users = pam_config.get("users") \ - or [] - allowed_group = pam_config.get("groups")\ - or [] + allowed_users: List[str] = pam_config.users + allowed_groups: List[str] = pam_config.groups - if not allowed_users and not allowed_group: + if not allowed_users and not allowed_groups: # If no filters are set, only authentication is needed. return True - elif username in allowed_users: - # The user is allowed by username. + + if username in allowed_users: return True - else: - # Otherwise, check group memeberships. If any of the user's - # groups are an allowed groupl, the user is allowed. - groups = [g.gr_name for g in grp.getgrall() - if username in g.gr_mem] - gid = pwd.getpwnam(username).pw_gid - groups.append(grp.getgrgid(gid).gr_name) - - return not set(groups).isdisjoint(set(pam_config.get("groups"))) + + # Otherwise, check group memeberships. If any of the user's groups is + # an allowed group, the user is allowed. 
+ groups = {g.gr_name for g in grp.getgrall() if username in g.gr_mem} + gid = pwd.getpwnam(username).pw_gid + groups.add(grp.getgrgid(gid).gr_name) + + return not groups.isdisjoint(allowed_groups) return False diff --git a/web/server/codechecker_server/database/database.py b/web/server/codechecker_server/database/database.py index d5abc0bda2..dce700a7a3 100644 --- a/web/server/codechecker_server/database/database.py +++ b/web/server/codechecker_server/database/database.py @@ -199,15 +199,14 @@ def _create_schema(self): return True - except sqlalchemy.exc.SQLAlchemyError as alch_err: - LOG.error(str(alch_err)) + except sqlalchemy.exc.SQLAlchemyError: + LOG.error("Failed to create initial database schema") import traceback traceback.print_exc() return False - except Exception as ex: + except Exception: LOG.error("Failed to create initial database schema") - LOG.error(ex) import traceback traceback.print_exc() return False diff --git a/web/server/codechecker_server/database/db_cleanup.py b/web/server/codechecker_server/database/db_cleanup.py index 74c219f782..7672e4d430 100644 --- a/web/server/codechecker_server/database/db_cleanup.py +++ b/web/server/codechecker_server/database/db_cleanup.py @@ -28,7 +28,7 @@ Report, ReportAnalysisInfo, RunHistoryAnalysisInfo, RunLock LOG = get_logger('server') -RUN_LOCK_TIMEOUT_IN_DATABASE = 30 * 60 # 30 minutes. 
+RUN_LOCK_TIMEOUT_IN_DATABASE = timedelta(minutes=30) SQLITE_LIMIT_COMPOUND_SELECT = 500 @@ -57,8 +57,7 @@ def remove_expired_run_locks(product): LOG.debug("[%s] Garbage collection of expired run locks started...", product.endpoint) try: - locks_expired_at = datetime.now() - timedelta( - seconds=RUN_LOCK_TIMEOUT_IN_DATABASE) + locks_expired_at = datetime.now() - RUN_LOCK_TIMEOUT_IN_DATABASE count = session.query(RunLock) \ .filter(RunLock.locked_at < locks_expired_at) \ diff --git a/web/server/codechecker_server/database/run_db_model.py b/web/server/codechecker_server/database/run_db_model.py index c8eaec963f..d483ccebff 100644 --- a/web/server/codechecker_server/database/run_db_model.py +++ b/web/server/codechecker_server/database/run_db_model.py @@ -9,7 +9,6 @@ SQLAlchemy ORM model for the analysis run storage database. """ from datetime import datetime, timedelta -from math import ceil import os from typing import Optional @@ -109,10 +108,6 @@ def __init__(self, name, version): self.date, self.name, self.version = datetime.now(), name, version self.duration = -1 - def mark_finished(self): - if self.duration == -1: - self.duration = ceil((datetime.now() - self.date).total_seconds()) - class RunLock(Base): """ @@ -137,16 +132,19 @@ def touch(self): """Update the lock's timestamp to be the current one.""" self.locked_at = datetime.now() - def when_expires(self, grace_seconds): - """Calculates when the current lock will expire assuming the - expiration time is grace_seconds, and the lock will never be touched - until this moment.""" - return self.locked_at + timedelta(seconds=grace_seconds) - - def has_expired(self, grace_seconds): - """Returns if the lock has expired, i.e. 
since the last touch() - or creation, grace_seconds number of seconds has passed.""" - return datetime.now() > self.when_expires(grace_seconds) + def when_expires(self, delta: timedelta): + """ + Calculates when the current lock will expire assuming the expiration + time is `delta`, and the lock will never be touched until this moment. + """ + return self.locked_at + delta + + def has_expired(self, delta: timedelta): + """ + Returns if the lock has expired, i.e. since the last `touch` or + creation, `delta` time has passed. + """ + return datetime.now() > self.when_expires(delta) class AnalyzerStatistic(Base): diff --git a/web/server/codechecker_server/migrations/README.md b/web/server/codechecker_server/migrations/README.md new file mode 100644 index 0000000000..7ec7ef07e7 --- /dev/null +++ b/web/server/codechecker_server/migrations/README.md @@ -0,0 +1 @@ +Please see the [DB Schema Guide](/docs/web/db_schema_guide.md) for the developer documentation for adding database migrations. \ No newline at end of file diff --git a/web/server/codechecker_server/server.py b/web/server/codechecker_server/server.py index 40bdf6db4d..1d68f49073 100644 --- a/web/server/codechecker_server/server.py +++ b/web/server/codechecker_server/server.py @@ -6,29 +6,26 @@ # # ------------------------------------------------------------------------- """ -Main server starts a http server which handles Thrift client -and browser requests. +Main server starts a HTTP server which handles Thrift client and browser +requests. 
""" - - import atexit import datetime from functools import partial from hashlib import sha256 from http.server import HTTPServer, SimpleHTTPRequestHandler import os +import pathlib import posixpath from random import sample -import shutil import signal import socket import ssl import sys import stat -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, cast import urllib -import multiprocess from sqlalchemy.orm import sessionmaker from sqlalchemy.sql.expression import func from thrift.protocol import TJSONProtocol @@ -49,14 +46,17 @@ serverInfoService as ServerInfoAPI_v6 from codechecker_common import util -from codechecker_common.logger import get_logger from codechecker_common.compatibility.multiprocessing import \ - Pool, cpu_count + Pool, Process, cpu_count +from codechecker_common.logger import get_logger +from codechecker_common.util import generate_random_token from codechecker_web.shared import database_status from codechecker_web.shared.version import get_version_str -from . import instance_manager, permissions, routing, session_manager +from . 
import instance_manager, permissions, routing, server_configuration +from .session_manager import SessionManager, SESSION_COOKIE_NAME + from .api.authentication import ThriftAuthHandler as AuthHandler_v6 from .api.config_handler import ThriftConfigHandler as ConfigHandler_v6 from .api.product_server import ThriftProductHandler as ProductHandler_v6 @@ -68,18 +68,13 @@ Configuration as ORMConfiguration from .database.database import DBSession from .database.run_db_model import IDENTIFIER as RUN_META, Run, RunLock -from .tmp import get_tmp_dir_hash LOG = get_logger('server') class RequestHandler(SimpleHTTPRequestHandler): - """ - Handle thrift and browser requests - Simply modified and extended version of SimpleHTTPRequestHandler - """ - auth_session = None + """Handle Thrift RPC and Browser HTTP requests.""" def __init__(self, request, client_address, server): self.path = None @@ -104,7 +99,7 @@ def send_thrift_exception(self, error_msg, iprot, oprot, otrans): result = otrans.getvalue() self.send_response(200) self.send_header("content-type", "application/x-thrift") - self.send_header("Content-Length", len(result)) + self.send_header("Content-Length", str(len(result))) self.end_headers() self.wfile.write(result) @@ -116,7 +111,7 @@ def __check_session_cookie(self): cookie was found in the headers. None, otherwise. 
""" - if not self.server.manager.is_enabled: + if not self.server.session_manager.is_enabled: return None session = None @@ -127,8 +122,9 @@ def __check_session_cookie(self): for cookie in split: values = cookie.split("=") if len(values) == 2 and \ - values[0] == session_manager.SESSION_COOKIE_NAME: - session = self.server.manager.get_session(values[1]) + values[0] == SESSION_COOKIE_NAME: + session = self.server.session_manager.get_session( + values[1]) if session and session.is_alive: # If a valid session token was found and it can still be used, @@ -149,7 +145,7 @@ def __check_session_cookie(self): return None def __handle_readiness(self): - """ Handle readiness probe. """ + """Handle Kubernetes Readiness probe.""" try: cfg_sess = self.server.config_session() cfg_sess.query(ORMConfiguration).count() @@ -167,7 +163,7 @@ def __handle_readiness(self): cfg_sess.commit() def __handle_liveness(self): - """ Handle liveness probe. """ + """Handle Kubernetes liveness probe.""" self.send_response(200) self.end_headers() self.wfile.write(b'CODECHECKER_SERVER_IS_LIVE') @@ -181,8 +177,7 @@ def end_headers(self): token = self.auth_session.token if token: self.send_header( - "Set-Cookie", - f"{session_manager.SESSION_COOKIE_NAME}={token}; Path=/") + "Set-Cookie", f"{SESSION_COOKIE_NAME}={token}; Path=/") # Set the current user name in the header. user_name = self.auth_session.user @@ -226,16 +221,13 @@ def do_GET(self): if self.path == '/': self.path = 'index.html' - SimpleHTTPRequestHandler.do_GET(self) - return + return SimpleHTTPRequestHandler.do_GET(self) + # Kubernetes cluster probe endpoints. 
if self.path == '/live': - self.__handle_liveness() - return - + return self.__handle_liveness() if self.path == '/ready': - self.__handle_readiness() - return + return self.__handle_readiness() product_endpoint, _ = routing.split_client_GET_request(self.path) @@ -250,7 +242,7 @@ def do_GET(self): if not os.path.exists(self.translate_path(self.path)): self.path = 'index.html' - SimpleHTTPRequestHandler.do_GET(self) + return SimpleHTTPRequestHandler.do_GET(self) def __check_prod_db(self, product_endpoint): """ @@ -333,7 +325,7 @@ def do_POST(self): otrans = TTransport.TMemoryBuffer() oprot = output_protocol_factory.getProtocol(otrans) - if self.server.manager.is_enabled and \ + if self.server.session_manager.is_enabled and \ not self.path.endswith(('/Authentication', '/Configuration', '/ServerInfo')) and \ @@ -371,7 +363,8 @@ def do_POST(self): if major_version == 6: if request_endpoint == 'Authentication': auth_handler = AuthHandler_v6( - self.server.manager, + self.server.configuration_manager, + self.server.session_manager, self.auth_session, self.server.config_session) processor = AuthAPI_v6.Processor(auth_handler) @@ -407,7 +400,8 @@ def do_POST(self): product = self.__check_prod_db(product_endpoint) acc_handler = ReportHandler_v6( - self.server.manager, + self.server.configuration_manager, + self.server.session_manager, product.session_factory, product, self.auth_session, @@ -730,22 +724,20 @@ def _get_products() -> List[Product]: class CCSimpleHttpServer(HTTPServer): - """ - Simple http server to handle requests from the clients. 
- """ + """Simple HTTP server to handle requests from the clients.""" - daemon_threads = False address_family = socket.AF_INET # IPv4 def __init__(self, - server_address, + server_address: Tuple[str, int], RequestHandlerClass, config_directory, product_db_sql_server, pckg_data, context, check_env, - manager): + configuration_manager: server_configuration.Configuration, + session_manager: SessionManager): LOG.debug("Initializing HTTP server...") @@ -755,14 +747,16 @@ def __init__(self, self.version = pckg_data['version'] self.context = context self.check_env = check_env - self.manager = manager + self.configuration_manager = configuration_manager + self.session_manager = session_manager + self.address, self.port = server_address self.__products = {} # Create a database engine for the configuration database. LOG.debug("Creating database engine for CONFIG DATABASE...") self.__engine = product_db_sql_server.create_engine() self.config_session = sessionmaker(bind=self.__engine) - self.manager.set_database_connection(self.config_session) + self.session_manager.set_database_connection(self.config_session) # Load the initial list of products and set up the server. cfg_sess = self.config_session() @@ -780,7 +774,7 @@ def __init__(self, cfg_sess.close() try: - HTTPServer.__init__(self, server_address, + HTTPServer.__init__(self, (self.address, self.port), RequestHandlerClass, bind_and_activate=True) ssl_key_file = os.path.join(config_directory, "key.pem") @@ -813,12 +807,17 @@ def __init__(self, LOG.error("Couldn't start the server: %s", e.__str__()) raise + # If the server was started with the port 0, the OS will pick an + # available port. For this reason we will update the port variable + # after server initialization. + self.port = self.socket.getsockname()[1] + def configure_keepalive(self): """ Enable keepalive on the socket and some TCP keepalive configuration option based on the server configuration file. 
""" - if not self.manager.is_keepalive_enabled(): + if not self.configuration_manager.keepalive.enabled: return keepalive_is_on = self.socket.getsockopt(socket.SOL_SOCKET, @@ -833,26 +832,38 @@ def configure_keepalive(self): if ret: LOG.error('Failed to set socket keepalive: %s', ret) - idle = self.manager.get_keepalive_idle() + idle = self.configuration_manager.keepalive.idle if idle: ret = self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, idle) if ret: LOG.error('Failed to set TCP keepalive idle: %s', ret) + else: + idle = self.socket.getsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPIDLE) + self.configuration_manager.keepalive.idle = idle - interval = self.manager.get_keepalive_interval() + interval = self.configuration_manager.keepalive.interval if interval: ret = self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval) if ret: LOG.error('Failed to set TCP keepalive interval: %s', ret) + else: + interval = self.socket.getsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPINTVL) + self.configuration_manager.keepalive.interval = interval - max_probe = self.manager.get_keepalive_max_probe() - if max_probe: + max_probes = self.configuration_manager.keepalive.max_probe + if max_probes: ret = self.socket.setsockopt(socket.IPPROTO_TCP, - socket.TCP_KEEPCNT, max_probe) + socket.TCP_KEEPCNT, max_probes) if ret: - LOG.error('Failed to set TCP max keepalive probe: %s', ret) + LOG.error('Failed to set TCP max keepalive probes: %s', ret) + else: + max_probes = self.socket.getsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPCNT) + self.configuration_manager.keepalive.max_probe = max_probes def terminate(self): """ @@ -982,6 +993,10 @@ def remove_products_except(self, endpoints_to_keep): if ep not in endpoints_to_keep: self.remove_product(ep) + @property + def formatted_address(self) -> str: + return f"{str(self.address)}:{self.port}" + class CCSimpleHttpServerIPv6(CCSimpleHttpServer): """ @@ -991,7 +1006,7 @@ class 
CCSimpleHttpServerIPv6(CCSimpleHttpServer): address_family = socket.AF_INET6 -def __make_root_file(root_file): +def __make_root_file(root_file: str) -> str: """ Generate a root username and password SHA. This hash is saved to the given file path, and is also returned. @@ -1000,7 +1015,7 @@ def __make_root_file(root_file): LOG.debug("Generating initial superuser (root) credentials...") username = ''.join(sample("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 6)) - password = get_tmp_dir_hash()[:8] + password = generate_random_token(8) LOG.info("A NEW superuser credential was generated for the server. " "This information IS SAVED, thus subsequent server starts " @@ -1028,65 +1043,58 @@ def __make_root_file(root_file): return secret -def start_server(config_directory, package_data, port, config_sql_server, - listen_address, force_auth, skip_db_cleanup: bool, - context, check_env): +def __load_or_create_root_file(config_directory: str) -> str: """ - Start http server to handle web client and thrift requests. - """ - LOG.debug("Starting CodeChecker server...") - - server_addr = (listen_address, port) + Loads the stored hashed superuser (root) user name and password for the + server, or creates an automatically generated one if such does not exist. + Returns the SHA-hashed expected username and password of root. + """ root_file = os.path.join(config_directory, 'root.user') if not os.path.exists(root_file): LOG.warning("Server started without 'root.user' present in " "CONFIG_DIRECTORY!") - root_sha = __make_root_file(root_file) else: LOG.debug("Root file was found. Loading...") try: with open(root_file, 'r', encoding="utf-8", errors="ignore") as f: root_sha = f.read() - LOG.debug("Root digest is '%s'", root_sha) + LOG.debug("Root digest is '%s'", root_sha) + return root_sha except IOError: LOG.info("Cannot open root file '%s' even though it exists", root_file) - root_sha = __make_root_file(root_file) - - # Check whether configuration file exists, create an example if not. 
- server_cfg_file = os.path.join(config_directory, 'server_config.json') - if not os.path.exists(server_cfg_file): - # For backward compatibility reason if the session_config.json file - # exists we rename it to server_config.json. - session_cfg_file = os.path.join(config_directory, - 'session_config.json') - example_cfg_file = os.path.join(os.environ['CC_DATA_FILES_DIR'], - 'config', 'server_config.json') - if os.path.exists(session_cfg_file): - LOG.info("Renaming '%s' to '%s'. Please check the example " - "configuration file ('%s') or the user guide for more " - "information.", session_cfg_file, - server_cfg_file, example_cfg_file) - os.rename(session_cfg_file, server_cfg_file) - else: - LOG.info("CodeChecker server's example configuration file " - "created at '%s'", server_cfg_file) - shutil.copyfile(example_cfg_file, server_cfg_file) + return __make_root_file(root_file) + + +def start_server(config_directory, package_data, port: int, config_sql_server, + listen_address: str, force_auth, skip_db_cleanup, + context, check_env): + """ + Start http server to handle web client and thrift requests. 
+ """ + LOG.debug("Begin starting CodeChecker server...") + + root_sha = __load_or_create_root_file(config_directory) + config_dir = pathlib.Path(config_directory) try: - manager = session_manager.SessionManager( - server_cfg_file, - root_sha, - force_auth) - except IOError as ioerr: - LOG.debug(ioerr) - LOG.error("The server's configuration file " - "is missing or can not be read!") - sys.exit(1) - except ValueError as verr: - LOG.debug(verr) - LOG.error("The server's configuration file is invalid!") + configuration = server_configuration.load_configuration(config_dir) + bad_configs = configuration._validate() + if bad_configs: + raise ValueError("Some of the configuration values are invalid:" + "\n\t* %s" + % ("\n\t* ".join(c for c, _ in bad_configs))) + + session_manager = SessionManager(configuration, + root_sha, + force_auth) + except Exception as err: + LOG.debug(err) + LOG.error("The server's configuration file is missing, can not " + "be read, or is in an invalid format!") + import traceback + traceback.print_exc() sys.exit(1) if not skip_db_cleanup: @@ -1104,25 +1112,21 @@ def start_server(config_directory, package_data, port, config_sql_server, LOG.debug("Skipping db_cleanup, as requested.") server_clazz = CCSimpleHttpServer - if ':' in server_addr[0]: + if ':' in listen_address: # IPv6 address specified for listening. # FIXME: Python>=3.8 automatically handles IPv6 if ':' is in the bind # address, see https://bugs.python.org/issue24209. server_clazz = CCSimpleHttpServerIPv6 - http_server = server_clazz(server_addr, + http_server = server_clazz((listen_address, port), RequestHandler, config_directory, config_sql_server, package_data, context, check_env, - manager) - - # If the server was started with the port 0, the OS will pick an available - # port. For this reason we will update the port variable after server - # initialization. 
- port = http_server.socket.getsockname()[1] + configuration, + session_manager) processes = [] @@ -1130,23 +1134,47 @@ def signal_handler(signum, _): """ Handle SIGTERM to stop the server running. """ - LOG.info("Shutting down the WEB server on [%s:%d]", - '[' + listen_address + ']' - if server_clazz is CCSimpleHttpServerIPv6 else listen_address, - port) + LOG.info("Shutting down the WEB server on [%s]", + http_server.formatted_address) http_server.terminate() # Terminate child processes. - for pp in processes: - pp.terminate() + for p in processes: + p.terminate() sys.exit(128 + signum) def reload_signal_handler(*_args, **_kwargs): - """ - Reloads server configuration file. - """ - manager.reload_config() + """Reloads the server configuration file.""" + LOG.info("Received signal to reload server configuration ...") + + # First, check if the configuration file's new contents are fully + # valid as-if a new server would be started with them. + try: + configuration2 = server_configuration.load_configuration( + config_dir) + bad_configs = configuration2._validate() + if bad_configs: + raise ValueError("Some of the configuration values are " + "invalid:\n\t* %s" + % ("\n\t* ".join(c for c, _ in bad_configs))) + except Exception as err: + LOG.error(err) + LOG.error("The server's configuration file has changed in a way " + "that it would be invalid for the starting of a new " + "server!") + LOG.error("Rejecting configuration reload, and keeping the " + "**OLD VALUES** intact!") + return + + changed_cfgs, _ = configuration._update() + + if any(path in ["/authentication/session_lifetime", + "/authentication/refresh_time"] + for path, _, _ in changed_cfgs): + session_manager.configuration_reloaded_update_sessions() + + LOG.info("Server configuration reload: Done.") try: instance_manager.register(os.getpid(), @@ -1156,11 +1184,6 @@ def reload_signal_handler(*_args, **_kwargs): except IOError as ex: LOG.debug(ex.strerror) - LOG.info("Server waiting for client requests on 
[%s:%d]", - '[' + listen_address + ']' - if server_clazz is CCSimpleHttpServerIPv6 else listen_address, - port) - def unregister_handler(pid): """ Handle errors during instance unregistration. @@ -1174,8 +1197,11 @@ def unregister_handler(pid): atexit.register(unregister_handler, os.getpid()) - for _ in range(manager.worker_processes - 1): - p = multiprocess.Process(target=http_server.serve_forever) + requested_worker_threads = cast(int, configuration.worker_processes) + LOG.info("Spawning %d API request handler processes...", + requested_worker_threads) + for _ in range(requested_worker_threads - 1): + p = Process(target=http_server.serve_forever) processes.append(p) p.start() @@ -1185,6 +1211,9 @@ def unregister_handler(pid): if sys.platform != "win32": signal.signal(signal.SIGHUP, reload_signal_handler) + LOG.info("Server waiting for client requests on [%s]", + http_server.formatted_address) + # Main process also acts as a worker. http_server.serve_forever() diff --git a/web/server/codechecker_server/server_configuration.py b/web/server/codechecker_server/server_configuration.py new file mode 100644 index 0000000000..113f6e6965 --- /dev/null +++ b/web/server/codechecker_server/server_configuration.py @@ -0,0 +1,591 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Handles the retrieval and access to the file-based configuration of a server. 
+""" +from datetime import timedelta +import os +from pathlib import Path +import shutil +import stat +from typing import cast + +from codechecker_common.configuration_access import Configuration, Schema, \ + OptionDirectory, OptionDirectoryList +from codechecker_common.logger import get_logger + +from codechecker_web.shared.env import check_file_owner_rw + + +LOG = get_logger("server") + + +def register_configuration_options(s: Schema) -> Schema: + """ + Registers the `Schema` of `Option`s that are accessible in a server + configuration file. + + Returns the `Schema` object. + """ + s.add_option("max_run_count", "./max_run_count", + default=None, + validation_predicate=lambda v: + v is None or isinstance(v, int), + validation_fail_action="A size limit must be a number.", + # FIXME: Obnoxious design, but changing this would break + # reverse compatibility against currently configured + # deployments. + description="The maximum storable run count. If 'None' or " + "negative, an unlimited amount can be stored." + ) + + s.add_option("worker_processes", "./worker_processes", + default=os.cpu_count(), + validation_predicate=lambda v: isinstance(v, int) and v > 0, + validation_fail_action="The number of 'worker_processes' " + "can not be 0 or negative!", + read_only=True, + supports_update=False, + description="The number of API request handler processes " + "to start on the server." + ) + + store = cast(OptionDirectory, + s.add_option("store", "./store/", + description="Configuration for the handling of " + "'store' endpoint operations." + )) + + store.add_option("analysis_statistics_dir", + "./analysis_statistics_dir", + default=None, + validation_predicate=lambda v: v is None or ( + isinstance(v, str) and bool(v)), + description="The server-side directory where compressed " + "analysis statistics files should be saved. " + "If unset (None), analysis statistics are " + "NOT stored on the server." 
+ ) + + store_limits = cast(OptionDirectory, + store.add_option("store_limits", "./limit/", + description="Allowed size and time " + "limits during a " + "'store'." + )) + + store_limits.add_option("failure_zip_size", + "./failure_zip_size", + default=50 * 1024 * 1024, # 50 MiB. + validation_predicate=lambda v: + v is None or (isinstance(v, int) and v >= 0), + validation_fail_action="A size limit can not be " + "negative.", + read_only=False, + description="The maximum size of the collected " + "failure ZIPs which can be stored on " + "the server." + ) + + store_limits.add_option("compilation_database_size", + "./compilation_database_size", + default=100 * 1024 * 1024, # 100 MiB. + validation_predicate=lambda v: + v is None or (isinstance(v, int) and v >= 0), + validation_fail_action="A size limit can not be " + "negative.", + read_only=False, + description="The limit for the compilation " + "database file size." + ) + + keepalive = cast(OptionDirectory, + s.add_option("keepalive", "/keepalive/", + description="TCP keep-alive configuration " + "parameters for the server's " + "listen socket." + )) + + keepalive.add_option("enable_keepalive", "./enabled", + default=False, + supports_update=False, + description="Whether to set up TCP keep-alive on " + "the server socket. This is recommended " + "to be turned on in a distributed " + "environment, such as Docker Swarm." + ) + + keepalive.add_option("keepalive_time_idle", + "./idle", + default=None, + validation_predicate=lambda v: + v is None or (isinstance(v, int) and v >= 0), + validation_fail_action="A time limit can not be " + "negative.", + read_only=False, + supports_update=False, + description="The interval (in seconds) after the " + "sending of the last data packet " + "(excluding ACKs) and the first " + "keepalive probe. If unset (None), the " + "default will be taken from the system " + "configuration " + "'net.ipv4.tcp_keepalive_time'." 
+ ) + + keepalive.add_option("keepalive_time_interval", + "./interval", + default=None, + validation_predicate=lambda v: + v is None or (isinstance(v, int) and v >= 0), + validation_fail_action="A time limit can not be " + "negative.", + read_only=False, + supports_update=False, + description="The interval (in seconds) between the " + "sending of subsequent keepalive probes." + "If unset (None), the default will be " + "taken from the system configuration " + "'net.ipv4.tcp_keepalive_intvl'." + ) + + keepalive.add_option("keepalive_max_probes", + "./max_probe", + default=None, + validation_predicate=lambda v: + v is None or (isinstance(v, int) and v >= 0), + validation_fail_action="A size limit can not be " + "negative.", + read_only=False, + supports_update=False, + description="The number of unacknowledged keepalive " + "probes to send before the connection " + "is considered dead by the kernel, and " + "this is signalled to the server " + "process. If unset (None), the default " + "will be taken from the system " + "configuration " + "'net.ipv4.tcp_keepalive_probes'." + ) + + auth = cast(OptionDirectory, + s.add_option("authentication", "./authentication/", + description="Authentication (privilege-only " + "access) configuration root." + )) + + auth.add_option("auth_enabled", "./enabled", + default=False, + validation_predicate=lambda v: isinstance(v, bool), + read_only=False, + supports_update=False, + description="Toggles the entire authentication system." + ) + + auth.add_option("auth_logins_until_cleanup", "./logins_until_cleanup", + default=60, + validation_predicate=lambda v: isinstance(v, int) + and v >= 0, + validation_fail_action="A counter limit can not be " + "negative.", + description="After this many login attempts, the server " + "should perform an automatic cleanup of old, " + "expired sessions." 
+                    )
+
+    auth.add_option("auth_session_lifetime", "./session_lifetime",
+                    default=int(timedelta(minutes=5).total_seconds()),
+                    validation_predicate=lambda v: isinstance(v, int)
+                    and v > 0,
+                    validation_fail_action="A time limit can not be negative.",
+                    description="If an authenticated session is not accessed "
+                                "for this many seconds, it will be "
+                                "permanently invalidated, and the user "
+                                "logged out."
+                    )
+
+    auth.add_option("auth_refresh_time", "./refresh_time",
+                    default=int(timedelta(minutes=1).total_seconds()),
+                    validation_predicate=lambda v: isinstance(v, int)
+                    and v > 0,
+                    validation_fail_action="A time limit can not be negative.",
+                    description="If an authenticated session is not accessed "
+                                "for this many seconds, it will be validated "
+                                "against the contents of the database as "
+                                "opposed to reusing the local (in-memory) "
+                                "cache."
+                    )
+
+    auth_regex_groups = cast(OptionDirectory,
+                             auth.add_option(
+                                 "auth_regex_groups", "./regex_groups/",
+                                 description="Allows creating virtual "
+                                             "groups usable by other "
+                                             "subsystems based on username "
+                                             "patterns matching regular "
+                                             "expressions."
+                             ))
+
+    auth_regex_groups.add_option("auth_regex_groups_enabled", "./enabled",
+                                 default=False,
+                                 validation_predicate=lambda v:
+                                 isinstance(v, bool),
+                                 read_only=False,
+                                 supports_update=False,
+                                 description="Toggles this authentication "
+                                             "subsystem."
+                                 )
+
+    auth_regex_groups.add_option("auth_regex_groups_groups", "./groups",
+                                 default={},
+                                 secret=True,
+                                 validation_predicate=lambda v:
+                                 isinstance(v, dict) and
+                                 all(isinstance(v2, list)
+                                     for v2 in v.values()),
+                                 supports_update=False,
+                                 description="Mapping of virtual group names "
+                                             "to a list of username patterns."
+                                 )
+
+    dictionary_auth = cast(OptionDirectory,
+                           auth.add_option(
+                               "dictionary_auth", "./method_dictionary/",
+                               description="Hardcoded dictionary based "
+                                           "authentication."
+ )) + + dictionary_auth.add_option("dictionary_auth_enabled", "./enabled", + default=False, + validation_predicate=lambda v: + isinstance(v, bool), + supports_update=False, + description="Toggles this authentication " + "subsystem." + ) + + dictionary_auth.add_option("dictionary_auth_auths", "./auths", + default=[], + secret=True, + validation_predicate=lambda v: + isinstance(v, list), + supports_update=False, + description="'Username:Password' of the known " + "and allowed users." + ) + + dictionary_auth.add_option("dictionary_auth_groups", "./groups", + default={}, + secret=True, + validation_predicate=lambda v: + isinstance(v, dict) and + all(isinstance(v2, list) for v2 in v.values()), + supports_update=False, + description="Mapping of user names to the " + "authentication groups they " + "belong to." + ) + + pam_auth = cast(OptionDirectory, + auth.add_option("pam_auth", "./method_pam/", + description="Linux PAM based " + "authentication." + )) + + pam_auth.add_option("pam_auth_enabled", "./enabled", + default=False, + validation_predicate=lambda v: + isinstance(v, bool), + read_only=False, + supports_update=False, + description="Toggles this authentication subsystem." + ) + + pam_auth.add_option("pam_auth_auths", "./users", + default=[], + secret=True, + validation_predicate=lambda v: + isinstance(v, list), + supports_update=False, + description="The list of PAM user names allowed to " + "access the system." + ) + + pam_auth.add_option("pam_auth_groups", "./groups", + default=[], + secret=True, + validation_predicate=lambda v: + isinstance(v, list), + supports_update=False, + description="The list of PAM group names allowed to " + "access the system." + ) + + ldap_auth = cast(OptionDirectory, + auth.add_option("ldap_auth", "./method_ldap/", + description="LDAP-based authentication." 
+ )) + + ldap_auth.add_option("ldap_auth_enabled", "./enabled", + default=False, + validation_predicate=lambda v: + isinstance(v, bool), + read_only=False, + supports_update=False, + description="Toggles this authentication subsystem." + ) + + ldap_authorities = cast(OptionDirectoryList, + ldap_auth.add_option( + "ldap_authorities", "./authorities[]/", + description="A priority list of LDAP " + "authority servers where the " + "users' authentication requests " + "will be validated against." + )) + + ldap_authorities.add_option("ldap_connection_url", "./connection_url", + supports_update=False, + validation_predicate=lambda v: + v is None or isinstance(v, str), + description="URL of the LDAP server which " + "will be queried for user " + "information and group " + "membership." + ) + + ldap_authorities.add_option("ldap_tls_require_cert", "./tls_require_cert", + default="always", + validation_predicate=lambda v: + v in ["always", "never"], + supports_update=False, + description="If set to 'never', skip " + "certificate verification during " + "'ldaps://' connections. " + "Using this option is insecure!" + ) + + ldap_authorities.add_option("ldap_username", "./username", + default=None, + secret=True, + validation_predicate=lambda v: + v is None or isinstance(v, str), + supports_update=False, + description="Username for the LDAP bind() " + "call used for the server " + "connection. " + "If unset, the to-be logged-in " + "user's username is used." + ) + + ldap_authorities.add_option("ldap_password", "./password", + default=None, + secret=True, + supports_update=False, + validation_predicate=lambda v: + v is None or isinstance(v, str), + description="Password for the LDAP bind() " + "call used for the server " + "connection. " + "If unset, the to-be logged-in " + "user's credentials are used." 
+                                )
+
+    ldap_authorities.add_option("ldap_use_referrals", "./referrals",
+                                default=False,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                isinstance(v, bool),
+                                description="Microsoft AD by default returns "
+                                            "referral (search continuation) "
+                                            "objects, which do not synergise "
+                                            "well with 'libldap', as it is "
+                                            "not specified what credentials "
+                                            "should be used to follow up on "
+                                            "the referral. "
+                                            "Because of this, and because "
+                                            "the default (anonymous) "
+                                            "behaviour might fail, using "
+                                            "referrals is opt-in."
+                                )
+
+    ldap_authorities.add_option("ldap_deref", "./deref",
+                                default="always",
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v in ["always", "never"],
+                                description="Configure how the alias "
+                                            "dereferencing is done in "
+                                            "'libldap'."
+                                )
+
+    ldap_authorities.add_option("ldap_account_base", "./accountBase",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="Root tree containing all "
+                                            "user accounts."
+                                )
+
+    ldap_authorities.add_option("ldap_account_scope", "./accountScope",
+                                default="subtree",
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v in ["base", "one", "subtree"],
+                                description="Scope of the search performed."
+                                )
+
+    ldap_authorities.add_option("ldap_account_pattern", "./accountPattern",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="Query pattern used to search "
+                                            "for a user account. "
+                                            "Must be an LDAP query "
+                                            "expression, and the special "
+                                            "$USN$ token is replaced with "
+                                            "the username of the to-be "
+                                            "logged-in user."
+                                )
+
+    ldap_authorities.add_option("ldap_userDN_postfix_preference",
+                                "./user_dn_postfix_preference",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="Used to select the preferred "
+                                            "user DN if multiple entries are "
+                                            "returned by the LDAP search. "
+                                            "The configured value will be "
+                                            "matched and the first matching "
+                                            "user DN is used, in case "
+                                            "multiple choices were "
+                                            "available. "
+                                            "If unset and multiple choices "
+                                            "had been available, the first "
+                                            "result is used, which may be "
+                                            "non-deterministic!"
+                                )
+
+    ldap_authorities.add_option("ldap_group_base", "./groupBase",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="Root tree containing all groups."
+                                )
+
+    ldap_authorities.add_option("ldap_group_scope", "./groupScope",
+                                default="subtree",
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v in ["base", "one", "subtree"],
+                                description="Scope of the search performed."
+                                )
+
+    ldap_authorities.add_option("ldap_group_pattern", "./groupPattern",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="Query pattern used to search "
+                                            "for the group(s) of a user. "
+                                            "Must be an LDAP query "
+                                            "expression, and the special "
+                                            "$USERDN$ token is replaced with "
+                                            "the obtained user DN for the "
+                                            "to-be logged-in user."
+                                )
+
+    ldap_authorities.add_option("ldap_group_name_attribute", "./groupNameAttr",
+                                default=None,
+                                supports_update=False,
+                                validation_predicate=lambda v:
+                                v is None or isinstance(v, str),
+                                description="The attribute of the GROUP "
+                                            "object which contains the name "
+                                            "of the group."
+                                )
+
+    return s
+
+
+def get_example_configuration_file_path() -> Path:
+    """
+    Returns the location of the example configuration that is shipped
+    together with the CodeChecker package.
+ """ + return Path(os.environ["CC_DATA_FILES_DIR"], + "config", "server_config.json") + + +def migrate_session_config_to_server_config(session_config_file: Path, + server_config_file: Path) -> Path: + """ + Migrates an existing, deprecated `session_config` file to its new + `server_config` structure. + + Returns `server_config_file` path. + """ + if session_config_file.exists() and not server_config_file.exists(): + LOG.warning("The use of '%s' file is deprecated since " + "CodeChecker v6.5!", session_config_file) + + session_config_file.rename(server_config_file) + LOG.info("Automatically renamed '%s' to '%s'...\n\t" + "Please check the example configuration configuration file " + "('%s') or the User Guide " + "(http://codechecker.readthedocs.io) for more information.", + session_config_file, server_config_file, + get_example_configuration_file_path()) + return server_config_file + + +def create_server_config_file(server_config_file: Path) -> Path: + """ + Creates a default server configuration file at the specified location from + the package's built-in example. + + Returns `server_config_file` path. + """ + if not server_config_file.exists(): + shutil.copy(get_example_configuration_file_path(), server_config_file) + server_config_file.chmod(stat.S_IRUSR | stat.S_IWUSR) + LOG.info("CodeChecker server's example configuration file created " + "at '%s'", server_config_file) + return server_config_file + + +def load_configuration(config_directory: Path) -> Configuration: + """ + Do whatever is needed to get to a valid server configuration at the + expected file path under `config_directory`. + Following that, read it, parse it, and return the contents as a + schema-enabled `Configuration` object. 
+ """ + server_config = config_directory / "server_config.json" + if not server_config.exists(): + server_config = migrate_session_config_to_server_config( + config_directory / "session_config.json", + server_config) + if not server_config.exists(): + server_config = create_server_config_file(server_config) + if not server_config.exists(): + LOG.fatal("Server configuration factory ran out of options to " + "instantiate a viable configuration for this instance!") + raise FileNotFoundError(str(server_config)) + + # This helper function prints a warning to the output if the access + # to the file is too permissive. + check_file_owner_rw(server_config) + + return Configuration.from_file(register_configuration_options(Schema()), + server_config) diff --git a/web/server/codechecker_server/session_manager.py b/web/server/codechecker_server/session_manager.py index 276af909cd..961d309da6 100644 --- a/web/server/codechecker_server/session_manager.py +++ b/web/server/codechecker_server/session_manager.py @@ -8,26 +8,21 @@ """ Handles the management of authentication sessions on the server's side. 
""" - import hashlib -import json -import os import re -import uuid from datetime import datetime -from typing import Optional +from typing import Optional, Set -from codechecker_common.compatibility.multiprocessing import cpu_count from codechecker_common.logger import get_logger -from codechecker_common.util import load_json +from codechecker_common.util import generate_random_token -from codechecker_web.shared.env import check_file_owner_rw from codechecker_web.shared.version import SESSION_COOKIE_NAME as _SCN from .database.config_db_model import Session as SessionRecord from .database.config_db_model import SystemPermission from .permissions import SUPERUSER +from .server_configuration import Configuration UNSUPPORTED_METHODS = [] @@ -35,41 +30,17 @@ try: from .auth import cc_ldap except ImportError: - UNSUPPORTED_METHODS.append('ldap') + UNSUPPORTED_METHODS.append("ldap") try: from .auth import cc_pam except ImportError: - UNSUPPORTED_METHODS.append('pam') + UNSUPPORTED_METHODS.append("pam") LOG = get_logger("server") SESSION_COOKIE_NAME = _SCN - - -def generate_session_token(): - """ - Returns a random session token. - """ - return uuid.UUID(bytes=os.urandom(16)).hex - - -def get_worker_processes(scfg_dict): - """ - Return number of worker processes from the config dictionary. - - Return 'worker_processes' field from the config dictionary or returns the - default value if this field is not set or the value is negative. - """ - default = cpu_count() - worker_processes = scfg_dict.get('worker_processes', default) - - if worker_processes < 0: - LOG.warning("Number of worker processes can not be negative! 
Default " - "value will be used: %s", default) - worker_processes = default - - return worker_processes +SESSION_TOKEN_LENGTH = 32 class _Session: @@ -84,11 +55,11 @@ def __init__(self, token, username, groups, self.groups = groups self.session_lifetime = session_lifetime - self.refresh_time = refresh_time if refresh_time else None + self.refresh_time = refresh_time self.__root = is_root self.__database = database self.__can_expire = can_expire - self.last_access = last_access if last_access else datetime.now() + self.last_access = last_access or datetime.now() @property def is_root(self): @@ -161,12 +132,15 @@ class SessionManager: CodeChecker server. """ - def __init__(self, configuration_file, root_sha, force_auth=False): + def __init__(self, + configuration: Configuration, + root_sha: str, + force_auth: bool = False): """ Initialise a new Session Manager on the server. - :param configuration_file: The configuration file to read - authentication backends from. + :param configuration_file: The server's configuration data. + It contains authentication backend configuration information. :param root_sha: The SHA-256 hash of the root user's authentication. :param force_auth: If True, the manager will be enabled even if the configuration file disables authentication. @@ -174,172 +148,78 @@ def __init__(self, configuration_file, root_sha, force_auth=False): self.__database_connection = None self.__logins_since_prune = 0 self.__sessions = [] - self.__configuration_file = configuration_file - - scfg_dict = self.__get_config_dict() - - # FIXME: Refactor this. This is irrelevant to authentication config, - # so it should NOT be handled by session_manager. A separate config - # handler for the server's stuff should be created, that can properly - # instantiate SessionManager with the found configuration. 
- self.__worker_processes = get_worker_processes(scfg_dict) - self.__max_run_count = scfg_dict.get('max_run_count', None) - self.__store_config = scfg_dict.get('store', {}) - self.__keepalive_config = scfg_dict.get('keepalive', {}) - self.__auth_config = scfg_dict['authentication'] + self.__root_sha = root_sha + self._configuration = configuration.authentication if force_auth: LOG.debug("Authentication was force-enabled.") - self.__auth_config['enabled'] = True - - if 'soft_expire' in self.__auth_config: - LOG.debug("Found deprecated argument 'soft_expire' in " - "server_config.authentication.") - - self.__refresh_time = self.__auth_config['refresh_time'] \ - if 'refresh_time' in self.__auth_config else None - - # Save the root SHA into the configuration (but only in memory!) - self.__auth_config['method_root'] = root_sha - - self.__regex_groups_enabled = False - - # Pre-compile the regular expressions of 'regex_groups' - if 'regex_groups' in self.__auth_config: - self.__regex_groups_enabled = self.__auth_config['regex_groups'] \ - .get('enabled', False) - - regex_groups = self.__auth_config['regex_groups'] \ - .get('groups', []) - d = {} - for group_name, regex_list in regex_groups.items(): - d[group_name] = [re.compile(r) for r in regex_list] - self.__group_regexes_compiled = d - - # If no methods are configured as enabled, disable authentication. - if scfg_dict['authentication'].get('enabled'): - found_auth_method = False - - if 'method_dictionary' in self.__auth_config and \ - self.__auth_config['method_dictionary'].get('enabled'): - found_auth_method = True - - if 'method_ldap' in self.__auth_config and \ - self.__auth_config['method_ldap'].get('enabled'): - if 'ldap' not in UNSUPPORTED_METHODS: - found_auth_method = True - else: - LOG.warning("LDAP authentication was enabled but " - "prerequisites are NOT installed on the system" - "... 
Disabling LDAP authentication.") - self.__auth_config['method_ldap']['enabled'] = False - - if 'method_pam' in self.__auth_config and \ - self.__auth_config['method_pam'].get('enabled'): - if 'pam' not in UNSUPPORTED_METHODS: - found_auth_method = True - else: - LOG.warning("PAM authentication was enabled but " - "prerequisites are NOT installed on the system" - "... Disabling PAM authentication.") - self.__auth_config['method_pam']['enabled'] = False - - if not found_auth_method: - if force_auth: - LOG.warning("Authentication was manually enabled, but no " - "valid authentication backends are " - "configured... The server will only allow " - "the master superuser (root) access.") - else: - LOG.warning("Authentication is enabled but no valid " - "authentication backends are configured... " - "Falling back to no authentication.") - self.__auth_config['enabled'] = False - - def __get_config_dict(self): - """ - Get server config information from the configuration file. Raise - ValueError if the configuration file is invalid. - """ - LOG.debug(self.__configuration_file) - cfg_dict = load_json(self.__configuration_file, {}) - if cfg_dict != {}: - check_file_owner_rw(self.__configuration_file) + self._configuration.enabled = True + + if not self.is_enabled: + return + + # If no authentication methods are enabled, or none of them could + # starts due to lack of valid configuration or lack of dependencies, + # fall back to disabling authentication. + found_working_auth_method = False + if self._configuration.method_dictionary.enabled: + found_working_auth_method = True + + if self._configuration.method_pam.enabled: + if "pam" in UNSUPPORTED_METHODS: + LOG.warning("PAM authentication was enabled but " + "prerequisites are NOT installed on the system" + "... 
Disabling PAM authentication.") + self._configuration.method_pam.enabled = False + else: + found_working_auth_method = True + + if self._configuration.method_ldap.enabled: + if "ldap" in UNSUPPORTED_METHODS: + LOG.warning("LDAP authentication was enabled but " + "prerequisites are NOT installed on the system" + "... Disabling LDAP authentication.") + self._configuration.method_ldap.enabled = False + else: + found_working_auth_method = True + + if not found_working_auth_method: + if force_auth: + LOG.warning("Authentication was force-enabled, but no " + "valid authentication backends are " + "configured... The server will only allow " + "the master superuser (\"root\") access.") + else: + LOG.warning("Authentication is enabled but no valid " + "authentication backends are configured... " + "Falling back to no authentication!") + self._configuration.enabled = False + + # Pre-compile the regular expressions from 'regex_groups'. + if self.is_enabled and self._configuration.regex_groups.enabled: + self.__regex_groups = {g: [re.compile(rx) for rx in l] + for g, l in self._configuration + .regex_groups.groups.items() + } else: - # If the configuration dict is empty, it means a JSON couldn't - # have been parsed from it. 
- raise ValueError("Server configuration file was invalid, or " - "empty.") - return cfg_dict - - def reload_config(self): - LOG.info("Reload server configuration file...") - try: - cfg_dict = self.__get_config_dict() - - prev_max_run_count = self.__max_run_count - new_max_run_count = cfg_dict.get('max_run_count', None) - if prev_max_run_count != new_max_run_count: - self.__max_run_count = new_max_run_count - LOG.debug("Changed 'max_run_count' value from %s to %s", - prev_max_run_count, new_max_run_count) - - prev_store_config = json.dumps(self.__store_config, sort_keys=True, - indent=2) - new_store_config_val = cfg_dict.get('store', {}) - new_store_config = json.dumps(new_store_config_val, sort_keys=True, - indent=2) - if prev_store_config != new_store_config: - self.__store_config = new_store_config_val - LOG.debug("Updating 'store' config from %s to %s", - prev_store_config, new_store_config) - - update_sessions = False - auth_fields_to_update = ['session_lifetime', 'refresh_time', - 'logins_until_cleanup'] - for field in auth_fields_to_update: - if field in self.__auth_config: - prev_value = self.__auth_config[field] - new_value = cfg_dict['authentication'].get(field, 0) - if prev_value != new_value: - self.__auth_config[field] = new_value - LOG.debug("Changed '%s' value from %s to %s", - field, prev_value, new_value) - update_sessions = True - - if update_sessions: - # Update configuration options of the already existing - # sessions. 
- for session in self.__sessions: - session.session_lifetime = \ - self.__auth_config['session_lifetime'] - session.refresh_time = self.__auth_config['refresh_time'] - - LOG.info("Done.") - except ValueError as ex: - LOG.error("Couldn't reload server configuration file") - LOG.error(str(ex)) + self.__regex_groups = {} - @property - def is_enabled(self): - return self.__auth_config.get('enabled') + def configuration_reloaded_update_sessions(self): + LOG.info("Updating lifetime of existing sessions ...") + for session in self.__sessions: + session.session_lifetime = self._configuration.session_lifetime + session.refresh_time = self._configuration.refresh_time @property - def worker_processes(self): - return self.__worker_processes - - def get_realm(self): - return { - "realm": self.__auth_config.get('realm_name'), - "error": self.__auth_config.get('realm_error') - } + def is_enabled(self) -> bool: + return self._configuration.enabled @property def default_superuser_name(self) -> Optional[str]: - """ Get default superuser name. """ - root = self.__auth_config['method_root'].split(":") + """Get default superuser name.""" + root = self.__root_sha.split(':', 1) - # Previously the root file doesn't contain the user name. In this case + # Previously, the root file doesn't contain the user name. In this case # we will return with no user name. if len(root) <= 1: return None @@ -355,13 +235,13 @@ def set_database_connection(self, connection): """ self.__database_connection = connection - def __handle_validation(self, auth_string): + def __handle_validation(self, auth_string: str) -> dict: """ - Validate an oncoming authorization request - against some authority controller. + Validate an oncoming authorization request against some authority + controller. - Returns False if no validation was done, or a validation object - if the user was successfully authenticated. + Returns `False` if no validation was done, or a validation object if + the user was successfully authenticated. 
This validation object contains two keys: username and groups. """ @@ -370,42 +250,40 @@ def __handle_validation(self, auth_string): or self.__try_auth_pam(auth_string) \ or self.__try_auth_ldap(auth_string) if not validation: - return False + return {} # If a validation method is enabled and regex_groups is enabled too, - # we will extend the 'groups'. - extra_groups = self.__try_regex_groups(validation['username']) + # we will extend the "groups". + extra_groups = self.__try_regex_groups(validation["username"]) if extra_groups: - already_groups = set(validation['groups']) - validation['groups'] = list(already_groups | extra_groups) + validation["groups"] = sorted(set(validation.get("groups", [])) + | extra_groups) - LOG.debug('User validation details: %s', str(validation)) + LOG.debug("User validation details: %s", str(validation)) return validation - def __is_method_enabled(self, method): + def __is_method_enabled(self, method: str) -> bool: return method not in UNSUPPORTED_METHODS and \ - 'method_' + method in self.__auth_config and \ - self.__auth_config['method_' + method].get('enabled') + getattr(self._configuration, f"method_{method}").enabled - def __try_auth_root(self, auth_string): + def __try_auth_root(self, auth_string: str) -> dict: """ Try to authenticate the user against the root username:password's hash. 
""" user_name = SessionManager.get_user_name(auth_string) - sha = hashlib.sha256(auth_string.encode('utf8')).hexdigest() + sha = hashlib.sha256(auth_string.encode("utf-8")).hexdigest() - if f"{user_name}:{sha}" == self.__auth_config['method_root']: - return { - 'username': SessionManager.get_user_name(auth_string), - 'groups': [], - 'root': True - } + if self.__root_sha == f"{user_name}:{sha}": + return {"username": user_name, + "groups": [], + "root": True + } - return False + return {} - def __try_auth_token(self, auth_string): + def __try_auth_token(self, auth_string: str) -> dict: if not self.__database_connection: - return None + return {} user_name, token = auth_string.split(':', 1) @@ -420,74 +298,64 @@ def __try_auth_token(self, auth_string): .limit(1).one_or_none() if not auth_session: - return None + return {} return auth_session except Exception as e: - LOG.error("Couldn't check login in the database: ") - LOG.error(str(e)) + LOG.error("Couldn't check login in the database: %s", str(e)) finally: if transaction: transaction.close() - return None + return {} - def __try_auth_dictionary(self, auth_string): + def __try_auth_dictionary(self, auth_string: str) -> dict: """ Try to authenticate the user against the hardcoded credential list. Returns a validation object if successful, which contains the users' groups. 
""" - method_config = self.__auth_config.get('method_dictionary') - if not method_config: - return False - - valid = self.__is_method_enabled('dictionary') and \ - auth_string in method_config.get('auths') - if not valid: - return False + if not self.__is_method_enabled("dictionary") or \ + auth_string not in self._configuration \ + .method_dictionary.auths: + return {} username = SessionManager.get_user_name(auth_string) - group_list = method_config['groups'][username] if \ - 'groups' in method_config and \ - username in method_config['groups'] else [] + return {"username": username, + "groups": self._configuration.method_dictionary + .groups.get(username, []) + } - return { - 'username': username, - 'groups': group_list - } - - def __try_auth_pam(self, auth_string): + def __try_auth_pam(self, auth_string: str) -> dict: """ Try to authenticate user based on the PAM configuration. """ - if self.__is_method_enabled('pam'): - username, password = auth_string.split(':', 1) - if cc_pam.auth_user(self.__auth_config['method_pam'], - username, password): - # PAM does not hold a group membership list we can reliably - # query. - return {'username': username} + if not self.__is_method_enabled("pam"): + return {} - return False + username, password = auth_string.split(':', 1) + if cc_pam.auth_user(self._configuration.method_pam, + username, password): + # PAM does not hold a group membership list we can reliably query. + return {"username": username} + + return {} - def __try_auth_ldap(self, auth_string): + def __try_auth_ldap(self, auth_string: str) -> dict: """ Try to authenticate user to all the configured authorities. 
""" - if self.__is_method_enabled('ldap'): + if self.__is_method_enabled("ldap"): username, password = auth_string.split(':', 1) - - ldap_authorities = self.__auth_config['method_ldap'] \ - .get('authorities') - for ldap_conf in ldap_authorities: + for ldap_conf in self._configuration.method_ldap.authorities: if cc_ldap.auth_user(ldap_conf, username, password): groups = cc_ldap.get_groups(ldap_conf, username, password) self.__update_groups(username, groups) - return {'username': username, 'groups': groups} + return {"username": username, + "groups": groups} - return False + return {} def __update_groups(self, user_name, groups): """ @@ -514,26 +382,19 @@ def __update_groups(self, user_name, groups): return False - def __try_regex_groups(self, username): + def __try_regex_groups(self, username) -> Set[str]: """ - Return a set of groups that the user belongs to, depending on whether - the username matches the regular expression of the group. - + Returns a set of groups that the user belongs to, depending on whether + the username matches a regular expression of the group. """ - if not self.__regex_groups_enabled: - return set() - - matching_groups = set() - for group_name, regex_list in self.__group_regexes_compiled.items(): - for r in regex_list: - if re.search(r, username): - matching_groups.add(group_name) - - return matching_groups + return {group + for group, regexes in self.__regex_groups.items() + for regex in regexes + if re.search(regex, username)} @staticmethod def get_user_name(auth_string): - return auth_string.split(':')[0] + return auth_string.split(':', 1)[0] def get_db_auth_session_tokens(self, user_name): """ @@ -560,9 +421,9 @@ def get_db_auth_session_tokens(self, user_name): return None - def __is_root_user(self, user_name): + def __is_root_user(self, user_name: str) -> bool: """ Return True if the given user has system permissions. 
""" - if self.__auth_config['method_root'].split(":")[0] == user_name: + if self.default_superuser_name == user_name: return True transaction = None @@ -593,19 +454,19 @@ def __create_local_session(self, token, user_name, groups, is_root, return _Session( token, user_name, groups, - self.__auth_config['session_lifetime'], - self.__refresh_time, is_root, self.__database_connection, - last_access, can_expire) + self._configuration.session_lifetime, + self._configuration.refresh_time, + is_root, self.__database_connection, last_access, can_expire) def create_session(self, auth_string): - """ Creates a new session for the given auth-string. """ - if not self.__auth_config['enabled']: + """Creates a new session for the given auth-string.""" + if not self.is_enabled: return None # Perform cleanup of session memory, if neccessary. self.__logins_since_prune += 1 if self.__logins_since_prune >= \ - self.__auth_config['logins_until_cleanup']: + self._configuration.logins_until_cleanup: self.__cleanup_sessions() # Try authenticate user with personal access token. @@ -622,7 +483,7 @@ def create_session(self, auth_string): return False # Generate a new token and create a local session. - token = generate_session_token() + token = generate_random_token(SESSION_TOKEN_LENGTH) user_name = validation.get('username') groups = validation.get('groups', []) is_root = validation.get('root', False) @@ -636,8 +497,7 @@ def create_session(self, auth_string): if self.__database_connection: try: transaction = self.__database_connection() - record = SessionRecord(token, user_name, - ';'.join(groups)) + record = SessionRecord(token, user_name, ';'.join(groups)) transaction.add(record) transaction.commit() except Exception as e: @@ -650,56 +510,6 @@ def create_session(self, auth_string): return local_session - def get_max_run_count(self): - """ - Returns the maximum storable run count. If the value is None it means - we can upload unlimited number of runs. 
- """ - return self.__max_run_count - - def get_analysis_statistics_dir(self): - """ - Get directory where the compressed analysis statistics files should be - stored. If the value is None it means we do not want to store - analysis statistics information on the server. - """ - - return self.__store_config.get('analysis_statistics_dir') - - def get_failure_zip_size(self): - """ - Maximum size of the collected failed zips which can be store on the - server. - """ - limit = self.__store_config.get('limit', {}) - return limit.get('failure_zip_size') - - def get_compilation_database_size(self): - """ - Limit of the compilation database file size. - """ - limit = self.__store_config.get('limit', {}) - return limit.get('compilation_database_size') - - def is_keepalive_enabled(self): - """ - True if the keepalive functionality is explicitly enabled, otherwise it - will return False. - """ - return self.__keepalive_config.get('enabled') - - def get_keepalive_idle(self): - """ Get keepalive idle time. """ - return self.__keepalive_config.get('idle') - - def get_keepalive_interval(self): - """ Get keepalive interval time. """ - return self.__keepalive_config.get('interval') - - def get_keepalive_max_probe(self): - """ Get keepalive max probe count. """ - return self.__keepalive_config.get('max_probe') - def __get_local_session_from_db(self, token): """ Creates a local session if a valid session token can be found in the diff --git a/web/server/codechecker_server/tmp.py b/web/server/codechecker_server/tmp.py deleted file mode 100644 index bbc5e77bea..0000000000 --- a/web/server/codechecker_server/tmp.py +++ /dev/null @@ -1,37 +0,0 @@ -# ------------------------------------------------------------------------- -# -# Part of the CodeChecker project, under the Apache License v2.0 with -# LLVM Exceptions. See LICENSE for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ------------------------------------------------------------------------- -""" -Temporary directory module. -""" - - -import datetime -import hashlib -import os - - -from codechecker_common.logger import get_logger - -LOG = get_logger('system') - - -def get_tmp_dir_hash(): - """Generate a hash based on the current time and process id.""" - - pid = os.getpid() - time = datetime.datetime.now() - - data = str(pid) + str(time) - - dir_hash = hashlib.md5() - dir_hash.update(data.encode("utf-8")) - - LOG.debug('The generated temporary directory hash is %s.', - dir_hash.hexdigest()) - - return dir_hash.hexdigest() diff --git a/web/server/config/server_config.json b/web/server/config/server_config.json index e42745f08d..aa72fa0142 100644 --- a/web/server/config/server_config.json +++ b/web/server/config/server_config.json @@ -15,8 +15,6 @@ }, "authentication": { "enabled" : false, - "realm_name" : "CodeChecker Privileged server", - "realm_error" : "Access requires valid credentials.", "session_lifetime" : 300, "refresh_time" : 60, "logins_until_cleanup" : 30, diff --git a/web/server/tests/unit/test_ccldap.py b/web/server/tests/unit/test_ccldap.py index ad4405f8e1..045d4a0bea 100644 --- a/web/server/tests/unit/test_ccldap.py +++ b/web/server/tests/unit/test_ccldap.py @@ -13,7 +13,12 @@ import unittest from unittest.mock import patch +from codechecker_common.configuration_access import \ + Configuration, OptionDirectory, Schema + from codechecker_server.auth import cc_ldap +from codechecker_server.server_configuration import \ + register_configuration_options class MockLdap: @@ -57,6 +62,21 @@ def search_s( return [] +SERVER_CFG_SCHEMA = register_configuration_options(Schema()) + + +def _make_ldap_config(authority_configuration: dict) -> OptionDirectory: + full_config_stub = { + "authentication": { + "method_ldap": { + "authorities": [authority_configuration] + } + } + } + cfg = 
Configuration.from_memory(SERVER_CFG_SCHEMA, full_config_stub) + return cfg.authentication.method_ldap.authorities[0] + + class CCLDAPTest(unittest.TestCase): top = ('o=test', {'o': ['test']}) @@ -71,20 +91,22 @@ class CCLDAPTest(unittest.TestCase): # It takes the form {dn: {attr: [value, ...], ...}, ...}. directory = dict([top, example, other, service_user, user2]) - # service_user is used as a service user in the configuration. - ldap_config = {"connection_url": "ldap://localhost/", - "username": "cn=service_user,ou=example,o=test", - "password": "servicepw", - "referrals": False, - "deref": "always", - "accountBase": "ou=other,o=test", - "accountScope": "subtree", - "accountPattern": "(cn=$USN$)", - "groupBase": "o=test", - "groupScope": "subtree", - "groupPattern": "", - "groupNameAttr": "" - } + _ldap_config = { + "connection_url": "ldap://localhost/", + # service_user is used as a service user in the configuration. + "username": "cn=service_user,ou=example,o=test", + "password": "servicepw", + "referrals": False, + "deref": "always", + "accountBase": "ou=other,o=test", + "accountScope": "subtree", + "accountPattern": "(cn=$USN$)", + "groupBase": "o=test", + "groupScope": "subtree", + "groupPattern": "", + "groupNameAttr": "" + } + ldap_config = _make_ldap_config(_ldap_config) def setUp(self): self.ldap_patcher = patch('ldap.initialize') @@ -96,7 +118,8 @@ def test_empty_config(self): At least a connection_url is required in the ldap config. Without it no connection can be initialized. """ - with cc_ldap.LDAPConnection({}, None, None) as connection: + ldap_config = _make_ldap_config({}) + with cc_ldap.LDAPConnection(ldap_config, None, None) as connection: self.assertIsNone(connection) def test_anonymous_bind(self): @@ -111,6 +134,7 @@ def test_ldap_conn_context_bind_with_cred(self): Bind to LDAP server with username and credentials. 
""" ldap_config = {"connection_url": "ldap://localhost/"} + ldap_config = _make_ldap_config(ldap_config) with cc_ldap.LDAPConnection(ldap_config, 'cn=service_user,ou=example,o=test', @@ -123,6 +147,8 @@ def test_ldap_conn_context_anonym_no_pass_bind(self): but username is provided at context initialization. """ ldap_config = {"connection_url": "ldap://localhost/"} + ldap_config = _make_ldap_config(ldap_config) + with cc_ldap.LDAPConnection(ldap_config, 'cn=service_user,ou=example,o=test', '') as connection: @@ -134,6 +160,8 @@ def test_ldap_conn_context_anonym_empty_pass_bind(self): but username and credentials provided context initialization. """ ldap_config = {"connection_url": "ldap://localhost/"} + ldap_config = _make_ldap_config(ldap_config) + with cc_ldap.LDAPConnection(ldap_config, 'cn=service_user,ou=example,o=test', 'servicepw') as connection: @@ -144,6 +172,8 @@ def test_get_user_dn(self): Search for the full user DN. """ ldap_config = {"connection_url": "ldap://localhost/"} + ldap_config = _make_ldap_config(ldap_config) + with cc_ldap.LDAPConnection(ldap_config, 'cn=service_user,ou=example,o=test') \ as connection: