diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 16cc9c75..66862953 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -21,6 +21,8 @@ "vscode": { "extensions": [ "charliermarsh.ruff", + "ms-python.debugpy", + "ms-python.python", "Vue.volar" ] } diff --git a/.gitignore b/.gitignore index 3732e540..d34fb719 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,8 @@ nginx/ssl/keys/* **/node_modules **/junit.xml -# Screenshots +# Screenshots and logs +logs/*.log.* logs/screenshots/* !logs/screenshots/.gitkeep diff --git a/.vscode/launch.json b/.vscode/launch.json index deaaa013..855ea2f8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "configurations": [ { "name": "BugHog", - "type": "python", + "type": "debugpy", "request": "launch", "program": "/app/bci/app.py", "purpose": [ diff --git a/Dockerfile b/Dockerfile index 9439b3d8..570d62c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:lts-alpine as ui-build-stage +FROM node:22.12-alpine as ui-build-stage WORKDIR /app COPY /bci/web/vue/package*.json ./ RUN npm install @@ -7,9 +7,16 @@ RUN npm run build FROM openresty/openresty:1.27.1.1-bullseye AS nginx +RUN apt update -y && \ + apt install -y curl && \ + rm -rf /var/lib/apt/lists/* +RUN mkdir -p /www/data/js && \ + curl https://cdn.bokeh.org/bokeh/release/bokeh-3.6.1.min.js -o /www/data/js/bokeh.min.js && \ + curl https://cdn.bokeh.org/bokeh/release/bokeh-api-3.6.1.min.js -o /www/data/js/bokeh-api.min.js COPY ./nginx/start.sh /usr/local/bin/ COPY ./nginx/config /etc/nginx/config COPY --from=ui-build-stage /app/dist /www/data +COPY --from=ui-build-stage /app/node_modules/ace-builds/src-min-noconflict /www/data/node_modules/ace-builds/src-min-noconflict CMD ["start.sh"] diff --git a/bci/browser/binary/binary.py b/bci/browser/binary/binary.py index f0624fed..3a586fb8 100644 --- a/bci/browser/binary/binary.py +++ b/bci/browser/binary/binary.py @@ -2,6 +2,7 @@ 
import logging import os +import time from abc import abstractmethod from typing import Optional @@ -53,7 +54,7 @@ def origin(self) -> str: raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'") @staticmethod - def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: + def _list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: binaries = [] for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')): bin_entry = {} @@ -63,10 +64,10 @@ def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: @staticmethod def list_artisanal_binaries(bin_folder_path: str, executable_name: str): - return Binary.get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list() + return Binary._get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list() @staticmethod - def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager: + def _get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager: return ArtisanalBuildManager(bin_folder_path, executable_name) def fetch_binary(self): @@ -80,8 +81,10 @@ def fetch_binary(self): return # Try to download binary elif self.is_available_online(): + start = time.time() self.download_binary() - logger.info(f'Binary for {self.state.index} downloaded') + elapsed_time = time.time() - start + logger.info(f'Binary for {self.state.index} downloaded in {elapsed_time:.2f}s') BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state) else: raise BuildNotAvailableError(self.browser_name, self.state) diff --git a/bci/browser/binary/factory.py b/bci/browser/binary/factory.py index 2206d77f..736f24e8 100644 --- a/bci/browser/binary/factory.py +++ b/bci/browser/binary/factory.py @@ -1,53 +1,13 @@ -from typing import Type - from bci.browser.binary.binary import Binary from bci.browser.binary.vendors.chromium import ChromiumBinary from 
bci.browser.binary.vendors.firefox import FirefoxBinary from bci.version_control.states.state import State -def list_downloaded_binaries(browser): - return __get_class(browser).list_downloaded_binaries() - - -def list_artisanal_binaries(browser): - return __get_class(browser).list_artisanal_binaries() - - -def update_artisanal_binaries(browser): - return __get_class(browser).get_artisanal_manager().update() - - -def download_online_binary(browser, revision_number): - return __get_class(browser).download_online_binary(revision_number) - - -def binary_is_available(state: State) -> bool: - return __has_available_binary_online(state) or __has_available_binary_artisanal(state) - - -def __has_available_binary_online(state: State) -> bool: - return __get_class(state.browser_name).has_available_binary_online() - - -def __has_available_binary_artisanal(state: State) -> bool: - return __get_class(state.browser_name).get_artisanal_manager().has_artisanal_binary_for(state) - - def get_binary(state: State) -> Binary: return __get_object(state) -def __get_class(browser_name: str) -> Type[Binary]: - match browser_name: - case 'chromium': - return ChromiumBinary - case 'firefox': - return FirefoxBinary - case _: - raise ValueError(f'Unknown browser {browser_name}') - - def __get_object(state: State) -> Binary: match state.browser_name: case 'chromium': diff --git a/bci/browser/binary/vendors/chromium.py b/bci/browser/binary/vendors/chromium.py index 8511b73e..f7a99a08 100644 --- a/bci/browser/binary/vendors/chromium.py +++ b/bci/browser/binary/vendors/chromium.py @@ -68,11 +68,23 @@ def download_binary(self): bin_path = self.get_potential_bin_path() os.makedirs(os.path.dirname(bin_path), exist_ok=True) unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux") + self.__remove_unnecessary_files(unzipped_folder_path) util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path)) cli.execute_and_return_status("chmod -R a+x %s" % 
os.path.dirname(bin_path)) # Remove temporary files in /tmp/COMMIT_POS shutil.rmtree(os.path.dirname(zip_file_path)) + def __remove_unnecessary_files(self, binary_folder_path: str) -> None: + """ + Remove binary files that are not necessary for default usage of the browser. + This is to improve performance, especially when caching binary files. + + :param binary_folder_path: Path to the folder where the binary files are stored. + """ + locales_folder_path = os.path.join(binary_folder_path, 'locales') + if os.path.isdir(locales_folder_path): + util.remove_all_in_folder(locales_folder_path, except_files=['en-GB.pak', 'en-US.pak']) + def _get_version(self) -> str: command = "./chrome --version" if bin_path := self.get_bin_path(): @@ -86,11 +98,11 @@ def _get_version(self) -> str: @staticmethod def list_downloaded_binaries() -> list[dict[str, str]]: - return Binary.list_downloaded_binaries(BIN_FOLDER_PATH) + return Binary._list_downloaded_binaries(BIN_FOLDER_PATH) @staticmethod def get_artisanal_manager() -> ArtisanalBuildManager: - return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) + return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) browser_version_to_driver_version = { '88': "88.0.4324.96", diff --git a/bci/browser/binary/vendors/firefox.py b/bci/browser/binary/vendors/firefox.py index c928e659..2cb32824 100644 --- a/bci/browser/binary/vendors/firefox.py +++ b/bci/browser/binary/vendors/firefox.py @@ -95,11 +95,11 @@ def get_driver_version(self, browser_version): @staticmethod def list_downloaded_binaries() -> list[dict[str, str]]: - return Binary.list_downloaded_binaries(BIN_FOLDER_PATH) + return Binary._list_downloaded_binaries(BIN_FOLDER_PATH) @staticmethod def get_artisanal_manager() -> ArtisanalBuildManager: - return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) + return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) browser_version_to_driver_version = { '84': "0.28.0", diff --git 
a/bci/configuration.py b/bci/configuration.py index 166e0e03..4aa4692a 100644 --- a/bci/configuration.py +++ b/bci/configuration.py @@ -70,16 +70,17 @@ def initialize_folders(): def get_database_params() -> DatabaseParameters: required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD'] missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]] + binary_cache_limit = int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)) if missing_database_params: logger.info(f'Could not find database parameters {missing_database_params}, using database container...') - return container.run() + return container.run(binary_cache_limit) else: database_params = DatabaseParameters( os.getenv('BCI_MONGO_HOST'), os.getenv('BCI_MONGO_USERNAME'), os.getenv('BCI_MONGO_PASSWORD'), os.getenv('BCI_MONGO_DATABASE'), - int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)), + binary_cache_limit, ) logger.info(f"Found database environment variables '{database_params}'") return database_params @@ -140,7 +141,7 @@ def configure_loggers(): bci_logger.addHandler(stream_handler) # Configure file handler - file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=2) + file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=3, maxBytes=8*1024*1024) file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(Loggers.formatter) bci_logger.addHandler(file_handler) @@ -154,8 +155,7 @@ def configure_loggers(): # Configure memory handler Loggers.memory_handler.setLevel(logging.INFO) - buffer_formatter = logging.handlers.BufferingHandler(Loggers.formatter) - Loggers.memory_handler.setFormatter(buffer_formatter) + Loggers.memory_handler.setFormatter(Loggers.formatter) bci_logger.addHandler(Loggers.memory_handler) # Log uncaught exceptions diff --git a/bci/database/mongo/binary_cache.py b/bci/database/mongo/binary_cache.py index 
7a98434e..bf19086c 100644 --- a/bci/database/mongo/binary_cache.py +++ b/bci/database/mongo/binary_cache.py @@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None: return True @staticmethod - def store_binary_files(binary_executable_path: str, state: State) -> bool: + def store_binary_files(binary_executable_path: str, state: State): """ Stores the files in the folder of the given path in the database. @@ -80,35 +80,60 @@ def store_binary_files(binary_executable_path: str, state: State) -> bool: if MongoDB().binary_cache_limit <= 0: return False - while BinaryCache.__count_cached_binaries() >= MongoDB.binary_cache_limit: + while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit: if BinaryCache.__count_cached_binaries(state_type='revision') <= 0: # There are only version binaries in the cache, which will never be removed return False BinaryCache.__remove_least_used_revision_binary_files() + logger.debug(f"Caching binary files for {state}...") fs = MongoDB().gridfs + binary_folder_path = os.path.dirname(binary_executable_path) + last_access_ts = datetime.datetime.now() + def store_file(file_path: str) -> None: + # Max chunk size is 16 MB (meta-data included) + chunk_size = 1024 * 1024 * 15 + with open(file_path, 'rb') as file: + file_id = fs.new_file( + file_type='binary', + browser_name=state.browser_name, + state_type=state.type, + state_index=state.index, + relative_file_path=os.path.relpath(file_path, binary_folder_path), + access_count=0, + last_access_ts=last_access_ts, + chunk_size=chunk_size + ) + while chunk := file.read(chunk_size): + file_id.write(chunk) + file_id.close() + start_time = time.time() - with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + futures = [] for root, _, files in os.walk(binary_folder_path): for file in files: file_path = os.path.join(root, file) - with open(file_path, 'rb') as file: - 
executor.submit( - fs.put, - file.read(), - file_type='binary', - browser_name=state.browser_name, - state_type=state.type, - state_index=state.index, - relative_file_path=os.path.relpath(file_path, binary_folder_path), - access_count=0, - last_access_ts=datetime.datetime.now(), - ) + future = executor.submit(store_file, file_path) + futures.append(future) + logger.debug(f"Number of files to cache: {len(futures)}") executor.shutdown(wait=True) - elapsed_time = time.time() - start_time - logger.debug(f'Stored binary in {elapsed_time:.2f}s') - return True + + futures_with_exception = [future for future in futures if future.exception() is not None] + if futures_with_exception: + logger.error( + ( + f"Something went wrong caching binary files for {state}. " + "Removing possibly incomplete binary files from cache." + ), + exc_info=futures_with_exception[0].exception() + ) + BinaryCache.__remove_revision_binary_files(state.type, state.index) + logger.debug(f"Removed possibly incomplete cached binary files for {state}.") + else: + elapsed_time = time.time() - start_time + logger.debug(f'Stored binary in {elapsed_time:.2f}s') @staticmethod def __count_cached_binaries(state_type: Optional[str] = None) -> int: @@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None: """ Removes the least used revision binary files from the database. 
""" - fs = MongoDB().gridfs files_collection = MongoDB().get_collection('fs.files') grid_cursor = files_collection.find( @@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None: ) for state_doc in grid_cursor: state_index = state_doc['state_index'] - for grid_doc in files_collection.find({'state_index': state_index, 'state_type': 'revision'}): - fs.delete(grid_doc['_id']) + BinaryCache.__remove_revision_binary_files('revision', state_index) break + + @staticmethod + def __remove_revision_binary_files(state_type: str, state_index: int) -> None: + """ + Removes the binary files associated with the parameters. + """ + fs = MongoDB().gridfs + files_collection = MongoDB().get_collection('fs.files') + + for grid_doc in files_collection.find({'state_index': state_index, 'state_type': state_type}): + fs.delete(grid_doc['_id']) diff --git a/bci/database/mongo/container.py b/bci/database/mongo/container.py index 16e6b452..dbe49dcb 100644 --- a/bci/database/mongo/container.py +++ b/bci/database/mongo/container.py @@ -20,7 +20,7 @@ DEFAULT_HOST = 'bh_db' -def run() -> DatabaseParameters: +def run(binary_cache_limit: int) -> DatabaseParameters: docker_client = docker.from_env() try: mongo_container = docker_client.containers.get(DEFAULT_HOST) @@ -33,7 +33,7 @@ def run() -> DatabaseParameters: LOGGER.debug('MongoDB container not found, creating a new one...') __create_new_container(DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, DEFAULT_HOST) LOGGER.debug('MongoDB container has started!') - return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, 0) + return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, binary_cache_limit) def stop(): @@ -51,7 +51,7 @@ def __create_new_container(user: str, pw: str, db_name, db_host): raise AttributeError("Could not create container because of missing HOST_PWD environment variable") docker_client = docker.from_env() docker_client.containers.run( - 'mongo:5.0.17', + 'mongo:6', 
name=db_host, hostname=db_host, network=NETWORK_NAME, diff --git a/bci/database/mongo/mongodb.py b/bci/database/mongo/mongodb.py index df8c5678..456bad7a 100644 --- a/bci/database/mongo/mongodb.py +++ b/bci/database/mongo/mongodb.py @@ -62,6 +62,7 @@ def connect(self, db_params: DatabaseParameters) -> None: retryWrites=False, serverSelectionTimeoutMS=10000, ) + self.binary_cache_limit = db_params.binary_cache_limit logger.info(f'Binary cache limit set to {db_params.binary_cache_limit}') # Force connection to check whether MongoDB server is reachable try: diff --git a/bci/database/mongo/revision_cache.py b/bci/database/mongo/revision_cache.py index 6a06efda..4270b5a2 100644 --- a/bci/database/mongo/revision_cache.py +++ b/bci/database/mongo/revision_cache.py @@ -20,7 +20,7 @@ def store_firefox_binary_availability(data: dict) -> None: collection.delete_many({}) collection.insert_many(values) - logger.info(f'Revision Cache was updates ({len(values)} documents).') + logger.info(f'Revision Cache was updated ({len(values)} documents).') @staticmethod def firefox_get_revision_number(revision_id: str) -> int: diff --git a/bci/distribution/worker_manager.py b/bci/distribution/worker_manager.py index e6194012..3390a1cc 100644 --- a/bci/distribution/worker_manager.py +++ b/bci/distribution/worker_manager.py @@ -42,6 +42,8 @@ def __run_container(self, params: WorkerParameters, blocking_wait=True) -> None: container_name = f'bh_worker_{container_id}' def start_container_thread(): + if (host_pwd := os.getenv('HOST_PWD', None)) is None: + raise AttributeError('Could not find HOST_PWD environment var') try: # Sometimes, it takes a while for Docker to remove the container while True: @@ -60,17 +62,20 @@ def start_container_thread(): for container in active_containers: logger.info(f'Removing old container \'{container.attrs["Name"]}\' to start new one') container.remove(force=True) - if (host_pwd := os.getenv('HOST_PWD', None)) is None: - raise AttributeError('Could not find 
HOST_PWD environment var') - self.client.containers.run( + except docker.errors.APIError: + logger.error("Could not consult list of active containers", exc_info=True) + + container = None + try: + container = self.client.containers.run( f'bughog/worker:{Global.get_tag()}', name=container_name, hostname=container_name, shm_size='2gb', network='bh_net', - mem_limit='1g', # To prevent one container from consuming multiple gigs of memory (was the case for a Firefox evaluation) - detach=False, - remove=True, + mem_limit='4g', # To prevent one container from consuming multiple gigs of memory (was the case for a Firefox evaluation) + mem_reservation='2g', + detach=True, labels=['bh_worker'], command=[params.serialize()], volumes=[ @@ -86,20 +91,34 @@ def start_container_thread(): '/dev/shm:/dev/shm', ], ) - logger.debug(f"Container '{container_name}' finished experiments for '{params.state}'") - Clients.push_results_to_all() + result = container.wait() + if result["StatusCode"] != 0: + logger.error( + f"'{container_name}' exited unexpectedly with status code {result['StatusCode']}. " + "Check the worker logs in ./logs/ for more information." 
+ ) + else: + logger.debug(f"Container '{container_name}' finished experiments for '{params.state}'") + Clients.push_results_to_all() + except docker.errors.APIError: + logger.error("Received a docker error", exc_info=True) except docker.errors.ContainerError: logger.error( f"Could not run container '{container_name}' or container was unexpectedly removed", exc_info=True ) + if container is not None: + container_info = container.attrs["State"] + logger.error(f"'{container_name}' exited unexpectedly with {container_info}", exc_info=True) finally: + if container is not None: + container.remove() self.container_id_pool.put(container_id) thread = threading.Thread(target=start_container_thread) thread.start() logger.info(f"Container '{container_name}' started experiments for '{params.state}'") - # To avoid race-condition where more than max containers are started - time.sleep(3) + # Sleep to avoid all workers downloading browser binaries at once, clogging up all IO. + time.sleep(5) def get_nb_of_running_worker_containers(self): return len(self.get_runnning_containers()) diff --git a/bci/evaluations/custom/custom_evaluation.py b/bci/evaluations/custom/custom_evaluation.py index 96a441ba..76da69ff 100644 --- a/bci/evaluations/custom/custom_evaluation.py +++ b/bci/evaluations/custom/custom_evaluation.py @@ -14,6 +14,8 @@ class CustomEvaluationFramework(EvaluationFramework): + __files_and_folders_to_ignore = ['.DS_Store'] + def __init__(self): super().__init__() self.dir_tree = self.initialize_dir_tree() @@ -37,7 +39,10 @@ def set_nested_value(d: dict, keys: list[str], value: dict): # Remove base path from root root = root[len(path):] keys = root.split('/')[1:] - subdir_tree = {dir: {} for dir in dirs} | {file: None for file in files} + subdir_tree = ( + {dir: {} for dir in dirs if dir not in CustomEvaluationFramework.__files_and_folders_to_ignore} | + {file: None for file in files if file not in CustomEvaluationFramework.__files_and_folders_to_ignore} + ) if root: 
set_nested_value(dir_tree, keys, subdir_tree) else: @@ -85,6 +90,7 @@ def __get_url_queue(project: str, project_path: str, experiment: str) -> Optiona else: # Otherwise, a default URL queue is used, based on the domain that hosts the main page experiment_path = os.path.join(project_path, experiment) + assert os.path.isdir(experiment_path) for domain in os.listdir(experiment_path): main_folder_path = os.path.join(experiment_path, domain, 'main') if os.path.exists(main_folder_path): @@ -164,19 +170,14 @@ def get_mech_groups(self, project: str) -> list[tuple[str, bool]]: def get_projects(self) -> list[str]: return sorted(list(self.tests_per_project.keys())) - def create_empty_project(self, project_name: str) -> bool: - if project_name is None or project_name == '': - logger.error('Given project name is invalid') - return False - + def create_empty_project(self, project_name: str): + self.is_valid_name(project_name) if project_name in self.dir_tree: - logger.error(f"Given project name '{project_name}' already exists") - return False + raise AttributeError(f"The given project name '{project_name}' already exists.") new_project_path = os.path.join(Global.custom_page_folder, project_name) os.mkdir(new_project_path) self.sync_with_folders() - return True def get_poc_structure(self, project: str, poc: str) -> dict: return self.dir_tree[project][poc] @@ -206,20 +207,23 @@ def update_poc_file(self, project: str, poc: str, domain: str, path: str, file_n return True return False - def create_empty_poc(self, project: str, poc_name: str) -> bool: + def create_empty_poc(self, project: str, poc_name: str): + self.is_valid_name(poc_name) poc_path = os.path.join(Global.custom_page_folder, project, poc_name) - if not os.path.exists(poc_path): - os.makedirs(poc_path) - self.sync_with_folders() - Clients.push_experiments_to_all() - return True - return False + if os.path.exists(poc_path): + raise AttributeError(f"The given PoC name '{poc_name}' already exists.") + + 
os.makedirs(poc_path) + self.sync_with_folders() + Clients.push_experiments_to_all() + - def add_page(self, project: str, poc: str, domain: str, path: str, file_type: str) -> bool: + def add_page(self, project: str, poc: str, domain: str, path: str, file_type: str): domain_path = os.path.join(Global.custom_page_folder, project, poc, domain) if not os.path.exists(domain_path): os.makedirs(domain_path) + self.is_valid_name(path) if file_type == 'py': file_name = path if path.endswith('.py') else path + '.py' file_path = os.path.join(domain_path, file_name) @@ -229,21 +233,19 @@ def add_page(self, project: str, poc: str, domain: str, path: str, file_type: st os.makedirs(page_path) new_file_name = f'index.{file_type}' file_path = os.path.join(page_path, new_file_name) + headers_file_path = os.path.join(page_path, 'headers.json') + if not os.path.exists(headers_file_path): + with open(headers_file_path, 'w') as file: + file.write(self.get_default_file_content('headers.json')) if os.path.exists(file_path): - return False + raise AttributeError(f"The given page '{path}' already exists.") with open(file_path, 'w') as file: file.write(self.get_default_file_content(file_type)) - if self.include_file_headers(file_type): - headers_file_path = os.path.join(page_path, 'headers.json') - if not os.path.exists(headers_file_path): - with open(headers_file_path, 'w') as file: - file.write(self.get_default_file_content('headers.json')) self.sync_with_folders() # Notify clients of change (an experiment might now be runnable) Clients.push_experiments_to_all() - return True def add_config(self, project: str, poc: str, type: str) -> bool: content = self.get_default_file_content(type) @@ -271,10 +273,6 @@ def get_default_file_content(file_type: str) -> str: with open(path, 'r') as file: return file.read() - @staticmethod - def include_file_headers(file_type: str) -> bool: - return file_type != 'py' - def sync_with_folders(self): self.dir_tree = self.initialize_dir_tree() 
self.tests_per_project = self.initialize_tests_and_interactions(self.dir_tree) diff --git a/bci/evaluations/evaluation_framework.py b/bci/evaluations/evaluation_framework.py index 10f0d1f7..86361c18 100644 --- a/bci/evaluations/evaluation_framework.py +++ b/bci/evaluations/evaluation_framework.py @@ -1,6 +1,6 @@ import logging import os -import traceback +import re from abc import ABC, abstractmethod from bci.browser.configuration.browser import Browser @@ -16,7 +16,7 @@ class EvaluationFramework(ABC): def __init__(self): self.should_stop = False - def evaluate(self, worker_params: WorkerParameters): + def evaluate(self, worker_params: WorkerParameters, is_worker=False): test_params = worker_params.create_test_params() if MongoDB().has_result(test_params): @@ -41,8 +41,10 @@ def evaluate(self, worker_params: WorkerParameters): logger.info(f'Test finalized: {test_params}') except Exception as e: state.condition = StateCondition.FAILED - logger.error('An error occurred during evaluation', exc_info=True) - traceback.print_exc() + if is_worker: + raise e + else: + logger.error('An error occurred during evaluation', exc_info=True) finally: browser.post_test_cleanup() @@ -70,3 +72,20 @@ def get_mech_groups(self, project: str) -> list[tuple[str, bool]]: Returns the available mechanism groups for this evaluation framework. """ pass + + @staticmethod + def is_valid_name(name: str) -> None: + """ + Checks whether the given string is a valid experiment, page or project name, and raises an exception if not. + This is to prevent issues with URL encoding and decoding. + + :param name: Name to be checked on validity. + """ + if name is None or name == '': + raise AttributeError("The given name cannot be empty.") + if re.match(r'^[A-Za-z0-9_\-.]+$', name) is None: + raise AttributeError( + f"The given name '{name}' is invalid. Only letters, numbers, " + "'.', '-' and '_' can be used, and the name should not be empty." 
+ ) + diff --git a/bci/main.py b/bci/main.py index a377f18b..912f2bbc 100644 --- a/bci/main.py +++ b/bci/main.py @@ -151,6 +151,7 @@ def quit_bughog(self) -> None: self.activate_stop_forcefully() mongodb_container.stop() logger.info('Stopping BugHog core...') + logging.shutdown() exit(0) def sigint_handler(self, sig_number, stack_frame) -> None: diff --git a/bci/util.py b/bci/util.py index 3175ca1c..f91bf5dc 100644 --- a/bci/util.py +++ b/bci/util.py @@ -7,6 +7,7 @@ import os import shutil import time +from typing import Optional import requests @@ -42,11 +43,13 @@ def copy_folder(src_path, dst_path): shutil.copytree(src_path, dst_path, dirs_exist_ok=True) -def remove_all_in_folder(folder_path: str) -> None: +def remove_all_in_folder(folder_path: str, except_files: Optional[list[str]]=None) -> None: + except_files = [] if except_files is None else except_files for root, dirs, files in os.walk(folder_path): for file_name in files: file_path = os.path.join(root, file_name) - os.remove(file_path) + if file_name not in except_files: + os.remove(file_path) for dir_name in dirs: dir_path = os.path.join(root, dir_name) shutil.rmtree(dir_path) diff --git a/bci/web/blueprints/api.py b/bci/web/blueprints/api.py index 607edcfe..de9090d7 100644 --- a/bci/web/blueprints/api.py +++ b/bci/web/blueprints/api.py @@ -157,10 +157,16 @@ def create_project(): 'msg': "No parameters found" } project_name = request.json.get('project_name') - return { - 'status': 'OK', - 'projects': __get_main().evaluation_framework.create_empty_project(project_name) - } + try: + __get_main().evaluation_framework.create_empty_project(project_name) + return { + 'status': 'OK' + } + except AttributeError as e: + return { + 'status': 'NOK', + 'msg': str(e) + } @api.route('/system/', methods=['GET']) @@ -218,7 +224,7 @@ def poc(project: str, poc: str): @api.route('/poc////', methods=['GET', 'POST']) -def get_poc_file_content(project: str, poc: str, file: str): +def poc_file_content(project: str, poc: str, 
file: str): domain = request.args.get('domain', '') path = request.args.get('path', '') if request.method == 'GET': @@ -257,14 +263,15 @@ def add_page(project: str, poc: str): domain = data['domain'] path = data['page'] file_type = data['file_type'] - success = __get_main().evaluation_framework.add_page(project, poc, domain, path, file_type) - if success: + try: + __get_main().evaluation_framework.add_page(project, poc, domain, path, file_type) return { 'status': 'OK' } - else: + except AttributeError as e: return { - 'status': 'NOK' + 'status': 'NOK', + 'msg': str(e) } @@ -311,14 +318,15 @@ def create_experiment(project: str): 'msg': 'Missing experiment name' } poc_name = data['poc_name'] - if __get_main().evaluation_framework.create_empty_poc(project, poc_name): + try: + __get_main().evaluation_framework.create_empty_poc(project, poc_name) return { 'status': 'OK' } - else: + except AttributeError as e: return { 'status': 'NOK', - 'msg': 'Could not create experiment' + 'msg': str(e) } diff --git a/bci/web/vue/index.html b/bci/web/vue/index.html index 6bf8ba4b..50d462fc 100644 --- a/bci/web/vue/index.html +++ b/bci/web/vue/index.html @@ -2,10 +2,10 @@ - + - - + + BugHog diff --git a/bci/web/vue/src/App.vue b/bci/web/vue/src/App.vue index 4d316bf0..c9a698d1 100644 --- a/bci/web/vue/src/App.vue +++ b/bci/web/vue/src/App.vue @@ -400,7 +400,11 @@ export default { const url = `/api/poc/${this.selected.project}/`; axios.post(url, {'poc_name': this.dialog.new_experiment_name}) .then((res) => { - this.dialog.new_experiment_name = null; + if (res.data.status === "OK") { + this.dialog.new_experiment_name = null; + } else { + alert(res.data.msg); + } }) .catch((error) => { console.error('Could not create new experiment'); @@ -411,10 +415,14 @@ export default { const new_project_name = this.dialog.new_project_name; axios.post(url, {'project_name': new_project_name}) .then((res) => { - this.dialog.new_project_name = null; - this.get_projects(() => { - 
this.set_curr_project(new_project_name); - }); + if (res.data.status === "OK") { + this.dialog.new_project_name = null; + this.get_projects(() => { + this.set_curr_project(new_project_name); + }); + } else { + alert(res.data.msg); + } }) .catch((error) => { console.error('Could not create new project', error); diff --git a/bci/web/vue/src/components/gantt.vue b/bci/web/vue/src/components/gantt.vue index fcbad9d4..a33bd103 100644 --- a/bci/web/vue/src/components/gantt.vue +++ b/bci/web/vue/src/components/gantt.vue @@ -22,21 +22,22 @@ export default { shift_down: false, } }, + created: function() { + document.addEventListener("keydown", (e) => { + if (e.key === "Shift") { + this.shift_down = true; + } + }); + document.addEventListener("keyup", (e) => { + if (e.key === "Shift") { + this.shift_down = false; + } + }); + }, methods: { init_plot() { console.log("Initializing Gantt chart..."); - document.addEventListener("keydown", (e) => { - if (e.shiftKey) { - this.shift_down = true; - } - }); - document.addEventListener("keyup", (e) => { - if (e.shiftKey) { - this.shift_down = false; - } - }); - if (this.revision_source.length === 0 || this.version_source.length === 0) { this.x_min = 1; this.x_max = 1000000; diff --git a/bci/web/vue/src/components/poc-editor.vue b/bci/web/vue/src/components/poc-editor.vue index 98385d7a..f807ea30 100644 --- a/bci/web/vue/src/components/poc-editor.vue +++ b/bci/web/vue/src/components/poc-editor.vue @@ -156,17 +156,18 @@ const domain = this.active_poc.active_domain; const file_path = this.active_poc.active_path; const file_name = this.active_file.name; - const params = { + const data = { "content": this.editor.session.getValue() }; + const get_params = {}; if (domain !== null) { - params["domain"] = domain; + get_params["domain"] = domain; } if (file_path !== null) { - params["path"] = file_path; + get_params["path"] = file_path; } const path = `/api/poc/${project}/${poc}/${file_name}/`; - axios.post(path, {"content": 
this.editor.session.getValue()}, {params: params}) + axios.post(path, data, {params: get_params}) .then((res) => { if (res.data.status == "NOK") { console.error("Could not update file on server"); @@ -221,10 +222,14 @@ "page": page, "file_type": file_type, }) - .then(() => { - this.update_poc_tree(); - this.dialog.domain.name = null; - this.dialog.file.type = null; + .then((res) => { + if (res.data.status === "OK") { + this.update_poc_tree(); + this.dialog.domain.name = null; + this.dialog.file.type = null; + } else { + alert(res.data.msg); + } }) .catch(() => { diff --git a/bci/web/vue/tailwind.config.js b/bci/web/vue/tailwind.config.js index a6fc0f41..9ec93a0c 100644 --- a/bci/web/vue/tailwind.config.js +++ b/bci/web/vue/tailwind.config.js @@ -1,5 +1,7 @@ +import flowbite from 'flowbite/plugin'; + /** @type {import('tailwindcss').Config} */ -module.exports = { +export default { mode: "jit", content: [ "./index.html", @@ -8,7 +10,7 @@ module.exports = { ], darkMode: ['selector'], plugins: [ - require('flowbite/plugin'), + flowbite, ], theme: { extend: { diff --git a/bci/worker.py b/bci/worker.py index a18928f2..fe17129b 100644 --- a/bci/worker.py +++ b/bci/worker.py @@ -22,7 +22,12 @@ def run(params: WorkerParameters): evaluation_framework = CustomEvaluationFramework() # browser_build, repo_state = get_browser_build_and_repo_state(params) - evaluation_framework.evaluate(params) + try: + evaluation_framework.evaluate(params, is_worker=True) + except Exception: + logger.fatal("An exception occurred during evaluation", exc_info=True) + logging.shutdown() + os._exit(1) if __name__ == '__main__': @@ -35,4 +40,5 @@ def run(params: WorkerParameters): logger.info('Worker started') run(params) logger.info('Worker finished, exiting...') + logging.shutdown() os._exit(0) diff --git a/nginx/config/core_dev.conf b/nginx/config/core_dev.conf index 78a0ca80..59637d40 100644 --- a/nginx/config/core_dev.conf +++ b/nginx/config/core_dev.conf @@ -4,9 +4,14 @@ location = /favicon.ico 
{ alias /www/data/res/bughog.ico; } +location /js/ { + root /www/data/; +} + location / { proxy_pass http://node:5173; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; @@ -16,6 +21,7 @@ location / { location /api/ { proxy_pass http://core:5000; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; diff --git a/nginx/config/core_prod.conf b/nginx/config/core_prod.conf index 43ce77b9..832d9209 100644 --- a/nginx/config/core_prod.conf +++ b/nginx/config/core_prod.conf @@ -11,6 +11,7 @@ location / { location /api/ { proxy_pass http://core:5000; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; diff --git a/nginx/config/experiments.conf b/nginx/config/experiments.conf index 36531f6c..45e10608 100644 --- a/nginx/config/experiments.conf +++ b/nginx/config/experiments.conf @@ -41,15 +41,18 @@ location ~ index\.(html|js)$ { root /www/data/pages; } -location ~ ^/[^/]+/[^/]+/[^/]+/?$ { +location ~ ^/([^/]+)/([^/]+)/([^/]+)/?$ { root /www/data/pages; - index index.html index.js; # Rewrite URLs conform to experiment file structure - rewrite ^/([^/]+)/([^/]+)/([^/]+)/?$ /$1/$2/$host/$3/ break; + set $dynamic_path /$1/$2/$host/$3/; + # The `/` at the end of the first try_files parameter is to indicate that the existence of a dir needs to be checked instead of a file. + # More info: http://nginx.org/en/docs/http/ngx_http_core_module.html#try_files + try_files $dynamic_path/index.html $dynamic_path/index.js =404; # Add experiment headers - access_by_lua_block { - local cjson = require "cjson" - local file_path = ngx.var.document_root .. ngx.var.uri .. 
"headers.json" + header_filter_by_lua_block { + local cjson = require "cjson.safe" + local decoded_dynamic_path = ngx.unescape_uri(ngx.var.dynamic_path) + local file_path = ngx.var.document_root .. decoded_dynamic_path .. "headers.json" local file = io.open(file_path, "r") if file then @@ -60,7 +63,11 @@ location ~ ^/[^/]+/[^/]+/[^/]+/?$ { if headers then for _, header in ipairs(headers) do if header.key and header.value then - ngx.header[header.key] = header.value + if string.lower(header.key) == "status" then + ngx.status = tonumber(header.value) + else + ngx.header[header.key] = header.value + end end end else diff --git a/scripts/build.sh b/scripts/build.sh index 10b7a20b..67b1f104 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,3 +1,3 @@ #!/bin/sh -exec docker compose build +exec docker compose build --pull