From 3906a1da846da570662c5f48c0224fdaa8b14248 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 29 Nov 2024 12:49:26 +0000 Subject: [PATCH 01/15] Fix bug where experiment page status code was not used. Additionally, add input string validiation and front-end error reporting. --- bci/evaluations/custom/custom_evaluation.py | 46 +++++++++------------ bci/evaluations/evaluation_framework.py | 18 ++++++++ bci/web/blueprints/api.py | 30 +++++++++----- bci/web/vue/index.html | 2 +- bci/web/vue/src/App.vue | 18 +++++--- bci/web/vue/src/components/poc-editor.vue | 12 ++++-- nginx/config/experiments.conf | 21 ++++++---- 7 files changed, 92 insertions(+), 55 deletions(-) diff --git a/bci/evaluations/custom/custom_evaluation.py b/bci/evaluations/custom/custom_evaluation.py index 96a441ba..defe4217 100644 --- a/bci/evaluations/custom/custom_evaluation.py +++ b/bci/evaluations/custom/custom_evaluation.py @@ -164,19 +164,14 @@ def get_mech_groups(self, project: str) -> list[tuple[str, bool]]: def get_projects(self) -> list[str]: return sorted(list(self.tests_per_project.keys())) - def create_empty_project(self, project_name: str) -> bool: - if project_name is None or project_name == '': - logger.error('Given project name is invalid') - return False - + def create_empty_project(self, project_name: str): + self.is_valid_name(project_name) if project_name in self.dir_tree: - logger.error(f"Given project name '{project_name}' already exists") - return False + raise AttributeError(f"The given project name '{project_name}' already exists.") new_project_path = os.path.join(Global.custom_page_folder, project_name) os.mkdir(new_project_path) self.sync_with_folders() - return True def get_poc_structure(self, project: str, poc: str) -> dict: return self.dir_tree[project][poc] @@ -206,20 +201,23 @@ def update_poc_file(self, project: str, poc: str, domain: str, path: str, file_n return True return False - def create_empty_poc(self, project: str, poc_name: str) -> bool: + def create_empty_poc(self, project: str, poc_name: str): + self.is_valid_name(poc_name) poc_path = os.path.join(Global.custom_page_folder, project, poc_name) - if not os.path.exists(poc_path): - os.makedirs(poc_path) - self.sync_with_folders() - Clients.push_experiments_to_all() - return True - return False + if os.path.exists(poc_path): + raise AttributeError(f"The given PoC name '{poc_name}' already exists.") - def add_page(self, project: str, poc: str, domain: str, path: str, file_type: str) -> bool: + os.makedirs(poc_path) + self.sync_with_folders() + Clients.push_experiments_to_all() + + + def add_page(self, project: str, poc: str, domain: str, path: str, file_type: str): domain_path = os.path.join(Global.custom_page_folder, project, poc, domain) if not os.path.exists(domain_path): os.makedirs(domain_path) + self.is_valid_name(path) if file_type == 'py': file_name = path if path.endswith('.py') else path + '.py' file_path = os.path.join(domain_path, file_name) @@ -229,21 +227,19 @@ def add_page(self, project: str, poc: str, domain: str, path: str, file_type: st os.makedirs(page_path) new_file_name = f'index.{file_type}' file_path = os.path.join(page_path, new_file_name) + headers_file_path = os.path.join(page_path, 'headers.json') + if not os.path.exists(headers_file_path): + with open(headers_file_path, 'w') as file: + file.write(self.get_default_file_content('headers.json')) if os.path.exists(file_path): - return False + raise AttributeError(f"The given page '{path}' does already exist.") with open(file_path, 'w') as file: 
file.write(self.get_default_file_content(file_type)) - if self.include_file_headers(file_type): - headers_file_path = os.path.join(page_path, 'headers.json') - if not os.path.exists(headers_file_path): - with open(headers_file_path, 'w') as file: - file.write(self.get_default_file_content('headers.json')) self.sync_with_folders() # Notify clients of change (an experiment might now be runnable) Clients.push_experiments_to_all() - return True def add_config(self, project: str, poc: str, type: str) -> bool: content = self.get_default_file_content(type) @@ -271,10 +267,6 @@ def get_default_file_content(file_type: str) -> str: with open(path, 'r') as file: return file.read() - @staticmethod - def include_file_headers(file_type: str) -> bool: - return file_type != 'py' - def sync_with_folders(self): self.dir_tree = self.initialize_dir_tree() self.tests_per_project = self.initialize_tests_and_interactions(self.dir_tree) diff --git a/bci/evaluations/evaluation_framework.py b/bci/evaluations/evaluation_framework.py index 10f0d1f7..d71de4e2 100644 --- a/bci/evaluations/evaluation_framework.py +++ b/bci/evaluations/evaluation_framework.py @@ -1,5 +1,6 @@ import logging import os +import re import traceback from abc import ABC, abstractmethod @@ -70,3 +71,20 @@ def get_mech_groups(self, project: str) -> list[tuple[str, bool]]: Returns the available mechanism groups for this evaluation framework. """ pass + + @staticmethod + def is_valid_name(name: str) -> None: + """ + Checks whether the given string is a valid experiment, page or project name, and raises an exception if not. + This is to prevent issues with URL encoding and decoding. + + :param name: Name to be checked on validity. + """ + if name is None or name == '': + raise AttributeError("The given name cannot be empty.") + if re.match(r'^[A-Za-z0-9_\-.]+$', name) is None: + raise AttributeError( + f"The given name '{name}' is invalid. Only letters, numbers, " + "'.', '-' and '_' can be used, and the name should not be empty." 
+ ) + diff --git a/bci/web/blueprints/api.py b/bci/web/blueprints/api.py index 607edcfe..5fc9f28d 100644 --- a/bci/web/blueprints/api.py +++ b/bci/web/blueprints/api.py @@ -157,10 +157,16 @@ def create_project(): 'msg': "No parameters found" } project_name = request.json.get('project_name') - return { - 'status': 'OK', - 'projects': __get_main().evaluation_framework.create_empty_project(project_name) - } + try: + __get_main().evaluation_framework.create_empty_project(project_name) + return { + 'status': 'OK' + } + except AttributeError as e: + return { + 'status': 'NOK', + 'msg': str(e) + } @api.route('/system/', methods=['GET']) @@ -257,14 +263,15 @@ def add_page(project: str, poc: str): domain = data['domain'] path = data['page'] file_type = data['file_type'] - success = __get_main().evaluation_framework.add_page(project, poc, domain, path, file_type) - if success: + try: + __get_main().evaluation_framework.add_page(project, poc, domain, path, file_type) return { 'status': 'OK' } - else: + except AttributeError as e: return { - 'status': 'NOK' + 'status': 'NOK', + 'msg': str(e) } @@ -311,14 +318,15 @@ def create_experiment(project: str): 'msg': 'Missing experiment name' } poc_name = data['poc_name'] - if __get_main().evaluation_framework.create_empty_poc(project, poc_name): + try: + __get_main().evaluation_framework.create_empty_poc(project, poc_name) return { 'status': 'OK' } - else: + except AttributeError as e: return { 'status': 'NOK', - 'msg': 'Could not create experiment' + 'msg': str(e) } diff --git a/bci/web/vue/index.html b/bci/web/vue/index.html index 6bf8ba4b..b360dea2 100644 --- a/bci/web/vue/index.html +++ b/bci/web/vue/index.html @@ -2,7 +2,7 @@ - + diff --git a/bci/web/vue/src/App.vue b/bci/web/vue/src/App.vue index 4d316bf0..c9a698d1 100644 --- a/bci/web/vue/src/App.vue +++ b/bci/web/vue/src/App.vue @@ -400,7 +400,11 @@ export default { const url = `/api/poc/${this.selected.project}/`; axios.post(url, {'poc_name': this.dialog.new_experiment_name}) .then((res) => { - this.dialog.new_experiment_name = null; + if (res.data.status === "OK") { + this.dialog.new_experiment_name = null; + } else { + alert(res.data.msg); + } }) .catch((error) => { console.error('Could not create new experiment'); @@ -411,10 +415,14 @@ export default { const new_project_name = this.dialog.new_project_name; axios.post(url, {'project_name': new_project_name}) .then((res) => { - this.dialog.new_project_name = null; - this.get_projects(() => { - this.set_curr_project(new_project_name); - }); + if (res.data.status === "OK") { + this.dialog.new_project_name = null; + this.get_projects(() => { + this.set_curr_project(new_project_name); + }); + } else { + alert(res.data.msg); + } }) .catch((error) => { console.error('Could not create new project', error); diff --git a/bci/web/vue/src/components/poc-editor.vue b/bci/web/vue/src/components/poc-editor.vue index 98385d7a..8a4950b4 100644 --- a/bci/web/vue/src/components/poc-editor.vue +++ b/bci/web/vue/src/components/poc-editor.vue @@ -221,10 +221,14 @@ "page": page, "file_type": file_type, }) - .then(() => { - this.update_poc_tree(); - this.dialog.domain.name = null; - this.dialog.file.type = null; + .then((res) => { + if (res.data.status === "OK") { + this.update_poc_tree(); + this.dialog.domain.name = null; + this.dialog.file.type = null; + } else { + alert(res.data.msg); + } }) .catch(() => { diff --git a/nginx/config/experiments.conf b/nginx/config/experiments.conf index 36531f6c..45e10608 100644 --- a/nginx/config/experiments.conf +++ 
b/nginx/config/experiments.conf @@ -41,15 +41,18 @@ location ~ index\.(html|js)$ { root /www/data/pages; } -location ~ ^/[^/]+/[^/]+/[^/]+/?$ { +location ~ ^/([^/]+)/([^/]+)/([^/]+)/?$ { root /www/data/pages; - index index.html index.js; # Rewrite URLs conform to experiment file structure - rewrite ^/([^/]+)/([^/]+)/([^/]+)/?$ /$1/$2/$host/$3/ break; + set $dynamic_path /$1/$2/$host/$3/; + # The `/` at the end of the first try_files parameter is to indicate that the existence of a dir needs to be checked instead of a file. + # More info: http://nginx.org/en/docs/http/ngx_http_core_module.html#try_files + try_files $dynamic_path/index.html $dynamic_path/index.js =404; # Add experiment headers - access_by_lua_block { - local cjson = require "cjson" - local file_path = ngx.var.document_root .. ngx.var.uri .. "headers.json" + header_filter_by_lua_block { + local cjson = require "cjson.safe" + local decoded_dynamic_path = ngx.unescape_uri(ngx.var.dynamic_path) + local file_path = ngx.var.document_root .. decoded_dynamic_path .. "headers.json" local file = io.open(file_path, "r") if file then @@ -60,7 +63,11 @@ location ~ ^/[^/]+/[^/]+/[^/]+/?$ { if headers then for _, header in ipairs(headers) do if header.key and header.value then - ngx.header[header.key] = header.value + if string.lower(header.key) == "status" then + ngx.status = tonumber(header.value) + else + ngx.header[header.key] = header.value + end end end else From 5f86448160c5e958ae94b1ab594350e0d343abfb Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 29 Nov 2024 14:19:03 +0000 Subject: [PATCH 02/15] Fix sticky SHIFT key --- bci/web/vue/src/components/gantt.vue | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/bci/web/vue/src/components/gantt.vue b/bci/web/vue/src/components/gantt.vue index fcbad9d4..a33bd103 100644 --- a/bci/web/vue/src/components/gantt.vue +++ b/bci/web/vue/src/components/gantt.vue @@ -22,21 +22,22 @@ export default { shift_down: false, } }, + created: function() { + document.addEventListener("keydown", (e) => { + if (e.key === "Shift") { + this.shift_down = true; + } + }); + document.addEventListener("keyup", (e) => { + if (e.key === "Shift") { + this.shift_down = false; + } + }); + }, methods: { init_plot() { console.log("Initializing Gantt chart..."); - document.addEventListener("keydown", (e) => { - if (e.shiftKey) { - this.shift_down = true; - } - }); - document.addEventListener("keyup", (e) => { - if (e.shiftKey) { - this.shift_down = false; - } - }); - if (this.revision_source.length === 0 || this.version_source.length === 0) { this.x_min = 1; this.x_max = 1000000; From ee381f0a0f49efe4ba51b125ab6d2ecf6d010395 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Mon, 2 Dec 2024 10:50:55 +0000 Subject: [PATCH 03/15] Fix bug where PoC editor would fail to send big PoC files to server. This was caused by the file contents being sent as a GET parameter, in addition to as a json in the request body. With big PoC files, the request would fail due to the long request line. Now, the GET parameter has been omitted. 
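For context: nginx rejects any request whose request line does not fit in a single large_client_header_buffers buffer (8 KB by default), so embedding a whole PoC file in the query string breaks as soon as the file grows past a few kilobytes. Below is a minimal Flask sketch of the contract the frontend now follows — short identifiers stay in the query string, while the potentially large file content travels in the JSON body. The route shape mirrors the diff below, but the handler name and body are illustrative, not the exact BugHog implementation:

    from flask import Flask, request

    app = Flask(__name__)

    @app.route('/api/poc/<project>/<poc>/<file>/', methods=['POST'])
    def update_poc_file(project: str, poc: str, file: str):
        # Short identifiers are safe to keep in the query string.
        domain = request.args.get('domain', '')
        path = request.args.get('path', '')
        # The (possibly large) file content is read from the request body,
        # so the request line stays small regardless of PoC size.
        content = (request.get_json(silent=True) or {}).get('content', '')
        return {'status': 'OK', 'received_bytes': len(content)}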
--- bci/web/blueprints/api.py | 2 +- bci/web/vue/src/components/poc-editor.vue | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bci/web/blueprints/api.py b/bci/web/blueprints/api.py index 5fc9f28d..de9090d7 100644 --- a/bci/web/blueprints/api.py +++ b/bci/web/blueprints/api.py @@ -224,7 +224,7 @@ def poc(project: str, poc: str): @api.route('/poc////', methods=['GET', 'POST']) -def get_poc_file_content(project: str, poc: str, file: str): +def poc_file_content(project: str, poc: str, file: str): domain = request.args.get('domain', '') path = request.args.get('path', '') if request.method == 'GET': diff --git a/bci/web/vue/src/components/poc-editor.vue b/bci/web/vue/src/components/poc-editor.vue index 8a4950b4..f807ea30 100644 --- a/bci/web/vue/src/components/poc-editor.vue +++ b/bci/web/vue/src/components/poc-editor.vue @@ -156,17 +156,18 @@ const domain = this.active_poc.active_domain; const file_path = this.active_poc.active_path; const file_name = this.active_file.name; - const params = { + const data = { "content": this.editor.session.getValue() }; + const get_params = {}; if (domain !== null) { - params["domain"] = domain; + get_params["domain"] = domain; } if (file_path !== null) { - params["path"] = file_path; + get_params["path"] = file_path; } const path = `/api/poc/${project}/${poc}/${file_name}/`; - axios.post(path, {"content": this.editor.session.getValue()}, {params: params}) + axios.post(path, data, {params: get_params}) .then((res) => { if (res.data.status == "NOK") { console.error("Could not update file on server"); From ce967dd806f828d5ae830788ceb2ddc5d24de40b Mon Sep 17 00:00:00 2001 From: Gertjan Date: Mon, 2 Dec 2024 10:55:13 +0000 Subject: [PATCH 04/15] Fix bug where production image would fail to fetch ace-editor script. This script file is now embedded in the nginx image. Additionally, bokeh scripts are now also stored in the nginx image, so we don't need to fetch them from the CDN at runtime. 
--- Dockerfile | 7 +++++++ bci/web/vue/index.html | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9439b3d8..8574700c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,9 +7,16 @@ RUN npm run build FROM openresty/openresty:1.27.1.1-bullseye AS nginx +RUN apt update -y && \ + apt install -y curl && \ + rm -rf /var/lib/apt/lists/* +RUN mkdir -p /www/data/js && \ + curl https://cdn.bokeh.org/bokeh/release/bokeh-3.6.1.min.js -o /www/data/js/bokeh.min.js && \ + curl https://cdn.bokeh.org/bokeh/release/bokeh-api-3.6.1.min.js -o /www/data/js/bokeh-api.min.js COPY ./nginx/start.sh /usr/local/bin/ COPY ./nginx/config /etc/nginx/config COPY --from=ui-build-stage /app/dist /www/data +COPY --from=ui-build-stage /app/node_modules/ace-builds/src-min-noconflict /www/data/node_modules/ace-builds/src-min-noconflict CMD ["start.sh"] diff --git a/bci/web/vue/index.html b/bci/web/vue/index.html index b360dea2..50d462fc 100644 --- a/bci/web/vue/index.html +++ b/bci/web/vue/index.html @@ -4,8 +4,8 @@ - - + + BugHog From 222af9401fc57631f89ecf1c23d9740c6735e30b Mon Sep 17 00:00:00 2001 From: Gertjan Date: Tue, 3 Dec 2024 07:48:58 +0000 Subject: [PATCH 05/15] Fix route to /js/ folder in dev nginx --- nginx/config/core_dev.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nginx/config/core_dev.conf b/nginx/config/core_dev.conf index 78a0ca80..be067ad5 100644 --- a/nginx/config/core_dev.conf +++ b/nginx/config/core_dev.conf @@ -4,6 +4,10 @@ location = /favicon.ico { alias /www/data/res/bughog.ico; } +location /js/ { + root /www/data/; +} + location / { proxy_pass http://node:5173; proxy_set_header Host $host; From b42ab312141e075c16f1d5a1c6b46f3983d8c26c Mon Sep 17 00:00:00 2001 From: Jakub Szymsza Date: Wed, 4 Dec 2024 10:39:53 +0000 Subject: [PATCH 06/15] Fix tailwind.config.js syntax --- bci/web/vue/tailwind.config.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bci/web/vue/tailwind.config.js b/bci/web/vue/tailwind.config.js index a6fc0f41..9ec93a0c 100644 --- a/bci/web/vue/tailwind.config.js +++ b/bci/web/vue/tailwind.config.js @@ -1,5 +1,7 @@ +import flowbite from 'flowbite/plugin'; + /** @type {import('tailwindcss').Config} */ -module.exports = { +export default { mode: "jit", content: [ "./index.html", @@ -8,7 +10,7 @@ module.exports = { ], darkMode: ['selector'], plugins: [ - require('flowbite/plugin'), + flowbite, ], theme: { extend: { From 95358d5d902e82c0c3c548ea2dae3e75eedab688 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Wed, 4 Dec 2024 11:54:05 +0000 Subject: [PATCH 07/15] Pin node docker image version and other dev changes. - Pinning node image version prevents breaking upgrades. 
- Add python extensions to vscode devcontainer config - Make docker pull all required parent images in ./scripts/build.sh --- .devcontainer/devcontainer.json | 2 ++ .vscode/launch.json | 2 +- Dockerfile | 2 +- scripts/build.sh | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 16cc9c75..66862953 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -21,6 +21,8 @@ "vscode": { "extensions": [ "charliermarsh.ruff", + "ms-python.debugpy", + "ms-python.python", "Vue.volar" ] } diff --git a/.vscode/launch.json b/.vscode/launch.json index deaaa013..855ea2f8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "configurations": [ { "name": "BugHog", - "type": "python", + "type": "debugpy", "request": "launch", "program": "/app/bci/app.py", "purpose": [ diff --git a/Dockerfile b/Dockerfile index 8574700c..570d62c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:lts-alpine as ui-build-stage +FROM node:22.12-alpine as ui-build-stage WORKDIR /app COPY /bci/web/vue/package*.json ./ RUN npm install diff --git a/scripts/build.sh b/scripts/build.sh index 10b7a20b..67b1f104 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,3 +1,3 @@ #!/bin/sh -exec docker compose build +exec docker compose build --pull From eff772b6348af91428558c0a858b32bd9a75a907 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Wed, 4 Dec 2024 12:20:54 +0000 Subject: [PATCH 08/15] Improve worker error reporting and allow more RAM. By changing the way containers are started, we can now better report on the cause of a failure. RAM allowance is increased, because it sometimes caused a 137 (OOM) which would kill the container. --- bci/distribution/worker_manager.py | 39 ++++++++++++++++++------- bci/evaluations/evaluation_framework.py | 9 +++--- bci/worker.py | 8 ++++- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/bci/distribution/worker_manager.py b/bci/distribution/worker_manager.py index e6194012..3390a1cc 100644 --- a/bci/distribution/worker_manager.py +++ b/bci/distribution/worker_manager.py @@ -42,6 +42,8 @@ def __run_container(self, params: WorkerParameters, blocking_wait=True) -> None: container_name = f'bh_worker_{container_id}' def start_container_thread(): + if (host_pwd := os.getenv('HOST_PWD', None)) is None: + raise AttributeError('Could not find HOST_PWD environment var') try: # Sometimes, it takes a while for Docker to remove the container while True: @@ -60,17 +62,20 @@ def start_container_thread(): for container in active_containers: logger.info(f'Removing old container \'{container.attrs["Name"]}\' to start new one') container.remove(force=True) - if (host_pwd := os.getenv('HOST_PWD', None)) is None: - raise AttributeError('Could not find HOST_PWD environment var') - self.client.containers.run( + except docker.errors.APIError: + logger.error("Could not consult list of active containers", exc_info=True) + + container = None + try: + container = self.client.containers.run( f'bughog/worker:{Global.get_tag()}', name=container_name, hostname=container_name, shm_size='2gb', network='bh_net', - mem_limit='1g', # To prevent one container from consuming multiple gigs of memory (was the case for a Firefox evaluation) - detach=False, - remove=True, + mem_limit='4g', # To prevent one container from consuming multiple gigs of memory (was the case for a Firefox evaluation) + mem_reservation='2g', + detach=True, labels=['bh_worker'], 
command=[params.serialize()], volumes=[ @@ -86,20 +91,34 @@ def start_container_thread(): '/dev/shm:/dev/shm', ], ) - logger.debug(f"Container '{container_name}' finished experiments for '{params.state}'") - Clients.push_results_to_all() + result = container.wait() + if result["StatusCode"] != 0: + logger.error( + f"'{container_name}' exited unexpectedly with status code {result['StatusCode']}. " + "Check the worker logs in ./logs/ for more information." + ) + else: + logger.debug(f"Container '{container_name}' finished experiments for '{params.state}'") + Clients.push_results_to_all() + except docker.errors.APIError: + logger.error("Received a docker error", exc_info=True) except docker.errors.ContainerError: logger.error( f"Could not run container '{container_name}' or container was unexpectedly removed", exc_info=True ) + if container is not None: + container_info = container.attrs["State"] + logger.error(f"'{container_name}' exited unexpectedly with {container_info}", exc_info=True) finally: + if container is not None: + container.remove() self.container_id_pool.put(container_id) thread = threading.Thread(target=start_container_thread) thread.start() logger.info(f"Container '{container_name}' started experiments for '{params.state}'") - # To avoid race-condition where more than max containers are started - time.sleep(3) + # Sleep to avoid all workers downloading browser binaries at once, clogging up all IO. + time.sleep(5) def get_nb_of_running_worker_containers(self): return len(self.get_runnning_containers()) diff --git a/bci/evaluations/evaluation_framework.py b/bci/evaluations/evaluation_framework.py index d71de4e2..86361c18 100644 --- a/bci/evaluations/evaluation_framework.py +++ b/bci/evaluations/evaluation_framework.py @@ -1,7 +1,6 @@ import logging import os import re -import traceback from abc import ABC, abstractmethod from bci.browser.configuration.browser import Browser @@ -17,7 +16,7 @@ class EvaluationFramework(ABC): def __init__(self): self.should_stop = False - def evaluate(self, worker_params: WorkerParameters): + def evaluate(self, worker_params: WorkerParameters, is_worker=False): test_params = worker_params.create_test_params() if MongoDB().has_result(test_params): @@ -42,8 +41,10 @@ def evaluate(self, worker_params: WorkerParameters): logger.info(f'Test finalized: {test_params}') except Exception as e: state.condition = StateCondition.FAILED - logger.error('An error occurred during evaluation', exc_info=True) - traceback.print_exc() + if is_worker: + raise e + else: + logger.error('An error occurred during evaluation', exc_info=True) finally: browser.post_test_cleanup() diff --git a/bci/worker.py b/bci/worker.py index a18928f2..fe17129b 100644 --- a/bci/worker.py +++ b/bci/worker.py @@ -22,7 +22,12 @@ def run(params: WorkerParameters): evaluation_framework = CustomEvaluationFramework() # browser_build, repo_state = get_browser_build_and_repo_state(params) - evaluation_framework.evaluate(params) + try: + evaluation_framework.evaluate(params, is_worker=True) + except Exception: + logger.fatal("An exception occurred during evaluation", exc_info=True) + logging.shutdown() + os._exit(1) if __name__ == '__main__': @@ -35,4 +40,5 @@ def run(params: WorkerParameters): logger.info('Worker started') run(params) logger.info('Worker finished, exiting...') + logging.shutdown() os._exit(0) From a6657d5bdaaf755a57a561de7aec2aaabb05ad3d Mon Sep 17 00:00:00 2001 From: Gertjan Date: Wed, 4 Dec 2024 12:33:15 +0000 Subject: [PATCH 09/15] Enable browser caching and improve performance. 
Caching improvements: - Chunked uploading to database - Ignore unnecessary locale files Additionally, some code cleanup. --- bci/browser/binary/binary.py | 6 +- bci/browser/binary/factory.py | 40 -------------- bci/browser/binary/vendors/chromium.py | 16 +++++- bci/browser/binary/vendors/firefox.py | 4 +- bci/database/mongo/binary_cache.py | 76 +++++++++++++++++++------- bci/database/mongo/mongodb.py | 1 + bci/main.py | 1 + bci/util.py | 7 ++- 8 files changed, 81 insertions(+), 70 deletions(-) diff --git a/bci/browser/binary/binary.py b/bci/browser/binary/binary.py index f0624fed..aaad5955 100644 --- a/bci/browser/binary/binary.py +++ b/bci/browser/binary/binary.py @@ -53,7 +53,7 @@ def origin(self) -> str: raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'") @staticmethod - def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: + def _list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: binaries = [] for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')): bin_entry = {} @@ -63,10 +63,10 @@ def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]: @staticmethod def list_artisanal_binaries(bin_folder_path: str, executable_name: str): - return Binary.get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list() + return Binary._get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list() @staticmethod - def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager: + def _get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager: return ArtisanalBuildManager(bin_folder_path, executable_name) def fetch_binary(self): diff --git a/bci/browser/binary/factory.py b/bci/browser/binary/factory.py index 2206d77f..736f24e8 100644 --- a/bci/browser/binary/factory.py +++ b/bci/browser/binary/factory.py @@ -1,53 +1,13 @@ -from typing import Type - from bci.browser.binary.binary import Binary from bci.browser.binary.vendors.chromium import ChromiumBinary from bci.browser.binary.vendors.firefox import FirefoxBinary from bci.version_control.states.state import State -def list_downloaded_binaries(browser): - return __get_class(browser).list_downloaded_binaries() - - -def list_artisanal_binaries(browser): - return __get_class(browser).list_artisanal_binaries() - - -def update_artisanal_binaries(browser): - return __get_class(browser).get_artisanal_manager().update() - - -def download_online_binary(browser, revision_number): - return __get_class(browser).download_online_binary(revision_number) - - -def binary_is_available(state: State) -> bool: - return __has_available_binary_online(state) or __has_available_binary_artisanal(state) - - -def __has_available_binary_online(state: State) -> bool: - return __get_class(state.browser_name).has_available_binary_online() - - -def __has_available_binary_artisanal(state: State) -> bool: - return __get_class(state.browser_name).get_artisanal_manager().has_artisanal_binary_for(state) - - def get_binary(state: State) -> Binary: return __get_object(state) -def __get_class(browser_name: str) -> Type[Binary]: - match browser_name: - case 'chromium': - return ChromiumBinary - case 'firefox': - return FirefoxBinary - case _: - raise ValueError(f'Unknown browser {browser_name}') - - def __get_object(state: State) -> Binary: match state.browser_name: case 'chromium': diff --git a/bci/browser/binary/vendors/chromium.py b/bci/browser/binary/vendors/chromium.py 
index 8511b73e..f7a99a08 100644 --- a/bci/browser/binary/vendors/chromium.py +++ b/bci/browser/binary/vendors/chromium.py @@ -68,11 +68,23 @@ def download_binary(self): bin_path = self.get_potential_bin_path() os.makedirs(os.path.dirname(bin_path), exist_ok=True) unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux") + self.__remove_unnecessary_files(unzipped_folder_path) util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path)) cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path)) # Remove temporary files in /tmp/COMMIT_POS shutil.rmtree(os.path.dirname(zip_file_path)) + def __remove_unnecessary_files(self, binary_folder_path: str) -> None: + """ + Remove binary files that are not necessary for default usage of the browser. + This is to improve performance, especially when caching binary files. + + :param binary_folder_path: Path to the folder where the binary files are stored. + """ + locales_folder_path = os.path.join(binary_folder_path, 'locales') + if os.path.isdir(locales_folder_path): + util.remove_all_in_folder(locales_folder_path, except_files=['en-GB.pak', 'en-US.pak']) + def _get_version(self) -> str: command = "./chrome --version" if bin_path := self.get_bin_path(): @@ -86,11 +98,11 @@ def _get_version(self) -> str: @staticmethod def list_downloaded_binaries() -> list[dict[str, str]]: - return Binary.list_downloaded_binaries(BIN_FOLDER_PATH) + return Binary._list_downloaded_binaries(BIN_FOLDER_PATH) @staticmethod def get_artisanal_manager() -> ArtisanalBuildManager: - return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) + return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) browser_version_to_driver_version = { '88': "88.0.4324.96", diff --git a/bci/browser/binary/vendors/firefox.py b/bci/browser/binary/vendors/firefox.py index c928e659..2cb32824 100644 --- a/bci/browser/binary/vendors/firefox.py +++ b/bci/browser/binary/vendors/firefox.py @@ -95,11 +95,11 @@ def get_driver_version(self, browser_version): @staticmethod def list_downloaded_binaries() -> list[dict[str, str]]: - return Binary.list_downloaded_binaries(BIN_FOLDER_PATH) + return Binary._list_downloaded_binaries(BIN_FOLDER_PATH) @staticmethod def get_artisanal_manager() -> ArtisanalBuildManager: - return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) + return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME) browser_version_to_driver_version = { '84': "0.28.0", diff --git a/bci/database/mongo/binary_cache.py b/bci/database/mongo/binary_cache.py index 7a98434e..bf19086c 100644 --- a/bci/database/mongo/binary_cache.py +++ b/bci/database/mongo/binary_cache.py @@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None: return True @staticmethod - def store_binary_files(binary_executable_path: str, state: State) -> bool: + def store_binary_files(binary_executable_path: str, state: State): """ Stores the files in the folder of the given path in the database. 
@@ -80,35 +80,60 @@ def store_binary_files(binary_executable_path: str, state: State) -> bool: if MongoDB().binary_cache_limit <= 0: return False - while BinaryCache.__count_cached_binaries() >= MongoDB.binary_cache_limit: + while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit: if BinaryCache.__count_cached_binaries(state_type='revision') <= 0: # There are only version binaries in the cache, which will never be removed return False BinaryCache.__remove_least_used_revision_binary_files() + logger.debug(f"Caching binary files for {state}...") fs = MongoDB().gridfs + binary_folder_path = os.path.dirname(binary_executable_path) + last_access_ts = datetime.datetime.now() + def store_file(file_path: str) -> None: + # Max chunk size is 16 MB (meta-data included) + chunk_size = 1024 * 1024 * 15 + with open(file_path, 'rb') as file: + file_id = fs.new_file( + file_type='binary', + browser_name=state.browser_name, + state_type=state.type, + state_index=state.index, + relative_file_path=os.path.relpath(file_path, binary_folder_path), + access_count=0, + last_access_ts=last_access_ts, + chunk_size=chunk_size + ) + while chunk := file.read(chunk_size): + file_id.write(chunk) + file_id.close() + start_time = time.time() - with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + futures = [] for root, _, files in os.walk(binary_folder_path): for file in files: file_path = os.path.join(root, file) - with open(file_path, 'rb') as file: - executor.submit( - fs.put, - file.read(), - file_type='binary', - browser_name=state.browser_name, - state_type=state.type, - state_index=state.index, - relative_file_path=os.path.relpath(file_path, binary_folder_path), - access_count=0, - last_access_ts=datetime.datetime.now(), - ) + future = executor.submit(store_file, file_path) + futures.append(future) + logger.debug(f"Number of files to cache: {len(futures)}") executor.shutdown(wait=True) - elapsed_time = time.time() - start_time - logger.debug(f'Stored binary in {elapsed_time:.2f}s') - return True + + futures_with_exception = [future for future in futures if future.exception() is not None] + if futures_with_exception: + logger.error( + ( + f"Something went wrong caching binary files for {state}, " + "Removing possibly imcomplete binary files from cache." + ), + exc_info=futures_with_exception[0].exception() + ) + BinaryCache.__remove_revision_binary_files(state.type, state.index) + logger.debug(f"Removed possibly incomplete cached binary files for {state}.") + else: + elapsed_time = time.time() - start_time + logger.debug(f'Stored binary in {elapsed_time:.2f}s') @staticmethod def __count_cached_binaries(state_type: Optional[str] = None) -> int: @@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None: """ Removes the least used revision binary files from the database. 
""" - fs = MongoDB().gridfs files_collection = MongoDB().get_collection('fs.files') grid_cursor = files_collection.find( @@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None: ) for state_doc in grid_cursor: state_index = state_doc['state_index'] - for grid_doc in files_collection.find({'state_index': state_index, 'state_type': 'revision'}): - fs.delete(grid_doc['_id']) + BinaryCache.__remove_revision_binary_files('revision', state_index) break + + @staticmethod + def __remove_revision_binary_files(state_type: str, state_index: int) -> None: + """ + Removes the binary files associated with the parameters. + """ + fs = MongoDB().gridfs + files_collection = MongoDB().get_collection('fs.files') + + for grid_doc in files_collection.find({'state_index': state_index, 'state_type': state_type}): + fs.delete(grid_doc['_id']) diff --git a/bci/database/mongo/mongodb.py b/bci/database/mongo/mongodb.py index df8c5678..456bad7a 100644 --- a/bci/database/mongo/mongodb.py +++ b/bci/database/mongo/mongodb.py @@ -62,6 +62,7 @@ def connect(self, db_params: DatabaseParameters) -> None: retryWrites=False, serverSelectionTimeoutMS=10000, ) + self.binary_cache_limit = db_params.binary_cache_limit logger.info(f'Binary cache limit set to {db_params.binary_cache_limit}') # Force connection to check whether MongoDB server is reachable try: diff --git a/bci/main.py b/bci/main.py index a377f18b..912f2bbc 100644 --- a/bci/main.py +++ b/bci/main.py @@ -151,6 +151,7 @@ def quit_bughog(self) -> None: self.activate_stop_forcefully() mongodb_container.stop() logger.info('Stopping BugHog core...') + logging.shutdown() exit(0) def sigint_handler(self, sig_number, stack_frame) -> None: diff --git a/bci/util.py b/bci/util.py index 3175ca1c..f91bf5dc 100644 --- a/bci/util.py +++ b/bci/util.py @@ -7,6 +7,7 @@ import os import shutil import time +from typing import Optional import requests @@ -42,11 +43,13 @@ def copy_folder(src_path, dst_path): shutil.copytree(src_path, dst_path, dirs_exist_ok=True) -def remove_all_in_folder(folder_path: str) -> None: +def remove_all_in_folder(folder_path: str, except_files: Optional[list[str]]=None) -> None: + except_files = [] if except_files is None else except_files for root, dirs, files in os.walk(folder_path): for file_name in files: file_path = os.path.join(root, file_name) - os.remove(file_path) + if file_name not in except_files: + os.remove(file_path) for dir_name in dirs: dir_path = os.path.join(root, dir_name) shutil.rmtree(dir_path) From d43d4a71955af08b4fc8014d4a8432ac74d60477 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Wed, 4 Dec 2024 12:41:56 +0000 Subject: [PATCH 10/15] Add log rotation --- .gitignore | 3 ++- bci/configuration.py | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3732e540..d34fb719 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,8 @@ nginx/ssl/keys/* **/node_modules **/junit.xml -# Screenshots +# Screenshots and logs +logs/*.log.* logs/screenshots/* !logs/screenshots/.gitkeep diff --git a/bci/configuration.py b/bci/configuration.py index 166e0e03..092b55a4 100644 --- a/bci/configuration.py +++ b/bci/configuration.py @@ -140,7 +140,7 @@ def configure_loggers(): bci_logger.addHandler(stream_handler) # Configure file handler - file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=2) + file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=3, maxBytes=8*1024*1024) 
file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(Loggers.formatter) bci_logger.addHandler(file_handler) @@ -154,8 +154,7 @@ def configure_loggers(): # Configure memory handler Loggers.memory_handler.setLevel(logging.INFO) - buffer_formatter = logging.handlers.BufferingHandler(Loggers.formatter) - Loggers.memory_handler.setFormatter(buffer_formatter) + Loggers.memory_handler.setFormatter(Loggers.formatter) bci_logger.addHandler(Loggers.memory_handler) # Log uncaught exceptions From 00b9a9f1203beee9196cc1d5cec55a9d18671ab2 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 6 Dec 2024 15:02:40 +0000 Subject: [PATCH 11/15] Ignore '.DS_Store' in experiments folder and ignore typo --- bci/database/mongo/revision_cache.py | 2 +- bci/evaluations/custom/custom_evaluation.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bci/database/mongo/revision_cache.py b/bci/database/mongo/revision_cache.py index 6a06efda..4270b5a2 100644 --- a/bci/database/mongo/revision_cache.py +++ b/bci/database/mongo/revision_cache.py @@ -20,7 +20,7 @@ def store_firefox_binary_availability(data: dict) -> None: collection.delete_many({}) collection.insert_many(values) - logger.info(f'Revision Cache was updates ({len(values)} documents).') + logger.info(f'Revision Cache was updated ({len(values)} documents).') @staticmethod def firefox_get_revision_number(revision_id: str) -> int: diff --git a/bci/evaluations/custom/custom_evaluation.py b/bci/evaluations/custom/custom_evaluation.py index defe4217..76da69ff 100644 --- a/bci/evaluations/custom/custom_evaluation.py +++ b/bci/evaluations/custom/custom_evaluation.py @@ -14,6 +14,8 @@ class CustomEvaluationFramework(EvaluationFramework): + __files_and_folders_to_ignore = ['.DS_Store'] + def __init__(self): super().__init__() self.dir_tree = self.initialize_dir_tree() @@ -37,7 +39,10 @@ def set_nested_value(d: dict, keys: list[str], value: dict): # Remove base path from root root = root[len(path):] keys = root.split('/')[1:] - subdir_tree = {dir: {} for dir in dirs} | {file: None for file in files} + subdir_tree = ( + {dir: {} for dir in dirs if dir not in CustomEvaluationFramework.__files_and_folders_to_ignore} | + {file: None for file in files if file not in CustomEvaluationFramework.__files_and_folders_to_ignore} + ) if root: set_nested_value(dir_tree, keys, subdir_tree) else: @@ -85,6 +90,7 @@ def __get_url_queue(project: str, project_path: str, experiment: str) -> Optiona else: # Otherwise, a default URL queue is used, based on the domain that hosts the main page experiment_path = os.path.join(project_path, experiment) + assert os.path.isdir(experiment_path) for domain in os.listdir(experiment_path): main_folder_path = os.path.join(experiment_path, domain, 'main') if os.path.exists(main_folder_path): From 79d39abe95f51cd0e369fef45484dfee26bfb67b Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 6 Dec 2024 15:15:59 +0000 Subject: [PATCH 12/15] Attempt at fixing the long wait for WebSocket connection upon boot --- nginx/config/core_dev.conf | 2 ++ nginx/config/core_prod.conf | 1 + 2 files changed, 3 insertions(+) diff --git a/nginx/config/core_dev.conf b/nginx/config/core_dev.conf index be067ad5..59637d40 100644 --- a/nginx/config/core_dev.conf +++ b/nginx/config/core_dev.conf @@ -11,6 +11,7 @@ location /js/ { location / { proxy_pass http://node:5173; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; @@ -20,6 +21,7 
@@ location / { location /api/ { proxy_pass http://core:5000; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; diff --git a/nginx/config/core_prod.conf b/nginx/config/core_prod.conf index 43ce77b9..832d9209 100644 --- a/nginx/config/core_prod.conf +++ b/nginx/config/core_prod.conf @@ -11,6 +11,7 @@ location / { location /api/ { proxy_pass http://core:5000; proxy_set_header Host $host; + proxy_http_version 1.1; proxy_set_header Connection "Upgrade"; proxy_set_header Upgrade $http_upgrade; proxy_set_header X-Real-IP $remote_addr; From 73d2656dfffcbcccc55cd2376efeb8619999488e Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 6 Dec 2024 18:33:13 +0000 Subject: [PATCH 13/15] Allow binary caching without external database --- bci/configuration.py | 5 +++-- bci/database/mongo/container.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bci/configuration.py b/bci/configuration.py index 092b55a4..4aa4692a 100644 --- a/bci/configuration.py +++ b/bci/configuration.py @@ -70,16 +70,17 @@ def initialize_folders(): def get_database_params() -> DatabaseParameters: required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD'] missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]] + binary_cache_limit = int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)) if missing_database_params: logger.info(f'Could not find database parameters {missing_database_params}, using database container...') - return container.run() + return container.run(binary_cache_limit) else: database_params = DatabaseParameters( os.getenv('BCI_MONGO_HOST'), os.getenv('BCI_MONGO_USERNAME'), os.getenv('BCI_MONGO_PASSWORD'), os.getenv('BCI_MONGO_DATABASE'), - int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)), + binary_cache_limit, ) logger.info(f"Found database environment variables '{database_params}'") return database_params diff --git a/bci/database/mongo/container.py b/bci/database/mongo/container.py index 16e6b452..f6eb0466 100644 --- a/bci/database/mongo/container.py +++ b/bci/database/mongo/container.py @@ -20,7 +20,7 @@ DEFAULT_HOST = 'bh_db' -def run() -> DatabaseParameters: +def run(binary_cache_limit: int) -> DatabaseParameters: docker_client = docker.from_env() try: mongo_container = docker_client.containers.get(DEFAULT_HOST) @@ -33,7 +33,7 @@ def run() -> DatabaseParameters: LOGGER.debug('MongoDB container not found, creating a new one...') __create_new_container(DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, DEFAULT_HOST) LOGGER.debug('MongoDB container has started!') - return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, 0) + return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, binary_cache_limit) def stop(): From e6f3fa632454dea6e3a8b83e33f2fa480c0c14d2 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 6 Dec 2024 18:34:08 +0000 Subject: [PATCH 14/15] Log timing of binary downloading from vendor endpoint --- bci/browser/binary/binary.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bci/browser/binary/binary.py b/bci/browser/binary/binary.py index aaad5955..3a586fb8 100644 --- a/bci/browser/binary/binary.py +++ b/bci/browser/binary/binary.py @@ -2,6 +2,7 @@ import logging import os +import time from abc import abstractmethod from typing import Optional @@ -80,8 +81,10 @@ def fetch_binary(self): return # Try to download 
binary elif self.is_available_online(): + start = time.time() self.download_binary() - logger.info(f'Binary for {self.state.index} downloaded') + elapsed_time = time.time() - start + logger.info(f'Binary for {self.state.index} downloaded in {elapsed_time:.2f}s') BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state) else: raise BuildNotAvailableError(self.browser_name, self.state) From 9cc24be37d2d7cbd16f6a20df4d01049626bda10 Mon Sep 17 00:00:00 2001 From: Gertjan Date: Fri, 6 Dec 2024 18:37:29 +0000 Subject: [PATCH 15/15] Update Mongo container to v6 --- bci/database/mongo/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bci/database/mongo/container.py b/bci/database/mongo/container.py index f6eb0466..dbe49dcb 100644 --- a/bci/database/mongo/container.py +++ b/bci/database/mongo/container.py @@ -51,7 +51,7 @@ def __create_new_container(user: str, pw: str, db_name, db_host): raise AttributeError("Could not create container because of missing HOST_PWD environment variable") docker_client = docker.from_env() docker_client.containers.run( - 'mongo:5.0.17', + 'mongo:6', name=db_host, hostname=db_host, network=NETWORK_NAME,