From 48d46698a52d1566b1e3ffd4c203ffe263345ab0 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 29 Sep 2022 17:30:23 +0100 Subject: [PATCH 1/9] Add a crypto handler --- matrix_content_scanner/crypto.py | 91 ++++++++++++++++++++++++++++++++ matrix_content_scanner/mcs.py | 5 ++ tests/test_crypto.py | 43 +++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 matrix_content_scanner/crypto.py create mode 100644 tests/test_crypto.py diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py new file mode 100644 index 0000000..58f9495 --- /dev/null +++ b/matrix_content_scanner/crypto.py @@ -0,0 +1,91 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import logging +from typing import TYPE_CHECKING + +from olm.pk import PkDecryption, PkDecryptionError, PkMessage + +from matrix_content_scanner.utils.constants import ErrCode +from matrix_content_scanner.utils.errors import ContentScannerRestError +from matrix_content_scanner.utils.types import JsonDict + +if TYPE_CHECKING: + from matrix_content_scanner.mcs import MatrixContentScanner + + +logger = logging.getLogger(__name__) + + +class CryptoHandler: + """Handler for handling Olm-encrypted request bodies.""" + + def __init__(self, mcs: "MatrixContentScanner") -> None: + key = mcs.config.crypto.pickle_key + path = mcs.config.crypto.pickle_path + try: + # Try reading the pickle from disk. + with open(path, "r") as fp: + pickle = fp.read() + + # Create a PkDecryption object with the content and key. + self._decryptor: PkDecryption = PkDecryption.from_pickle( + pickle=pickle.encode("ascii"), + passphrase=key, + ) + + logger.info("Loaded Olm pickle from %s", path) + except FileNotFoundError: + # If the pickle file doesn't exist, try creating it. + self._decryptor = PkDecryption() + pickle_bytes = self._decryptor.pickle(passphrase=key) + + logger.info( + "Olm pickle not found, generating one and saving it at %s", path + ) + + with open(path, "w+") as fp: + fp.write(pickle_bytes.decode("ascii")) + + self.public_key = self._decryptor.public_key + + def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict: + """Decrypts an Olm-encrypted body. + + Args: + ciphertext: The encrypted body's ciphertext. + mac: The encrypted body's MAC. + ephemeral: The encrypted body's ephemeral key. + + Returns: + The decrypted body, parsed as JSON. + """ + try: + decrypted = self._decryptor.decrypt( + message=PkMessage( + ephemeral_key=ephemeral, + mac=mac, + ciphertext=ciphertext, + ) + ) + except PkDecryptionError as e: + logger.error("Failed to decrypt encrypted body: %s", e) + raise ContentScannerRestError( + http_status=400, + reason=ErrCode.FAILED_TO_DECRYPT, + info=str(e), + ) + + # We know that `decrypted` parses as a JsonDict. + return json.loads(decrypted) # type: ignore[no-any-return] diff --git a/matrix_content_scanner/mcs.py b/matrix_content_scanner/mcs.py index 57d8333..6934698 100644 --- a/matrix_content_scanner/mcs.py +++ b/matrix_content_scanner/mcs.py @@ -24,6 +24,7 @@ from matrix_content_scanner import logutils from matrix_content_scanner.config import MatrixContentScannerConfig +from matrix_content_scanner.crypto import CryptoHandler from matrix_content_scanner.httpserver import HTTPServer from matrix_content_scanner.scanner.file_downloader import FileDownloader from matrix_content_scanner.scanner.scanner import Scanner @@ -58,6 +59,10 @@ def file_downloader(self) -> FileDownloader: def scanner(self) -> Scanner: return Scanner(self) + @cached_property + def crypto_handler(self) -> CryptoHandler: + return CryptoHandler(self) + def start(self) -> None: """Start the HTTP server and start the reactor.""" setup_logging() diff --git a/tests/test_crypto.py b/tests/test_crypto.py new file mode 100644 index 0000000..e5d224f --- /dev/null +++ b/tests/test_crypto.py @@ -0,0 +1,43 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import unittest + +from olm.pk import PkEncryption + +from tests.testutils import get_content_scanner + + +class CryptoHandlerTestCase(unittest.TestCase): + def setUp(self) -> None: + self.crypto_handler = get_content_scanner().crypto_handler + + def test_decrypt(self) -> None: + """Tests that an Olm-encrypted payload is successfully decrypted.""" + payload = {"foo": "bar"} + + # Encrypt the payload with PkEncryption. + pke = PkEncryption(self.crypto_handler.public_key) + encrypted = pke.encrypt(json.dumps(payload)) + + # Decrypt the payload with the crypto handler. + decrypted = self.crypto_handler.decrypt_body( + encrypted.ciphertext, + encrypted.mac, + encrypted.ephemeral_key, + ) + + # Check that the decrypted payload is the same as the original one before + # encryption. + self.assertEqual(decrypted, payload) From ac7e6e689012251a2b8e3b2fbe23b00411a0fc51 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 29 Sep 2022 17:33:43 +0100 Subject: [PATCH 2/9] Use custom branch for olm bindings with mypy --- tox.ini | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tox.ini b/tox.ini index 08ce094..490e065 100644 --- a/tox.ini +++ b/tox.ini @@ -33,5 +33,13 @@ commands = extras = dev +# The current version of python-olm that's on PyPI does not include a types marker. +# Ideally we'd just specify a custom pip install command that uses an --index-url that +# points to Olm's PyPI repo, but it looks like the packaging does not include py.typed in +# wheels. https://gitlab.matrix.org/matrix-org/olm/-/merge_requests/62 is an attempt at +# fixing this. +deps = + git+https://gitlab.matrix.org/babolivier/olm.git@babolivier/py.typed_manifest#egg=python-olm&subdirectory=python + commands = mypy matrix_content_scanner tests From 884dcdaf5e4a09e4040b43e6990d4a3ca66ae255 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 3 Oct 2022 16:37:20 +0100 Subject: [PATCH 3/9] Incorporate review --- matrix_content_scanner/config.py | 28 ++++++++++++++++++++++++++++ matrix_content_scanner/crypto.py | 6 +++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/matrix_content_scanner/config.py b/matrix_content_scanner/config.py index 2039373..ce8c1f2 100644 --- a/matrix_content_scanner/config.py +++ b/matrix_content_scanner/config.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +from pathlib import Path from typing import Any, Dict, List, Optional, Union import attr @@ -60,6 +62,29 @@ def _parse_size(size: Optional[Union[str, float]]) -> Optional[float]: raise ConfigError(e) +def _check_file_path(raw_path: str) -> None: + """Check if the file at the given path exists and is readable. If it's not, check if + the parent directory exists and is writeable. + + Args: + raw_path: The path to check. + + Raises: + ConfigError if the file does not exist and the parent directory cannot be written + into or does not exist. + """ + path = Path(raw_path).resolve() + + if os.access(path, os.R_OK): + return + + if not os.access(path.parent, os.W_OK): + raise ConfigError( + "File %s does not exist and parent directory is not writeable or does not exist" + % raw_path + ) + + # Schema to validate the raw configuration dictionary against. _config_schema = { "type": "object", @@ -181,3 +206,6 @@ def __init__(self, config_dict: Dict[str, Any]): self.crypto = CryptoConfig(**(config_dict.get("crypto") or {})) self.download = DownloadConfig(**(config_dict.get("download") or {})) self.result_cache = ResultCacheConfig(**(config_dict.get("result_cache") or {})) + + # Check that we can read the pickle file, or create it if it doesn't exist. + _check_file_path(self.crypto.pickle_path) diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py index 58f9495..5bec0ac 100644 --- a/matrix_content_scanner/crypto.py +++ b/matrix_content_scanner/crypto.py @@ -48,13 +48,13 @@ def __init__(self, mcs: "MatrixContentScanner") -> None: logger.info("Loaded Olm pickle from %s", path) except FileNotFoundError: # If the pickle file doesn't exist, try creating it. - self._decryptor = PkDecryption() - pickle_bytes = self._decryptor.pickle(passphrase=key) - logger.info( "Olm pickle not found, generating one and saving it at %s", path ) + self._decryptor = PkDecryption() + pickle_bytes = self._decryptor.pickle(passphrase=key) + with open(path, "w+") as fp: fp.write(pickle_bytes.decode("ascii")) From 5eddde56653235190c73976582d05a3119fe8d2d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 4 Oct 2022 13:48:29 +0100 Subject: [PATCH 4/9] Incorporate review --- README.md | 20 ++++++++--- config.sample.yaml | 7 ++-- matrix_content_scanner/config.py | 26 -------------- matrix_content_scanner/crypto.py | 60 +++++++++++++++++++++++++------- matrix_content_scanner/mcs.py | 31 +++++++++++++++-- tests/testutils.py | 8 ++++- 6 files changed, 103 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index de11dc7..5b469f3 100644 --- a/README.md +++ b/README.md @@ -24,14 +24,24 @@ pip install matrix-content-scanner Copy and edit the [sample configuration file](https://github.com/matrix-org/matrix-content-scanner-python/blob/main/config.sample.yaml). Each key is documented in this file. -Then run the content scanner (from within your virtual environment if one was created): +Before running the Matrix Content Scanner for the first time (if you are not [migrating +from the legacy Matrix Content Scanner](#migrating-from-the-legacy-matrix-content-scanner)), +run (from within your virtual environment if one was created): ```commandline -python -m matrix_content_scanner.mcs -c CONFIG_FILE +python -m matrix_content_scanner.mcs -c CONFIG_FILE --generate-secrets ``` Where `CONFIG_FILE` is the path to your configuration file. +This will generate the cryptographic secrets required for the content scanner to run. + +Then run the content scanner: + +```commandline +python -m matrix_content_scanner.mcs -c CONFIG_FILE +``` + ## API See [the API documentation](/docs/api.md) for information about how clients are expected @@ -56,9 +66,9 @@ deployment instructions) is the configuration format: * `acceptedMimeType` is renamed `scan.allowed_mimetypes` * `requestHeader` is renamed `download.additional_headers` and turned into a dictionary. -Note that the format of the cryptographic pickle file and key are compatible between this -project and the legacy Matrix Content Scanner. If no file exist at that path one will be -created automatically. +Note that the format of the cryptographic pickle file file and key are compatible between +this project and the legacy Matrix Content Scanner. If no file exist at that path one will +be created automatically. ## Development diff --git a/config.sample.yaml b/config.sample.yaml index f4a8834..15a5258 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -100,10 +100,13 @@ download: # Configuration for decrypting Olm-encrypted request bodies. crypto: - # The path to the Olm pickle file. + # The path to the Olm pickle file. This file contains the key pair to use when + # encrypting and decrypting encrypted POST request bodies. + # This file needs to be created with the --generate-secrets command line argument + # for the Matrix Content Scanner to run, see README.md for more information. # Required. pickle_path: "./pickle" - # The key to the pickle. + # The key to use to decode the pickle file. # Required. pickle_key: "this_is_a_secret" diff --git a/matrix_content_scanner/config.py b/matrix_content_scanner/config.py index ce8c1f2..02e5538 100644 --- a/matrix_content_scanner/config.py +++ b/matrix_content_scanner/config.py @@ -62,29 +62,6 @@ def _parse_size(size: Optional[Union[str, float]]) -> Optional[float]: raise ConfigError(e) -def _check_file_path(raw_path: str) -> None: - """Check if the file at the given path exists and is readable. If it's not, check if - the parent directory exists and is writeable. - - Args: - raw_path: The path to check. - - Raises: - ConfigError if the file does not exist and the parent directory cannot be written - into or does not exist. - """ - path = Path(raw_path).resolve() - - if os.access(path, os.R_OK): - return - - if not os.access(path.parent, os.W_OK): - raise ConfigError( - "File %s does not exist and parent directory is not writeable or does not exist" - % raw_path - ) - - # Schema to validate the raw configuration dictionary against. _config_schema = { "type": "object", @@ -206,6 +183,3 @@ def __init__(self, config_dict: Dict[str, Any]): self.crypto = CryptoConfig(**(config_dict.get("crypto") or {})) self.download = DownloadConfig(**(config_dict.get("download") or {})) self.result_cache = ResultCacheConfig(**(config_dict.get("result_cache") or {})) - - # Check that we can read the pickle file, or create it if it doesn't exist. - _check_file_path(self.crypto.pickle_path) diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py index 5bec0ac..5819fbe 100644 --- a/matrix_content_scanner/crypto.py +++ b/matrix_content_scanner/crypto.py @@ -17,8 +17,9 @@ from olm.pk import PkDecryption, PkDecryptionError, PkMessage +from matrix_content_scanner.config import MatrixContentScannerConfig from matrix_content_scanner.utils.constants import ErrCode -from matrix_content_scanner.utils.errors import ContentScannerRestError +from matrix_content_scanner.utils.errors import ContentScannerRestError, ConfigError from matrix_content_scanner.utils.types import JsonDict if TYPE_CHECKING: @@ -34,31 +35,64 @@ class CryptoHandler: def __init__(self, mcs: "MatrixContentScanner") -> None: key = mcs.config.crypto.pickle_key path = mcs.config.crypto.pickle_path + + # Try reading the pickle file from disk. try: - # Try reading the pickle from disk. with open(path, "r") as fp: pickle = fp.read() + except OSError as e: + raise ConfigError( + "Failed to open the pickle file configured at crypto.pickle_path (%s): %s" + % (path, e) + ) - # Create a PkDecryption object with the content and key. + # Create a PkDecryption object with the content and key. + try: self._decryptor: PkDecryption = PkDecryption.from_pickle( pickle=pickle.encode("ascii"), passphrase=key, ) - - logger.info("Loaded Olm pickle from %s", path) - except FileNotFoundError: - # If the pickle file doesn't exist, try creating it. - logger.info( - "Olm pickle not found, generating one and saving it at %s", path + except PkDecryptionError as e: + # If we failed to extract the key pair from the pickle file, it's likely + # because the key is incorrect, or there's an issue with the file's content. + raise ConfigError( + "Configured value for crypto.pickle_key is incorrect or pickle file is" + " corrupted (Olm error code: %s)" % e ) - self._decryptor = PkDecryption() - pickle_bytes = self._decryptor.pickle(passphrase=key) + logger.info("Loaded Olm key pair from pickle file %s", path) + + self.public_key = self._decryptor.public_key + + @staticmethod + def generate_and_store_key_pair(config: MatrixContentScannerConfig) -> None: + """Generates a new Olm key pair, and store it in the configured pickle file. + + Args: + config: The content scanner config. + + Raises: + ConfigError if we failed to write the file. + """ + path = config.crypto.pickle_path + + logger.info( + "Generating a new Olm key pair and storing it in pickle file %s", path + ) + + # Generate a new key pair and turns it into a pickle. + decryptor = PkDecryption() + pickle_bytes = decryptor.pickle(passphrase=config.crypto.pickle_key) + # Try to write the pickle's content into a file. + try: with open(path, "w+") as fp: fp.write(pickle_bytes.decode("ascii")) - - self.public_key = self._decryptor.public_key + except OSError as e: + raise ConfigError( + "Failed to write the pickle file at the location configured for" + " crypto.pickle_path (%s): %s" % (path, e) + ) def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict: """Decrypts an Olm-encrypted body. diff --git a/matrix_content_scanner/mcs.py b/matrix_content_scanner/mcs.py index 6934698..fd40c76 100644 --- a/matrix_content_scanner/mcs.py +++ b/matrix_content_scanner/mcs.py @@ -65,7 +65,6 @@ def crypto_handler(self) -> CryptoHandler: def start(self) -> None: """Start the HTTP server and start the reactor.""" - setup_logging() http_server = HTTPServer(self) http_server.start() self.reactor.run() @@ -93,6 +92,8 @@ def setup_logging() -> None: if __name__ == "__main__": + setup_logging() + parser = argparse.ArgumentParser( description="A web service for scanning media hosted by a Matrix media repository." ) @@ -102,6 +103,11 @@ def setup_logging() -> None: required=True, help="The YAML configuration file.", ) + parser.add_argument( + "--generate-secrets", + action='store_true', + help="Generate secrets such as cryptographic key pairs needed for the content scanner to run.", + ) args = parser.parse_args() @@ -114,6 +120,27 @@ def setup_logging() -> None: logger.error("Failed to read configuration file: %s", e) sys.exit(1) - # Start the content scanner. + # If required by the command-line arguments, generate and store the secrets needed for + # the program to run. + if args.generate_secrets: + try: + CryptoHandler.generate_and_store_key_pair(cfg) + except ConfigError as e: + logger.error("Failed to generate secrets: %s", e) + sys.exit(1) + + sys.exit(0) + + # Create the content scanner. mcs = MatrixContentScanner(cfg) + + # Construct the crypto handler early on, so we can make sure we can load the Olm key + # pair from the pickle file. + try: + _ = mcs.crypto_handler + except ConfigError as e: + logger.error(e) + sys.exit(1) + + # Start the content scanner. mcs.start() diff --git a/tests/testutils.py b/tests/testutils.py index c5d0362..22cddf5 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -18,6 +18,7 @@ from twisted.web.http_headers import Headers from matrix_content_scanner.config import MatrixContentScannerConfig +from matrix_content_scanner.crypto import CryptoHandler from matrix_content_scanner.mcs import MatrixContentScanner from matrix_content_scanner.utils.types import JsonDict @@ -123,4 +124,9 @@ def get_content_scanner(config: Optional[JsonDict] = None) -> MatrixContentScann # all required settings in that section. default_config.update(config) - return MatrixContentScanner(MatrixContentScannerConfig(default_config)) + parsed_config = MatrixContentScannerConfig(default_config) + + # Generate the Olm key pair and store them in a pickle file. + CryptoHandler.generate_and_store_key_pair(parsed_config) + + return MatrixContentScanner(parsed_config) From 7883e9d4cf16584e0f2f62ff7d44bf47bbac1fa9 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 4 Oct 2022 13:50:02 +0100 Subject: [PATCH 5/9] Lint --- matrix_content_scanner/config.py | 2 -- matrix_content_scanner/crypto.py | 2 +- matrix_content_scanner/mcs.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/matrix_content_scanner/config.py b/matrix_content_scanner/config.py index 02e5538..2039373 100644 --- a/matrix_content_scanner/config.py +++ b/matrix_content_scanner/config.py @@ -11,8 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -from pathlib import Path from typing import Any, Dict, List, Optional, Union import attr diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py index 5819fbe..b73f3ec 100644 --- a/matrix_content_scanner/crypto.py +++ b/matrix_content_scanner/crypto.py @@ -19,7 +19,7 @@ from matrix_content_scanner.config import MatrixContentScannerConfig from matrix_content_scanner.utils.constants import ErrCode -from matrix_content_scanner.utils.errors import ContentScannerRestError, ConfigError +from matrix_content_scanner.utils.errors import ConfigError, ContentScannerRestError from matrix_content_scanner.utils.types import JsonDict if TYPE_CHECKING: diff --git a/matrix_content_scanner/mcs.py b/matrix_content_scanner/mcs.py index fd40c76..ee4fe26 100644 --- a/matrix_content_scanner/mcs.py +++ b/matrix_content_scanner/mcs.py @@ -105,7 +105,7 @@ def setup_logging() -> None: ) parser.add_argument( "--generate-secrets", - action='store_true', + action="store_true", help="Generate secrets such as cryptographic key pairs needed for the content scanner to run.", ) From 8a999d2fae709e76175a5ab4d77fe7c02f7052b6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 10 Oct 2022 16:43:38 +0100 Subject: [PATCH 6/9] New olm release! --- tox.ini | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tox.ini b/tox.ini index 490e065..1ee6265 100644 --- a/tox.ini +++ b/tox.ini @@ -34,12 +34,12 @@ commands = extras = dev # The current version of python-olm that's on PyPI does not include a types marker. -# Ideally we'd just specify a custom pip install command that uses an --index-url that -# points to Olm's PyPI repo, but it looks like the packaging does not include py.typed in -# wheels. https://gitlab.matrix.org/matrix-org/olm/-/merge_requests/62 is an attempt at -# fixing this. -deps = - git+https://gitlab.matrix.org/babolivier/olm.git@babolivier/py.typed_manifest#egg=python-olm&subdirectory=python +# Hopefully that's something we can fix at some point, but in the mean time let's not +# block things on this and instead use the wheels on gitlab.matrix.org's repository (which +# do have a type marker). We use --index-url (and not --extra-index-url) so that pip does +# not try to download the python-olm that's on pypi.org. This is fine because GitLab will +# redirect requests for packages it doesn't know about to pypi.org. +install_command = python -m pip install --index-url=https://gitlab.matrix.org/api/v4/projects/27/packages/pypi/simple {opts} {packages} commands = mypy matrix_content_scanner tests From 36201ed41c6eac320a3b24d81b08172d9cfb7bff Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 11 Oct 2022 15:33:28 +0100 Subject: [PATCH 7/9] Revert back to generating the pickle file automatically --- README.md | 14 +---- config.sample.yaml | 2 - matrix_content_scanner/crypto.py | 88 +++++++++++++++----------------- matrix_content_scanner/mcs.py | 25 ++------- tests/testutils.py | 4 -- 5 files changed, 46 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 5b469f3..ace0751 100644 --- a/README.md +++ b/README.md @@ -24,24 +24,14 @@ pip install matrix-content-scanner Copy and edit the [sample configuration file](https://github.com/matrix-org/matrix-content-scanner-python/blob/main/config.sample.yaml). Each key is documented in this file. -Before running the Matrix Content Scanner for the first time (if you are not [migrating -from the legacy Matrix Content Scanner](#migrating-from-the-legacy-matrix-content-scanner)), -run (from within your virtual environment if one was created): +Then run the content scanner (from within your virtual environment if one was created): ```commandline -python -m matrix_content_scanner.mcs -c CONFIG_FILE --generate-secrets +python -m matrix_content_scanner.mcs -c CONFIG_FILE ``` Where `CONFIG_FILE` is the path to your configuration file. -This will generate the cryptographic secrets required for the content scanner to run. - -Then run the content scanner: - -```commandline -python -m matrix_content_scanner.mcs -c CONFIG_FILE -``` - ## API See [the API documentation](/docs/api.md) for information about how clients are expected diff --git a/config.sample.yaml b/config.sample.yaml index 15a5258..5f5eeec 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -102,8 +102,6 @@ download: crypto: # The path to the Olm pickle file. This file contains the key pair to use when # encrypting and decrypting encrypted POST request bodies. - # This file needs to be created with the --generate-secrets command line argument - # for the Matrix Content Scanner to run, see README.md for more information. # Required. pickle_path: "./pickle" diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py index b73f3ec..52fc1a0 100644 --- a/matrix_content_scanner/crypto.py +++ b/matrix_content_scanner/crypto.py @@ -17,7 +17,6 @@ from olm.pk import PkDecryption, PkDecryptionError, PkMessage -from matrix_content_scanner.config import MatrixContentScannerConfig from matrix_content_scanner.utils.constants import ErrCode from matrix_content_scanner.utils.errors import ConfigError, ContentScannerRestError from matrix_content_scanner.utils.types import JsonDict @@ -40,59 +39,52 @@ def __init__(self, mcs: "MatrixContentScanner") -> None: try: with open(path, "r") as fp: pickle = fp.read() - except OSError as e: - raise ConfigError( - "Failed to open the pickle file configured at crypto.pickle_path (%s): %s" - % (path, e) - ) - # Create a PkDecryption object with the content and key. - try: - self._decryptor: PkDecryption = PkDecryption.from_pickle( - pickle=pickle.encode("ascii"), - passphrase=key, - ) - except PkDecryptionError as e: - # If we failed to extract the key pair from the pickle file, it's likely - # because the key is incorrect, or there's an issue with the file's content. - raise ConfigError( - "Configured value for crypto.pickle_key is incorrect or pickle file is" - " corrupted (Olm error code: %s)" % e - ) - - logger.info("Loaded Olm key pair from pickle file %s", path) - - self.public_key = self._decryptor.public_key - - @staticmethod - def generate_and_store_key_pair(config: MatrixContentScannerConfig) -> None: - """Generates a new Olm key pair, and store it in the configured pickle file. + # Create a PkDecryption object with the content and key. + try: + self._decryptor: PkDecryption = PkDecryption.from_pickle( + pickle=pickle.encode("ascii"), + passphrase=key, + ) + except PkDecryptionError as e: + # If we failed to extract the key pair from the pickle file, it's likely + # because the key is incorrect, or there's an issue with the file's + # content. + raise ConfigError( + "Configured value for crypto.pickle_key is incorrect or pickle file" + " is corrupted (Olm error code: %s)" % e + ) - Args: - config: The content scanner config. + logger.info("Loaded Olm key pair from pickle file %s", path) - Raises: - ConfigError if we failed to write the file. - """ - path = config.crypto.pickle_path + except OSError as e: + if not isinstance(e, FileNotFoundError): + raise ConfigError( + "Failed to read the pickle file at the location configured for" + " crypto.pickle_path (%s): %s" % (path, e) + ) - logger.info( - "Generating a new Olm key pair and storing it in pickle file %s", path - ) + logger.info( + "Pickle file not found, generating a new Olm key pair and storing it in" + " pickle file %s", + path, + ) - # Generate a new key pair and turns it into a pickle. - decryptor = PkDecryption() - pickle_bytes = decryptor.pickle(passphrase=config.crypto.pickle_key) + # Generate a new key pair and turns it into a pickle. + self._decryptor = PkDecryption() + pickle_bytes = self._decryptor.pickle(passphrase=key) + + # Try to write the pickle's content into a file. + try: + with open(path, "w+") as fp: + fp.write(pickle_bytes.decode("ascii")) + except OSError as e: + raise ConfigError( + "Failed to write the pickle file at the location configured for" + " crypto.pickle_path (%s): %s" % (path, e) + ) - # Try to write the pickle's content into a file. - try: - with open(path, "w+") as fp: - fp.write(pickle_bytes.decode("ascii")) - except OSError as e: - raise ConfigError( - "Failed to write the pickle file at the location configured for" - " crypto.pickle_path (%s): %s" % (path, e) - ) + self.public_key = self._decryptor.public_key def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict: """Decrypts an Olm-encrypted body. diff --git a/matrix_content_scanner/mcs.py b/matrix_content_scanner/mcs.py index ee4fe26..ab21f0d 100644 --- a/matrix_content_scanner/mcs.py +++ b/matrix_content_scanner/mcs.py @@ -65,6 +65,7 @@ def crypto_handler(self) -> CryptoHandler: def start(self) -> None: """Start the HTTP server and start the reactor.""" + setup_logging() http_server = HTTPServer(self) http_server.start() self.reactor.run() @@ -92,8 +93,6 @@ def setup_logging() -> None: if __name__ == "__main__": - setup_logging() - parser = argparse.ArgumentParser( description="A web service for scanning media hosted by a Matrix media repository." ) @@ -103,11 +102,6 @@ def setup_logging() -> None: required=True, help="The YAML configuration file.", ) - parser.add_argument( - "--generate-secrets", - action="store_true", - help="Generate secrets such as cryptographic key pairs needed for the content scanner to run.", - ) args = parser.parse_args() @@ -117,29 +111,18 @@ def setup_logging() -> None: except (ConfigError, ScannerError) as e: # If there's an error reading the file, print it and exit without raising so we # don't confuse/annoy the user with an unnecessary stack trace. - logger.error("Failed to read configuration file: %s", e) + print("Failed to read configuration file: %s" % e, file=sys.stderr) sys.exit(1) - # If required by the command-line arguments, generate and store the secrets needed for - # the program to run. - if args.generate_secrets: - try: - CryptoHandler.generate_and_store_key_pair(cfg) - except ConfigError as e: - logger.error("Failed to generate secrets: %s", e) - sys.exit(1) - - sys.exit(0) - # Create the content scanner. mcs = MatrixContentScanner(cfg) # Construct the crypto handler early on, so we can make sure we can load the Olm key - # pair from the pickle file. + # pair from the pickle file (or write it if it doesn't already exist). try: _ = mcs.crypto_handler except ConfigError as e: - logger.error(e) + print(e, file=sys.stderr) sys.exit(1) # Start the content scanner. diff --git a/tests/testutils.py b/tests/testutils.py index 22cddf5..f081771 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -18,7 +18,6 @@ from twisted.web.http_headers import Headers from matrix_content_scanner.config import MatrixContentScannerConfig -from matrix_content_scanner.crypto import CryptoHandler from matrix_content_scanner.mcs import MatrixContentScanner from matrix_content_scanner.utils.types import JsonDict @@ -126,7 +125,4 @@ def get_content_scanner(config: Optional[JsonDict] = None) -> MatrixContentScann parsed_config = MatrixContentScannerConfig(default_config) - # Generate the Olm key pair and store them in a pickle file. - CryptoHandler.generate_and_store_key_pair(parsed_config) - return MatrixContentScanner(parsed_config) From 2c53eb4a6a76d466d0bdb79c4d498ac103c8a5ef Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 12 Oct 2022 15:44:11 +0100 Subject: [PATCH 8/9] Incorporate review --- README.md | 2 +- config.sample.yaml | 1 + matrix_content_scanner/crypto.py | 18 +++++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ace0751..16f78c6 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ deployment instructions) is the configuration format: * `acceptedMimeType` is renamed `scan.allowed_mimetypes` * `requestHeader` is renamed `download.additional_headers` and turned into a dictionary. -Note that the format of the cryptographic pickle file file and key are compatible between +Note that the format of the cryptographic pickle file and key are compatible between this project and the legacy Matrix Content Scanner. If no file exist at that path one will be created automatically. diff --git a/config.sample.yaml b/config.sample.yaml index 5f5eeec..8988c98 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -102,6 +102,7 @@ download: crypto: # The path to the Olm pickle file. This file contains the key pair to use when # encrypting and decrypting encrypted POST request bodies. + # The pickle file is automatically created at startup if it doesn't already exist. # Required. pickle_path: "./pickle" diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py index 52fc1a0..fc52b9c 100644 --- a/matrix_content_scanner/crypto.py +++ b/matrix_content_scanner/crypto.py @@ -52,18 +52,12 @@ def __init__(self, mcs: "MatrixContentScanner") -> None: # content. raise ConfigError( "Configured value for crypto.pickle_key is incorrect or pickle file" - " is corrupted (Olm error code: %s)" % e + f" is corrupted (Olm error code: {e})" ) logger.info("Loaded Olm key pair from pickle file %s", path) - except OSError as e: - if not isinstance(e, FileNotFoundError): - raise ConfigError( - "Failed to read the pickle file at the location configured for" - " crypto.pickle_path (%s): %s" % (path, e) - ) - + except FileNotFoundError: logger.info( "Pickle file not found, generating a new Olm key pair and storing it in" " pickle file %s", @@ -81,9 +75,15 @@ def __init__(self, mcs: "MatrixContentScanner") -> None: except OSError as e: raise ConfigError( "Failed to write the pickle file at the location configured for" - " crypto.pickle_path (%s): %s" % (path, e) + f" crypto.pickle_path ({path}): {e}" ) + except OSError as e: + raise ConfigError( + "Failed to read the pickle file at the location configured for" + f" crypto.pickle_path ({path}): {e}" + ) + self.public_key = self._decryptor.public_key def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict: From 11f7ed6cb3c8f5290dd285d517246ca03073439f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 12 Oct 2022 18:43:58 +0200 Subject: [PATCH 9/9] Update config.sample.yaml Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- config.sample.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.sample.yaml b/config.sample.yaml index 8988c98..f8d4f4e 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -102,7 +102,7 @@ download: crypto: # The path to the Olm pickle file. This file contains the key pair to use when # encrypting and decrypting encrypted POST request bodies. - # The pickle file is automatically created at startup if it doesn't already exist. + # A new keypair will be created at startup if the pickle file doesn't already exist. # Required. pickle_path: "./pickle"