diff --git a/changelog.d/9259.feature b/changelog.d/9259.feature new file mode 100644 index 000000000000..08304bf39e04 --- /dev/null +++ b/changelog.d/9259.feature @@ -0,0 +1 @@ +Add a new Synapse module type for manipulating user directory search results. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index fbbf71edd9fc..ae00b317bbf8 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2545,6 +2545,21 @@ spam_checker: #user_directory: # enabled: true # search_all_users: false +# +# # Server admins can define a Python module that implements extra rules for +# # user directory search. In order to work, this module needs to override +# # the methods defined in +# # synapse/storage/database/main/user_directory_search_module.py. +# # +# user_directory_search_module: +# # Your custom user directory search module's class name +# # +# module: "my_custom_module.UserDirectorySearchModule" +# +# # Custom configuration options passed to the module +# # +# config: +# example_option: 'things' # User Consent configuration diff --git a/docs/user_directory.md b/docs/user_directory.md index 872fc2197968..a60cff7b8180 100644 --- a/docs/user_directory.md +++ b/docs/user_directory.md @@ -1,5 +1,4 @@ -User Directory API Implementation -================================= +# User Directory API Implementation The user directory is currently maintained based on the 'visible' users on this particular server - i.e. ones which your account shares a room with, or @@ -10,3 +9,79 @@ DB corruption) get stale or out of sync. If this happens, for now the solution to fix it is to execute the SQL [here](../synapse/storage/databases/main/schema/delta/53/user_dir_populate.sql) and then restart synapse. This should then start a background task to flush the current tables and regenerate the directory. 
+ +## Custom User Directory Search Modules + +Synapse can be configured to make use of custom modules that modify the results from a user +directory query. These are standard python modules containing a class that implements all or a +subset of required methods. These are then called by Synapse's `UserDirectorySearchModule` +class. Example implementations of this module are: + +* https://github.com/matrix-org/matrix-synapse-user-directory-search-dinum + +### Available methods + +#### parse_config + +As with all Synapse modules, your class must implement the static method `parse_config` with +the following signature: + +```python +@staticmethod +def parse_config(config: dict) -> Any +``` + +`parse_config` is the first method to be called on your module, and will be passed a python +dictionary derived from the options specified in the homeserver config file for your module. +`parse_config` can return any type, and that return value will be given as the `config` +argument to your class's `__init__` method during initialisation. + +#### \_\_init\_\_ + +Your class must implement an `__init__` method with the following signature: + +```python +def __init__( + self, + config: Any, + module_api: synapse.module_api.ModuleApi, +) -> None +``` + +For each argument: + +* `config` - The return value from `parse_config`, containing any variables that may change + the behaviour of your module. +* `database_engine_type` is **not** passed to `__init__`: Synapse constructs your class with only + the `config` and `module_api` keyword arguments. The database engine in use - one of the engine + classes defined under [synapse/storage/engines/](../synapse/storage/engines) (e.g. `PostgresEngine`, `Sqlite3Engine`) - is instead supplied to + `get_search_query_ordering`, where it is useful for determining user directory-related queries to run on the database. +* `module_api` - An instance of Synapse's [ModuleApi class](../synapse/module_api/__init__.py), + which provides many methods for modules to get or set parts of the running Synapse instance. 
+ +`__init__` is called after `parse_config` during homeserver initialisation. + +#### get_search_query_ordering + +```python +get_search_query_ordering(self, database_engine_type: BaseDatabaseEngine) -> Tuple[str, Tuple] +``` + +This method is optional. If defined, it is called each time a user directory search is performed. + +`get_search_query_ordering` allows modifying the ordering of user directory search +results by returning a tuple of an SQL string that will be used as the `ORDER BY` clause when +retrieving user directory search results from the database, and a tuple of any extra arguments to bind to `?` placeholders in that clause. The full query that the clause +is inserted into can be found in +[`UserDirectoryStore.search_user_dir`](../synapse/storage/databases/main/user_directory.py), +and depends on the database engine in use, which is passed in as `database_engine_type`. + +### Synapse configuration + +For Synapse to load your module during initialisation, the `user_directory.enabled` +homeserver config option must be true, and the `user_directory.user_directory_search_module` +option must be filled out. The [sample homeserver config](sample_config.yaml) provides an example +configuration and a description of each option. The contents of the `config` option are +what is passed to your module's `parse_config` method after being converted from YAML to a +python dictionary. diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index c8d19c5d6b4e..a56109d63758 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from synapse.util.module_loader import load_module from ._base import Config @@ -26,6 +27,7 @@ class UserDirectoryConfig(Config): def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False + self.user_directory_search_module = None user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_search_enabled = user_directory_config.get( @@ -35,6 +37,12 @@ def read_config(self, config, **kwargs): "search_all_users", False ) + provider = user_directory_config.get("user_directory_search_module", None) + if provider is not None: + self.user_directory_search_module = load_module( + provider, "user_directory.user_directory_search_module" + ) + def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ # User Directory configuration @@ -52,4 +60,19 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): #user_directory: # enabled: true # search_all_users: false + # + # # Server admins can define a Python module that implements extra rules for + # # user directory search. In order to work, this module needs to override + # # the methods defined in + # # synapse/storage/database/main/user_directory_search_module.py. 
+ # # + # user_directory_search_module: + # # Your custom user directory search module's class name + # # + # module: "my_custom_module.UserDirectorySearchModule" + # + # # Custom configuration options passed to the module + # # + # config: + # example_option: 'things' """ diff --git a/synapse/server.py b/synapse/server.py index 9bdd3177d79d..87cb20f42740 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -119,6 +119,9 @@ ) from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import Databases, DataStore, Storage +from synapse.storage.databases.main.user_directory_search_module import ( + UserDirectorySearchModule, +) from synapse.streams.events import EventSources from synapse.types import DomainSpecificString from synapse.util import Clock @@ -625,6 +628,10 @@ def get_spam_checker(self) -> SpamChecker: def get_third_party_event_rules(self) -> ThirdPartyEventRules: return ThirdPartyEventRules(self) + @cache_in_self + def get_user_directory_search_module(self) -> UserDirectorySearchModule: + return UserDirectorySearchModule(self) + @cache_in_self def get_room_member_handler(self): if self.config.worker_app: diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7b9729da0958..dc7c1db768d4 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -750,14 +750,23 @@ async def search_user_dir(self, user_id, search_term, limit): ) """ + # Either the built-in or a custom user directory search module if + # one has been defined in the config. + # Used to determine the sort order of results. + # + # We load the module here as the database must have been initialised first. Thus + # loading the module in this class's __init__ function will fail. 
+ user_directory_search_module = self.hs.get_user_directory_search_module() + + # Retrieve the ordering SQL and any additional specified arguments + ( + ordering_clause, + order_args, + ) = user_directory_search_module.get_search_query_ordering(self.database_engine) + if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) - # We order by rank and then if they have profile info - # The ranking algorithm is hand tweaked for "best" results. Broadly - # the idea is we give a higher weight to exact matches. - # The array of numbers are the weights for the various part of the - # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search as t @@ -765,32 +774,18 @@ async def search_user_dir(self, user_id, search_term, limit): WHERE %s AND vector @@ to_tsquery('simple', ?) - ORDER BY - (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) - * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) - * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) - * ( - 3 * ts_rank_cd( - '{0.1, 0.1, 0.9, 1.0}', - vector, - to_tsquery('simple', ?), - 8 - ) - + ts_rank_cd( - '{0.1, 0.1, 0.9, 1.0}', - vector, - to_tsquery('simple', ?), - 8 - ) - ) - DESC, - display_name IS NULL, - avatar_url IS NULL + ORDER BY %s LIMIT ? """ % ( where_clause, + ordering_clause, + ) + args = ( + join_args + + (full_query, exact_query, prefix_query) + + order_args + + (limit + 1,) ) - args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -802,14 +797,13 @@ async def search_user_dir(self, user_id, search_term, limit): %s AND value MATCH ? ORDER BY - rank(matchinfo(user_directory_search)) DESC, - display_name IS NULL, - avatar_url IS NULL + %s LIMIT ? 
""" % ( where_clause, + ordering_clause, ) - args = join_args + (search_query, limit + 1) + args = join_args + (search_query,) + order_args + (limit + 1,) else: # This should be unreachable. raise Exception("Unrecognized database engine") diff --git a/synapse/storage/databases/main/user_directory_search_module.py b/synapse/storage/databases/main/user_directory_search_module.py new file mode 100644 index 000000000000..5f86aa864cd9 --- /dev/null +++ b/synapse/storage/databases/main/user_directory_search_module.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Tuple + +from synapse.storage.engines import BaseDatabaseEngine +from synapse.storage.engines.postgres import PostgresEngine +from synapse.storage.engines.sqlite import Sqlite3Engine + +if TYPE_CHECKING: + from synapse.app.homeserver import HomeServer + + +class UserDirectorySearchModule: + """Allows server admins to provide a Python module that augments the results of a + user directory search. + + Args: + hs: The HomeServer object. 
+ """ + + def __init__(self, hs: "HomeServer"): + # If defined, calls to methods will be redirected to this module instead + self.custom_module = None + + module = None + config = None + if hs.config.user_directory_search_module: + module, config = hs.config.user_directory_search_module + + if module is not None: + self.custom_module = module(config=config, module_api=hs.get_module_api(),) + + def get_search_query_ordering( + self, database_engine_type: BaseDatabaseEngine, + ) -> Tuple[str, Tuple]: + """Returns the contents of the ORDER BY section of the user directory search + query. The full query can be found in UserDirectoryStore. + + Args: + database_engine_type: The type of database engine that is in use. One of + those in synapse/storage/engines/*. + Ex. synapse.storage.engines.PostgresEngine + + Returns: + A string that can be placed after ORDER BY in order to influence the + ordering of results from a user directory search. + """ + if self.custom_module is None or not hasattr( + self.custom_module, "get_search_query_ordering" + ): + if isinstance(database_engine_type, PostgresEngine): + # We order by rank and then if a user has profile info. + # This ranking algorithm is hand tweaked for "best" results. Broadly + # the idea is that a higher weight is given to exact matches. + # The array of numbers are the weights for the various part of the + # search: (domain, _, display name, localpart) + return ( + """ + (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) + * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) + * ( + 3 * ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('simple', ?), + 8 + ) + + ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('simple', ?), + 8 + ) + ) + DESC, + display_name IS NULL, + avatar_url IS NULL + """, + (), + ) + elif isinstance(database_engine_type, Sqlite3Engine): + # We order by rank and then if a user has profile info. 
+ return ( + """ + rank(matchinfo(user_directory_search)) DESC, + display_name IS NULL, + avatar_url IS NULL + """, + (), + ) + else: + raise Exception("Received an unrecognised database engine") + + return self.custom_module.get_search_query_ordering(database_engine_type) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 9c886d671a1b..5a1f63a5e7ea 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -12,14 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Tuple + from mock import Mock from twisted.internet import defer import synapse.rest.admin from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes +from synapse.config import ConfigError +from synapse.module_api import ModuleApi from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import user_directory +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.roommember import ProfileInfo from tests import unittest @@ -585,3 +590,153 @@ def test_disabling_room_list(self): ) self.assertEquals(200, channel.code, channel.result) self.assertTrue(len(channel.json_body["results"]) == 0) + + +class UserDirectorySearchTestModule: + def __init__(self, config: Dict, module_api: ModuleApi): + self.config = config + self.user_displayname_to_show_first = self.config[ + "user_displayname_to_show_first" + ] + + # Ensure an initialised ModuleApi has been passed + if module_api is None or not isinstance(module_api, ModuleApi): + raise Exception("Passed module_api is invalid") + + @staticmethod + def parse_config(config: Dict) -> Dict: + """Parse the dict provided by the homeserver's config + Args: + config: A dictionary containing configuration options for this provider. 
+ + Returns: + A custom config object for this module. + """ + if "user_displayname_to_show_first" not in config: + raise ConfigError("user_displayname_to_show_first is a required field") + + return config + + def get_search_query_ordering( + self, database_engine_type: BaseDatabaseEngine + ) -> Tuple[str, Tuple]: + """Returns the contents of the ORDER BY section of the user directory search + query. + + Args: + database_engine_type: The type of database engine that is in use. One of + those in synapse/storage/engines/*. + Ex. synapse.storage.engines.PostgresEngine + + Returns: + A tuple containing: + + * A string that can be placed after ORDER BY in order to influence the + ordering of results from a user directory search. + * A tuple containing any extra arguments to provide to the query. + """ + # Users with a specific display name should get the highest overall rank. + # Otherwise, we simply order users by their display name lexicographically. + return ( + """ + display_name = ? DESC, + display_name ASC + """, + (self.user_displayname_to_show_first,), + ) + + +# A displayname for a user that we expect our custom module to present first in results +test_displayname = "3 Super Special Displayname" + + +class UserDirectorySearchModuleTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + user_directory.register_servlets, + ] + + @override_config( + { + "user_directory": { + "enabled": True, + "search_all_users": True, + "user_directory_search_module": { + "module": ( + "tests.handlers.test_user_directory.UserDirectorySearchTestModule" + ), + "config": { + "user_displayname_to_show_first": test_displayname, + "test_option": True, + }, + }, + } + } + ) + def test_parse_config(self): + """Test that parsing a config produces the expected ModuleConfig object.""" + # Check that our custom module was loaded + user_dir_search_module = self.hs.get_user_directory_search_module() + 
self.assertIsNotNone(user_dir_search_module.custom_module) + self.assertTrue( + isinstance( + user_dir_search_module.custom_module, UserDirectorySearchTestModule + ), + ) + + # Check that the custom module was configured as expected + self.assertTrue(user_dir_search_module.custom_module.config["test_option"],) + self.assertEqual( + user_dir_search_module.custom_module.user_displayname_to_show_first, + test_displayname, + ) + + @override_config( + { + "user_directory": { + "enabled": True, + "search_all_users": True, + "user_directory_search_module": { + "module": ( + "tests.handlers.test_user_directory.UserDirectorySearchTestModule" + ), + # Fill the config with options that will influence user dir search results + "config": {"user_displayname_to_show_first": test_displayname}, + }, + } + } + ) + def test_get_search_query_ordering(self): + """Tests that implementing UserDirectorySearchModule.get_search_query_ordering + modifies the ordering of user directory search results + """ + handler = self.hs.get_user_directory_handler() + + # Create a few users to test the directory with + user1 = self.register_user( + "user1", "password", displayname="1 Ordinary Displayname" + ) + user2 = self.register_user( + "user2", "password", displayname="2 Normal Displayname" + ) + user3 = self.register_user("user3", "password", displayname=test_displayname) + + searcher = self.register_user("searcher", "password") + + # Search for the term "user" + results = self.get_success(handler.search_users(searcher, "user", 20))[ + "results" + ] + + # Typically we'd expect Synapse to return users in lexicographical order, assuming + # they have similar User IDs/display names, and profile information. + + # We purposefully don't include a test for ordering without this module as Synapse + # could change its default user directory ordering at any time. 
+ + # Check that the order of returned results using our module is as we expect, + # i.e our user with a special display name shows up first (and results after that + # are simply ordered lexicographically ascending). + received_user_id_ordering = [result["user_id"] for result in results] + expected_user_id_ordering = [user3, user1, user2] + self.assertEqual(received_user_id_ordering, expected_user_id_ordering)