From f84da3c32ec74cf054e2fd6d10618aa4997cffaa Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 26 Sep 2023 11:57:50 -0400 Subject: [PATCH] Add a cache around server ACL checking (#16360) * Pre-compiles the server ACLs onto an object per room and invalidates them when new events come in. * Converts the server ACL checking into Rust. --- changelog.d/16360.misc | 1 + rust/src/acl/mod.rs | 102 +++++++++++++++++++++ rust/src/lib.rs | 2 + stubs/synapse/synapse_rust/acl.pyi | 21 +++++ synapse/events/validator.py | 7 +- synapse/federation/federation_server.py | 76 ++------------- synapse/handlers/federation_event.py | 6 ++ synapse/handlers/message.py | 5 + synapse/replication/tcp/client.py | 6 ++ synapse/storage/controllers/state.py | 59 ++++++++++++ tests/federation/test_federation_server.py | 35 ++++--- 11 files changed, 235 insertions(+), 85 deletions(-) create mode 100644 changelog.d/16360.misc create mode 100644 rust/src/acl/mod.rs create mode 100644 stubs/synapse/synapse_rust/acl.pyi diff --git a/changelog.d/16360.misc b/changelog.d/16360.misc new file mode 100644 index 0000000000..b32d7b521e --- /dev/null +++ b/changelog.d/16360.misc @@ -0,0 +1 @@ +Cache server ACL checking. diff --git a/rust/src/acl/mod.rs b/rust/src/acl/mod.rs new file mode 100644 index 0000000000..071f2b7732 --- /dev/null +++ b/rust/src/acl/mod.rs @@ -0,0 +1,102 @@ +// Copyright 2023 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! An implementation of Matrix server ACL rules. + +use std::net::Ipv4Addr; +use std::str::FromStr; + +use anyhow::Error; +use pyo3::prelude::*; +use regex::Regex; + +use crate::push::utils::{glob_to_regex, GlobMatchType}; + +/// Called when registering modules with python. +pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> { + let child_module = PyModule::new(py, "acl")?; + child_module.add_class::()?; + + m.add_submodule(child_module)?; + + // We need to manually add the module to sys.modules to make `from + // synapse.synapse_rust import acl` work. + py.import("sys")? + .getattr("modules")? + .set_item("synapse.synapse_rust.acl", child_module)?; + + Ok(()) +} + +#[derive(Debug, Clone)] +#[pyclass(frozen)] +pub struct ServerAclEvaluator { + allow_ip_literals: bool, + allow: Vec, + deny: Vec, +} + +#[pymethods] +impl ServerAclEvaluator { + #[new] + pub fn py_new( + allow_ip_literals: bool, + allow: Vec<&str>, + deny: Vec<&str>, + ) -> Result { + let allow = allow + .iter() + .map(|s| glob_to_regex(s, GlobMatchType::Whole)) + .collect::>()?; + let deny = deny + .iter() + .map(|s| glob_to_regex(s, GlobMatchType::Whole)) + .collect::>()?; + + Ok(ServerAclEvaluator { + allow_ip_literals, + allow, + deny, + }) + } + + pub fn server_matches_acl_event(&self, server_name: &str) -> bool { + // first of all, check if literal IPs are blocked, and if so, whether the + // server name is a literal IP + if !self.allow_ip_literals { + // check for ipv6 literals. These start with '['. + if server_name.starts_with('[') { + return false; + } + + // check for ipv4 literals. We can just lift the routine from std::net. + if Ipv4Addr::from_str(server_name).is_ok() { + return false; + } + } + + // next, check the deny list + if self.deny.iter().any(|e| e.is_match(server_name)) { + return false; + } + + // then the allow list. + if self.allow.iter().any(|e| e.is_match(server_name)) { + return true; + } + + // everything else should be rejected. + false + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ce67f58611..c44c09bda7 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -2,6 +2,7 @@ use lazy_static::lazy_static; use pyo3::prelude::*; use pyo3_log::ResetHandle; +pub mod acl; pub mod push; lazy_static! { @@ -38,6 +39,7 @@ fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?; m.add_function(wrap_pyfunction!(reset_logging_config, m)?)?; + acl::register_module(py, m)?; push::register_module(py, m)?; Ok(()) diff --git a/stubs/synapse/synapse_rust/acl.pyi b/stubs/synapse/synapse_rust/acl.pyi new file mode 100644 index 0000000000..e03989b627 --- /dev/null +++ b/stubs/synapse/synapse_rust/acl.pyi @@ -0,0 +1,21 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +class ServerAclEvaluator: + def __init__( + self, allow_ip_literals: bool, allow: List[str], deny: List[str] + ) -> None: ... + def server_matches_acl_event(self, server_name: str) -> bool: ... diff --git a/synapse/events/validator.py b/synapse/events/validator.py index a637fadfab..83d9fb5813 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -39,9 +39,9 @@ from synapse.events.utils import ( CANONICALJSON_MIN_INT, validate_canonicaljson, ) -from synapse.federation.federation_server import server_matches_acl_event from synapse.http.servlet import validate_json_object from synapse.rest.models import RequestBodyModel +from synapse.storage.controllers.state import server_acl_evaluator_from_event from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID @@ -106,7 +106,10 @@ class EventValidator: self._validate_retention(event) elif event.type == EventTypes.ServerACL: - if not server_matches_acl_event(config.server.server_name, event): + server_acl_evaluator = server_acl_evaluator_from_event(event) + if not server_acl_evaluator.server_matches_acl_event( + config.server.server_name + ): raise SynapseError( 400, "Can't create an ACL event that denies the local server" ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index f9915e5a3f..ec8e770430 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -29,10 +29,8 @@ from typing import ( Union, ) -from matrix_common.regex import glob_to_regex from prometheus_client import Counter, Gauge, Histogram -from twisted.internet.abstract import isIPAddress from twisted.python import failure from synapse.api.constants import ( @@ -1324,75 +1322,13 @@ class FederationServer(FederationBase): Raises: AuthError if the server does not match the ACL """ - acl_event = await self._storage_controllers.state.get_current_state_event( - room_id, EventTypes.ServerACL, "" + server_acl_evaluator = ( + await self._storage_controllers.state.get_server_acl_for_room(room_id) ) - if not acl_event or server_matches_acl_event(server_name, acl_event): - return - - raise AuthError(code=403, msg="Server is banned from room") - - -def server_matches_acl_event(server_name: str, acl_event: EventBase) -> bool: - """Check if the given server is allowed by the ACL event - - Args: - server_name: name of server, without any port part - acl_event: m.room.server_acl event - - Returns: - True if this server is allowed by the ACLs - """ - logger.debug("Checking %s against acl %s", server_name, acl_event.content) - - # first of all, check if literal IPs are blocked, and if so, whether the - # server name is a literal IP - allow_ip_literals = acl_event.content.get("allow_ip_literals", True) - if not isinstance(allow_ip_literals, bool): - logger.warning("Ignoring non-bool allow_ip_literals flag") - allow_ip_literals = True - if not allow_ip_literals: - # check for ipv6 literals. These start with '['. - if server_name[0] == "[": - return False - - # check for ipv4 literals. We can just lift the routine from twisted. - if isIPAddress(server_name): - return False - - # next, check the deny list - deny = acl_event.content.get("deny", []) - if not isinstance(deny, (list, tuple)): - logger.warning("Ignoring non-list deny ACL %s", deny) - deny = [] - for e in deny: - if _acl_entry_matches(server_name, e): - # logger.info("%s matched deny rule %s", server_name, e) - return False - - # then the allow list. - allow = acl_event.content.get("allow", []) - if not isinstance(allow, (list, tuple)): - logger.warning("Ignoring non-list allow ACL %s", allow) - allow = [] - for e in allow: - if _acl_entry_matches(server_name, e): - # logger.info("%s matched allow rule %s", server_name, e) - return True - - # everything else should be rejected. - # logger.info("%s fell through", server_name) - return False - - -def _acl_entry_matches(server_name: str, acl_entry: Any) -> bool: - if not isinstance(acl_entry, str): - logger.warning( - "Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry) - ) - return False - regex = glob_to_regex(acl_entry) - return bool(regex.match(server_name)) + if server_acl_evaluator and not server_acl_evaluator.server_matches_acl_event( + server_name + ): + raise AuthError(code=403, msg="Server is banned from room") class FederationHandlerRegistry: diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 7c62cdfaef..0cc8e990d9 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -2342,6 +2342,12 @@ class FederationEventHandler: # TODO retrieve the previous state, and exclude join -> join transitions self._notifier.notify_user_joined_room(event.event_id, event.room_id) + # If this is a server ACL event, clear the cache in the storage controller. + if event.type == EventTypes.ServerACL: + self._state_storage_controller.get_server_acl_for_room.invalidate( + (event.room_id,) + ) + def _sanity_check_event(self, ev: EventBase) -> None: """ Do some early sanity checks of a received event diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c036578a3d..44dbbf81dd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1730,6 +1730,11 @@ class EventCreationHandler: event.event_id, event.room_id ) + if event.type == EventTypes.ServerACL: + self._storage_controllers.state.get_server_acl_for_room.invalidate( + (event.room_id,) + ) + await self._maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index ca8a76f77c..1c7946522a 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -205,6 +205,12 @@ class ReplicationDataHandler: self.notifier.notify_user_joined_room( row.data.event_id, row.data.room_id ) + + # If this is a server ACL event, clear the cache in the storage controller. + if row.data.type == EventTypes.ServerACL: + self._state_storage_controller.get_server_acl_for_room.invalidate( + (row.data.room_id,) + ) elif stream_name == UnPartialStatedRoomStream.NAME: for row in rows: assert isinstance(row, UnPartialStatedRoomStreamRow) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 10d219c045..46957723a1 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -37,6 +37,7 @@ from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, PartialStateEventsTracker, ) +from synapse.synapse_rust.acl import ServerAclEvaluator from synapse.types import MutableStateMap, StateMap, get_domain_from_id from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer @@ -501,6 +502,31 @@ class StateStorageController: return event.content.get("alias") + @cached() + async def get_server_acl_for_room( + self, room_id: str + ) -> Optional[ServerAclEvaluator]: + """Get the server ACL evaluator for room, if any + + This does up-front parsing of the content to ignore bad data and pre-compile + regular expressions. + + Args: + room_id: The room ID + + Returns: + The server ACL evaluator, if any + """ + + acl_event = await self.get_current_state_event( + room_id, EventTypes.ServerACL, "" + ) + + if not acl_event: + return None + + return server_acl_evaluator_from_event(acl_event) + @trace @tag_args async def get_current_state_deltas( @@ -760,3 +786,36 @@ class StateStorageController: cache.state_group = object() return frozenset(cache.hosts_to_joined_users) + + +def server_acl_evaluator_from_event(acl_event: EventBase) -> "ServerAclEvaluator": + """ + Create a ServerAclEvaluator from a m.room.server_acl event's content. + + This does up-front parsing of the content to ignore bad data. It then creates + the ServerAclEvaluator which will pre-compile regular expressions from the globs. + """ + + # first of all, parse if literal IPs are blocked. + allow_ip_literals = acl_event.content.get("allow_ip_literals", True) + if not isinstance(allow_ip_literals, bool): + logger.warning("Ignoring non-bool allow_ip_literals flag") + allow_ip_literals = True + + # next, parse the deny list by ignoring any non-strings. + deny = acl_event.content.get("deny", []) + if not isinstance(deny, (list, tuple)): + logger.warning("Ignoring non-list deny ACL %s", deny) + deny = [] + else: + deny = [s for s in deny if isinstance(s, str)] + + # then the allow list. + allow = acl_event.content.get("allow", []) + if not isinstance(allow, (list, tuple)): + logger.warning("Ignoring non-list allow ACL %s", allow) + allow = [] + else: + allow = [s for s in allow if isinstance(s, str)] + + return ServerAclEvaluator(allow_ip_literals, allow, deny) diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 5c850d1843..1831a5b47a 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -22,10 +22,10 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.config.server import DEFAULT_ROOM_VERSION from synapse.events import EventBase, make_event_from_dict -from synapse.federation.federation_server import server_matches_acl_event from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.controllers.state import server_acl_evaluator_from_event from synapse.types import JsonDict from synapse.util import Clock @@ -67,37 +67,46 @@ class ServerACLsTestCase(unittest.TestCase): e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]}) logging.info("ACL event: %s", e.content) - self.assertFalse(server_matches_acl_event("evil.com", e)) - self.assertFalse(server_matches_acl_event("EVIL.COM", e)) + server_acl_evalutor = server_acl_evaluator_from_event(e) - self.assertTrue(server_matches_acl_event("evil.com.au", e)) - self.assertTrue(server_matches_acl_event("honestly.not.evil.com", e)) + self.assertFalse(server_acl_evalutor.server_matches_acl_event("evil.com")) + self.assertFalse(server_acl_evalutor.server_matches_acl_event("EVIL.COM")) + + self.assertTrue(server_acl_evalutor.server_matches_acl_event("evil.com.au")) + self.assertTrue( + server_acl_evalutor.server_matches_acl_event("honestly.not.evil.com") + ) def test_block_ip_literals(self) -> None: e = _create_acl_event({"allow_ip_literals": False, "allow": ["*"]}) logging.info("ACL event: %s", e.content) - self.assertFalse(server_matches_acl_event("1.2.3.4", e)) - self.assertTrue(server_matches_acl_event("1a.2.3.4", e)) - self.assertFalse(server_matches_acl_event("[1:2::]", e)) - self.assertTrue(server_matches_acl_event("1:2:3:4", e)) + server_acl_evalutor = server_acl_evaluator_from_event(e) + + self.assertFalse(server_acl_evalutor.server_matches_acl_event("1.2.3.4")) + self.assertTrue(server_acl_evalutor.server_matches_acl_event("1a.2.3.4")) + self.assertFalse(server_acl_evalutor.server_matches_acl_event("[1:2::]")) + self.assertTrue(server_acl_evalutor.server_matches_acl_event("1:2:3:4")) def test_wildcard_matching(self) -> None: e = _create_acl_event({"allow": ["good*.com"]}) + + server_acl_evalutor = server_acl_evaluator_from_event(e) + self.assertTrue( - server_matches_acl_event("good.com", e), + server_acl_evalutor.server_matches_acl_event("good.com"), "* matches 0 characters", ) self.assertTrue( - server_matches_acl_event("GOOD.COM", e), + server_acl_evalutor.server_matches_acl_event("GOOD.COM"), "pattern is case-insensitive", ) self.assertTrue( - server_matches_acl_event("good.aa.com", e), + server_acl_evalutor.server_matches_acl_event("good.aa.com"), "* matches several characters, including '.'", ) self.assertFalse( - server_matches_acl_event("ishgood.com", e), + server_acl_evalutor.server_matches_acl_event("ishgood.com"), "pattern does not allow prefixes", )