From 19a1cda084342034cc92c88c0376cbcadbf8e2a0 Mon Sep 17 00:00:00 2001 From: "DeepBlueV7.X" Date: Wed, 23 Aug 2023 08:35:23 +0000 Subject: [PATCH 01/10] Properly update retry_last_ts when hitting the maximum retry interval (#16156) * Properly update retry_last_ts when hitting the maximum retry interval This was broken in 1.87 when the maximum retry interval got changed from almost infinite to a week (and made configurable). fixes #16101 Signed-off-by: Nicolas Werner * Add changelog * Change fix + add test * Add comment --------- Signed-off-by: Nicolas Werner Co-authored-by: Mathieu Velten --- changelog.d/16156.bugfix | 1 + .../storage/databases/main/transactions.py | 4 +- tests/util/test_retryutils.py | 51 +++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 changelog.d/16156.bugfix diff --git a/changelog.d/16156.bugfix b/changelog.d/16156.bugfix new file mode 100644 index 0000000000..17284297cf --- /dev/null +++ b/changelog.d/16156.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico. diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index c3bd36efc9..48e4b0ba3c 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -242,6 +242,8 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): ) -> None: # Upsert retry time interval if retry_interval is zero (i.e. we're # resetting it) or greater than the existing retry interval. + # We also upsert when the new retry interval is the same as the existing one, + # since it will be the case when `destination_max_retry_interval` is reached. # # WARNING: This is executed in autocommit, so we shouldn't add any more # SQL calls in here (without being very careful). @@ -257,7 +259,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): WHERE EXCLUDED.retry_interval = 0 OR destinations.retry_interval IS NULL - OR destinations.retry_interval < EXCLUDED.retry_interval + OR destinations.retry_interval <= EXCLUDED.retry_interval """ txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval)) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index 1277e1a865..4bcd17a6fc 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -108,3 +108,54 @@ class RetryLimiterTestCase(HomeserverTestCase): new_timings = self.get_success(store.get_destination_retry_timings("test_dest")) self.assertIsNone(new_timings) + + def test_max_retry_interval(self) -> None: + """Test that `destination_max_retry_interval` setting works as expected""" + store = self.hs.get_datastores().main + + destination_max_retry_interval_ms = ( + self.hs.config.federation.destination_max_retry_interval_ms + ) + + self.get_success(get_retry_limiter("test_dest", self.clock, store)) + self.pump(1) + + failure_ts = self.clock.time_msec() + + # Simulate reaching destination_max_retry_interval + self.get_success( + store.set_destination_retry_timings( + "test_dest", + failure_ts=failure_ts, + retry_last_ts=failure_ts, + retry_interval=destination_max_retry_interval_ms, + ) + ) + + # Check it fails + self.get_failure( + get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination + ) + + # Get past retry_interval and we can try again, and still throw an error to continue the backoff + self.reactor.advance(destination_max_retry_interval_ms / 1000 + 1) + limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) + self.pump(1) + try: + with limiter: + self.pump(1) + raise AssertionError("argh") + except AssertionError: + pass + + self.pump() + + # retry_interval does not increase and stays at destination_max_retry_interval_ms + new_timings = self.get_success(store.get_destination_retry_timings("test_dest")) + assert new_timings is not None + self.assertEqual(new_timings.retry_interval, destination_max_retry_interval_ms) + + # Check it fails + self.get_failure( + get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination + ) From da162cbe4e748841e93849c87374023a0fcbb390 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 23 Aug 2023 07:31:00 -0400 Subject: [PATCH 02/10] Add tests for restoring the presence state after a restart. (#16151) --- changelog.d/16150.misc | 2 +- changelog.d/16151.misc | 1 + tests/handlers/test_presence.py | 116 ++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 changelog.d/16151.misc diff --git a/changelog.d/16150.misc b/changelog.d/16150.misc index 97861282fd..41059378c5 100644 --- a/changelog.d/16150.misc +++ b/changelog.d/16150.misc @@ -1 +1 @@ -Clean-up calling `setup_background_tasks` in unit tests. +Improve presence tests. diff --git a/changelog.d/16151.misc b/changelog.d/16151.misc new file mode 100644 index 0000000000..41059378c5 --- /dev/null +++ b/changelog.d/16151.misc @@ -0,0 +1 @@ +Improve presence tests. diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index 1f483eb75a..1aebcc16ad 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -38,6 +38,7 @@ from synapse.handlers.presence import ( from synapse.rest import admin from synapse.rest.client import room from synapse.server import HomeServer +from synapse.storage.database import LoggingDatabaseConnection from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util import Clock @@ -513,6 +514,121 @@ class PresenceTimeoutTestCase(unittest.TestCase): self.assertEqual(state, new_state) +class PresenceHandlerInitTestCase(unittest.HomeserverTestCase): + def default_config(self) -> JsonDict: + config = super().default_config() + # Disable background tasks on this worker so that the PresenceHandler isn't + # loaded until we request it. + config["run_background_tasks_on"] = "other" + return config + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user_id = f"@test:{self.hs.config.server.server_name}" + + # Move the reactor to the initial time. + self.reactor.advance(1000) + now = self.clock.time_msec() + + main_store = hs.get_datastores().main + self.get_success( + main_store.update_presence( + [ + UserPresenceState( + user_id=self.user_id, + state=PresenceState.ONLINE, + last_active_ts=now, + last_federation_update_ts=now, + last_user_sync_ts=now, + status_msg=None, + currently_active=True, + ) + ] + ) + ) + + # Regenerate the preloaded presence information on PresenceStore. + def refill_presence(db_conn: LoggingDatabaseConnection) -> None: + main_store._presence_on_startup = main_store._get_active_presence(db_conn) + + self.get_success(main_store.db_pool.runWithConnection(refill_presence)) + + def test_restored_presence_idles(self) -> None: + """The presence state restored from the database should not persist forever.""" + + # Get the handler (which kicks off a bunch of timers). + presence_handler = self.hs.get_presence_handler() + + # Assert the user is online. + state = self.get_success( + presence_handler.get_state(UserID.from_string(self.user_id)) + ) + self.assertEqual(state.state, PresenceState.ONLINE) + + # Advance such that the user should timeout. + self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000) + self.reactor.pump([5]) + + # Check that the user is now offline. + state = self.get_success( + presence_handler.get_state(UserID.from_string(self.user_id)) + ) + self.assertEqual(state.state, PresenceState.OFFLINE) + + @parameterized.expand( + [ + (PresenceState.BUSY, PresenceState.BUSY), + (PresenceState.ONLINE, PresenceState.ONLINE), + (PresenceState.UNAVAILABLE, PresenceState.UNAVAILABLE), + # Offline syncs don't update the state. + (PresenceState.OFFLINE, PresenceState.ONLINE), + ] + ) + @unittest.override_config({"experimental_features": {"msc3026_enabled": True}}) + def test_restored_presence_online_after_sync( + self, sync_state: str, expected_state: str + ) -> None: + """ + The presence state restored from the database should be overridden with sync after a timeout. + + Args: + sync_state: The presence state of the new sync. + expected_state: The expected presence right after the sync. + """ + + # Get the handler (which kicks off a bunch of timers). + presence_handler = self.hs.get_presence_handler() + + # Assert the user is online, as restored. + state = self.get_success( + presence_handler.get_state(UserID.from_string(self.user_id)) + ) + self.assertEqual(state.state, PresenceState.ONLINE) + + # Advance slightly and sync. + self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000 / 2) + self.get_success( + presence_handler.user_syncing( + self.user_id, sync_state != PresenceState.OFFLINE, sync_state + ) + ) + + # Assert the user is in the expected state. + state = self.get_success( + presence_handler.get_state(UserID.from_string(self.user_id)) + ) + self.assertEqual(state.state, expected_state) + + # Advance such that the user's preloaded data times out, but not the new sync. + self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000 / 2) + self.reactor.pump([5]) + + # Check that the user is in the sync state (as the client is currently syncing still). + state = self.get_success( + presence_handler.get_state(UserID.from_string(self.user_id)) + ) + self.assertEqual(state.state, sync_state) + + class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase): user_id = "@test:server" user_id_obj = UserID.from_string(user_id) From 873971a8b9b4cbbc141df570e76a02c7b4b9b9c0 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 23 Aug 2023 13:37:51 +0200 Subject: [PATCH 03/10] Task scheduler: mark task as active if we are scheduling ASAP (#16165) --- changelog.d/16165.misc | 1 + synapse/storage/databases/main/task_scheduler.py | 2 +- synapse/util/task_scheduler.py | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/16165.misc diff --git a/changelog.d/16165.misc b/changelog.d/16165.misc new file mode 100644 index 0000000000..b4d514d249 --- /dev/null +++ b/changelog.d/16165.misc @@ -0,0 +1 @@ +Task scheduler: mark task as active if we are scheduling as soon as possible. diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py index 1fb3180c3c..9ab120eea9 100644 --- a/synapse/storage/databases/main/task_scheduler.py +++ b/synapse/storage/databases/main/task_scheduler.py @@ -92,7 +92,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore): if clauses: sql = sql + " WHERE " + " AND ".join(clauses) - sql = sql + "ORDER BY timestamp" + sql = sql + " ORDER BY timestamp" txn.execute(sql, args) return self.db_pool.cursor_to_dict(txn) diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py index 773a8327f6..4aea64b338 100644 --- a/synapse/util/task_scheduler.py +++ b/synapse/util/task_scheduler.py @@ -154,13 +154,15 @@ class TaskScheduler: f"No function associated with action {action} of the scheduled task" ) + status = TaskStatus.SCHEDULED if timestamp is None or timestamp < self._clock.time_msec(): timestamp = self._clock.time_msec() + status = TaskStatus.ACTIVE task = ScheduledTask( random_string(16), action, - TaskStatus.SCHEDULED, + status, timestamp, resource_id, params, From 86ecd341ec93167fbb5a335237c1cd629e7256a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Aug 2023 13:04:46 +0100 Subject: [PATCH 04/10] Always update `retry_last_ts` (#16164) --- changelog.d/16164.bugfix | 1 + synapse/storage/databases/main/transactions.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/16164.bugfix diff --git a/changelog.d/16164.bugfix b/changelog.d/16164.bugfix new file mode 100644 index 0000000000..17284297cf --- /dev/null +++ b/changelog.d/16164.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico. diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index 48e4b0ba3c..860bbf7c0f 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -242,8 +242,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): ) -> None: # Upsert retry time interval if retry_interval is zero (i.e. we're # resetting it) or greater than the existing retry interval. - # We also upsert when the new retry interval is the same as the existing one, - # since it will be the case when `destination_max_retry_interval` is reached. # # WARNING: This is executed in autocommit, so we shouldn't add any more # SQL calls in here (without being very careful). @@ -258,8 +256,10 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): retry_interval = EXCLUDED.retry_interval WHERE EXCLUDED.retry_interval = 0 + OR EXCLUDED.retry_last_ts = 0 OR destinations.retry_interval IS NULL - OR destinations.retry_interval <= EXCLUDED.retry_interval + OR destinations.retry_interval < EXCLUDED.retry_interval + OR destinations.retry_last_ts < EXCLUDED.retry_last_ts """ txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval)) From 7cd79ce0519964bf52a3f88d6fd8a5cc5dff5c6c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Aug 2023 13:45:19 +0100 Subject: [PATCH 05/10] Reduce DB contention on worker locks (#16160) --- changelog.d/16160.misc | 1 + .../03_read_write_locks_triggers.sql.postgres | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 changelog.d/16160.misc create mode 100644 synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres diff --git a/changelog.d/16160.misc b/changelog.d/16160.misc new file mode 100644 index 0000000000..78803b7bcd --- /dev/null +++ b/changelog.d/16160.misc @@ -0,0 +1 @@ +Reduce DB contention on worker locks. diff --git a/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres new file mode 100644 index 0000000000..31de5bfa18 --- /dev/null +++ b/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres @@ -0,0 +1,37 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql` + +-- Reduce the number of writes we do on this table. +-- +-- Note: that we still want to lock the row here (i.e. still do a `DO UPDATE +-- SET`) so that we serialize updates. +CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$ +BEGIN + INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token) + VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token) + ON CONFLICT (lock_name, lock_key) + DO UPDATE SET write_lock = NEW.write_lock + WHERE OLD.write_lock != NEW.write_lock; + RETURN NEW; +END +$$ +LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks; +CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks + FOR EACH ROW + EXECUTE PROCEDURE upsert_read_write_lock_parent(); From 4adaba9acf224e14171a8a4b9c98ef0791c4a1e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Aug 2023 13:45:25 +0100 Subject: [PATCH 06/10] Fix rare deadlock when using read/write locks (#16133) --- changelog.d/16133.bugfix | 1 + .../02_read_write_locks_deadlock.sql.postgres | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 changelog.d/16133.bugfix create mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres diff --git a/changelog.d/16133.bugfix b/changelog.d/16133.bugfix new file mode 100644 index 0000000000..ed8830692f --- /dev/null +++ b/changelog.d/16133.bugfix @@ -0,0 +1 @@ +Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0. diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres new file mode 100644 index 0000000000..401c42e18a --- /dev/null +++ b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres @@ -0,0 +1,37 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- To avoid the possibility of a deadlock, lock the +-- `worker_read_write_locks_mode` table so that we serialize inserts/deletes +-- for a specific lock name/key. + +CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$ +BEGIN + -- `PERFORM` is a `SELECT` which discards the rows. + PERFORM * FROM worker_read_write_locks_mode + WHERE + lock_name = OLD.lock_name + AND lock_key = OLD.lock_key + FOR UPDATE; + + RETURN OLD; +END +$$ +LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks; +CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks + FOR EACH ROW + EXECUTE PROCEDURE delete_read_write_lock_parent_before(); From ec662bbe413bd976af97f099ea4f11dafaf98b3e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 23 Aug 2023 14:00:34 +0100 Subject: [PATCH 07/10] Filter out unwanted user_agents from udv. (#16124) --- changelog.d/16124.bugfix | 1 + synapse/storage/databases/main/client_ips.py | 5 ++ tests/storage/test_client_ips.py | 65 ++++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 changelog.d/16124.bugfix diff --git a/changelog.d/16124.bugfix b/changelog.d/16124.bugfix new file mode 100644 index 0000000000..fb1d501a2f --- /dev/null +++ b/changelog.d/16124.bugfix @@ -0,0 +1 @@ +Filter out user agent references to the sliding sync proxy and rust-sdk from the user_daily_visits table to ensure that Element X can be represented fully. diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 0df160d2b0..d8d333e11d 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -579,6 +579,11 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke device_id: Optional[str], now: Optional[int] = None, ) -> None: + # The sync proxy continuously triggers /sync even if the user is not + # present so should be excluded from user_ips entries. + if user_agent == "sync-v3-proxy-": + return + if not now: now = int(self._clock.time_msec()) key = (user_id, access_token, ip) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index cd0079871c..209d68b40b 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -654,6 +654,71 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): r, ) + def test_invalid_user_agents_are_ignored(self) -> None: + # First make sure we have completed all updates. + self.wait_for_background_updates() + + user_id1 = "@user1:id" + user_id2 = "@user2:id" + device_id1 = "MY_DEVICE1" + device_id2 = "MY_DEVICE2" + access_token1 = "access_token1" + access_token2 = "access_token2" + + # Insert a user IP 1 + self.get_success( + self.store.store_device( + user_id1, + device_id1, + "display name1", + ) + ) + # Insert a user IP 2 + self.get_success( + self.store.store_device( + user_id2, + device_id2, + "display name2", + ) + ) + + self.get_success( + self.store.insert_client_ip( + user_id1, access_token1, "ip", "sync-v3-proxy-", device_id1 + ) + ) + self.get_success( + self.store.insert_client_ip( + user_id2, access_token2, "ip", "user_agent", device_id2 + ) + ) + # Force persisting to disk + self.reactor.advance(200) + + # We should see that in the DB + result = self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + # ensure user1 is filtered out + self.assertEqual( + result, + [ + { + "access_token": access_token2, + "ip": "ip", + "user_agent": "user_agent", + "device_id": device_id2, + "last_seen": 0, + } + ], + ) + class ClientIpAuthTestCase(unittest.HomeserverTestCase): servlets = [ From 85118420a226c5664f9cb8ea31d91cf842709740 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:16:14 +0100 Subject: [PATCH 08/10] Switch `devenv` dependency in the nix development environment to the latest release (instead of the development branch) (#16063) --- changelog.d/16063.misc | 1 + flake.lock | 8 ++++---- flake.nix | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/16063.misc diff --git a/changelog.d/16063.misc b/changelog.d/16063.misc new file mode 100644 index 0000000000..069fc1adab --- /dev/null +++ b/changelog.d/16063.misc @@ -0,0 +1 @@ +Fix building the nix development environment on MacOS systems. diff --git a/flake.lock b/flake.lock index 084c40fe2f..d53be767a7 100644 --- a/flake.lock +++ b/flake.lock @@ -8,16 +8,16 @@ "pre-commit-hooks": "pre-commit-hooks" }, "locked": { - "lastModified": 1690534632, - "narHash": "sha256-kOXS9x5y17VKliC7wZxyszAYrWdRl1JzggbQl0gyo94=", + "lastModified": 1688058187, + "narHash": "sha256-ipDcc7qrucpJ0+0eYNlwnE+ISTcq4m03qW+CWUshRXI=", "owner": "cachix", "repo": "devenv", - "rev": "6568e7e485a46bbf32051e4d6347fa1fed8b2f25", + "rev": "c8778e3dc30eb9043e218aaa3861d42d4992de77", "type": "github" }, "original": { "owner": "cachix", - "ref": "main", + "ref": "v0.6.3", "repo": "devenv", "type": "github" } diff --git a/flake.nix b/flake.nix index e70a41dfc2..b89b6d9218 100644 --- a/flake.nix +++ b/flake.nix @@ -45,7 +45,7 @@ # Output a development shell for x86_64/aarch64 Linux/Darwin (MacOS). systems.url = "github:nix-systems/default"; # A development environment manager built on Nix. See https://devenv.sh. - devenv.url = "github:cachix/devenv/main"; + devenv.url = "github:cachix/devenv/v0.6.3"; # Rust toolchain. rust-overlay.url = "github:oxalica/rust-overlay"; }; From 18279631e9555bd9032b993074e62c7af886d9cd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Aug 2023 16:24:30 +0100 Subject: [PATCH 09/10] Fix rare deadlock when using read/write locks (#16169) --- changelog.d/16169.bugfix | 1 + .../02_read_write_locks_deadlock.sql.postgres | 37 ---------- .../04_read_write_locks_deadlock.sql.postgres | 71 +++++++++++++++++++ 3 files changed, 72 insertions(+), 37 deletions(-) create mode 100644 changelog.d/16169.bugfix delete mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres create mode 100644 synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres diff --git a/changelog.d/16169.bugfix b/changelog.d/16169.bugfix new file mode 100644 index 0000000000..ed8830692f --- /dev/null +++ b/changelog.d/16169.bugfix @@ -0,0 +1 @@ +Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0. diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres deleted file mode 100644 index 401c42e18a..0000000000 --- a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2023 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- To avoid the possibility of a deadlock, lock the --- `worker_read_write_locks_mode` table so that we serialize inserts/deletes --- for a specific lock name/key. - -CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$ -BEGIN - -- `PERFORM` is a `SELECT` which discards the rows. - PERFORM * FROM worker_read_write_locks_mode - WHERE - lock_name = OLD.lock_name - AND lock_key = OLD.lock_key - FOR UPDATE; - - RETURN OLD; -END -$$ -LANGUAGE plpgsql; - -DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks; -CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks - FOR EACH ROW - EXECUTE PROCEDURE delete_read_write_lock_parent_before(); diff --git a/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres new file mode 100644 index 0000000000..0eb459c0b9 --- /dev/null +++ b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres @@ -0,0 +1,71 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Remove a previous attempt to avoid deadlocks +DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks; +DROP FUNCTION IF EXISTS delete_read_write_lock_parent_before; + + +-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock +-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we +-- update the `worker_read_write_locks_mode.token` to match another instance +-- that has currently acquired the lock, or we delete the row if nobody has +-- currently acquired a lock. +CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$ +DECLARE + new_token TEXT; + mode_row_token TEXT; +BEGIN + -- Only update the token in `_mode` if its our token. This prevents + -- deadlocks. + -- + -- We shove the token into `mode_row_token`, as otherwise postgres complains + -- we're not using the returned data. + SELECT token INTO mode_row_token FROM worker_read_write_locks_mode + WHERE + lock_name = OLD.lock_name + AND lock_key = OLD.lock_key + AND token = OLD.token + FOR UPDATE; + + IF NOT FOUND THEN + RETURN NEW; + END IF; + + SELECT token INTO new_token FROM worker_read_write_locks + WHERE + lock_name = OLD.lock_name + AND lock_key = OLD.lock_key + LIMIT 1 FOR UPDATE SKIP LOCKED; + + IF NOT FOUND THEN + DELETE FROM worker_read_write_locks_mode + WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token; + ELSE + UPDATE worker_read_write_locks_mode + SET token = new_token + WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key; + END IF; + + RETURN NEW; +END +$$ +LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks; +CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks + FOR EACH ROW + EXECUTE PROCEDURE delete_read_write_lock_parent(); From 33fa82a34cb0001787889be88c3817688ce2f76d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 23 Aug 2023 13:22:34 -0400 Subject: [PATCH 10/10] Stabilize support for MSC3958 (suppress notifications from edits). (#16113) --- changelog.d/16113.feature | 1 + rust/benches/evaluator.rs | 1 - rust/src/push/base_rules.rs | 2 +- rust/src/push/evaluator.rs | 2 +- rust/src/push/mod.rs | 9 --------- stubs/synapse/synapse_rust/push.pyi | 1 - synapse/config/experimental.py | 5 ----- synapse/storage/databases/main/push_rule.py | 1 - tests/push/test_bulk_push_rule_evaluator.py | 1 - 9 files changed, 3 insertions(+), 20 deletions(-) create mode 100644 changelog.d/16113.feature diff --git a/changelog.d/16113.feature b/changelog.d/16113.feature new file mode 100644 index 0000000000..69fdaaebac --- /dev/null +++ b/changelog.d/16113.feature @@ -0,0 +1 @@ +Suppress notifications from message edits per [MSC3958](https://github.com/matrix-org/matrix-spec-proposals/pull/3958). diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 6e1eab2a3b..14071105a0 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -197,7 +197,6 @@ fn bench_eval_message(b: &mut Bencher) { false, false, false, - false, ); b.iter(|| eval.run(&rules, Some("bob"), Some("person"))); diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index 00baceda91..59fd27665a 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -228,7 +228,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ // We don't want to notify on edits *unless* the edit directly mentions a // user, which is handled above. PushRule { - rule_id: Cow::Borrowed("global/override/.org.matrix.msc3958.suppress_edits"), + rule_id: Cow::Borrowed("global/override/.m.rule.suppress_edits"), priority_class: 5, conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventPropertyIs( EventPropertyIsCondition { diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 48e670478b..5b9bf9b26a 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -564,7 +564,7 @@ fn test_requires_room_version_supports_condition() { }; let rules = PushRules::new(vec![custom_rule]); result = evaluator.run( - &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false), + &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true), None, None, ); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 829fb79d0e..8e91f506cc 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -527,7 +527,6 @@ pub struct FilteredPushRules { msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, - msc3958_suppress_edits_enabled: bool, } #[pymethods] @@ -539,7 +538,6 @@ impl FilteredPushRules { msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, - msc3958_suppress_edits_enabled: bool, ) -> Self { Self { push_rules, @@ -547,7 +545,6 @@ impl FilteredPushRules { msc1767_enabled, msc3381_polls_enabled, msc3664_enabled, - msc3958_suppress_edits_enabled, } } @@ -584,12 +581,6 @@ impl FilteredPushRules { return false; } - if !self.msc3958_suppress_edits_enabled - && rule.rule_id == "global/override/.org.matrix.msc3958.suppress_edits" - { - return false; - } - true }) .map(|r| { diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index d573a37b9a..1f432d4ecf 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -46,7 +46,6 @@ class FilteredPushRules: msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, - msc3958_suppress_edits_enabled: bool, ): ... def rules(self) -> Collection[Tuple[PushRule, bool]]: ... diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 277ea4675b..84d6dd13af 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -383,11 +383,6 @@ class ExperimentalConfig(Config): # MSC3391: Removing account data. self.msc3391_enabled = experimental.get("msc3391_enabled", False) - # MSC3959: Do not generate notifications for edits. - self.msc3958_supress_edit_notifs = experimental.get( - "msc3958_supress_edit_notifs", False - ) - # MSC3967: Do not require UIA when first uploading cross signing keys self.msc3967_enabled = experimental.get("msc3967_enabled", False) diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index c13c0bc7d7..bec0dc2afe 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -88,7 +88,6 @@ def _load_rules( msc1767_enabled=experimental_config.msc1767_enabled, msc3664_enabled=experimental_config.msc3664_enabled, msc3381_polls_enabled=experimental_config.msc3381_polls_enabled, - msc3958_suppress_edits_enabled=experimental_config.msc3958_supress_edit_notifs, ) return filtered_rules diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 829b9df83d..937e6ebb7d 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -382,7 +382,6 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) ) - @override_config({"experimental_features": {"msc3958_supress_edit_notifs": True}}) def test_suppress_edits(self) -> None: """Under the default push rules, event edits should not generate notifications.""" bulk_evaluator = BulkPushRuleEvaluator(self.hs)