mirror of
				https://github.com/matrix-org/synapse.git
				synced 2025-10-31 12:18:24 +00:00 
			
		
		
		
	Add basic read/write lock (#15782)
This commit is contained in:
		
							parent
							
								
									ce857c05d5
								
							
						
					
					
						commit
						39d131b016
					
				
							
								
								
									
										1
									
								
								changelog.d/15782.misc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changelog.d/15782.misc
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| Add read/write style cross-worker locks. | ||||
| @ -197,6 +197,11 @@ IGNORED_TABLES = { | ||||
|     "ui_auth_sessions", | ||||
|     "ui_auth_sessions_credentials", | ||||
|     "ui_auth_sessions_ips", | ||||
|     # Ignore the worker locks table, as a) there shouldn't be any acquired locks | ||||
|     # after porting, and b) the circular foreign key constraints make it hard to | ||||
|     # port. | ||||
|     "worker_read_write_locks_mode", | ||||
|     "worker_read_write_locks", | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -805,7 +810,9 @@ class Porter: | ||||
|             ) | ||||
|             # Map from table name to args passed to `handle_table`, i.e. a tuple | ||||
|             # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`. | ||||
|             tables_to_port_info_map = {r[0]: r[1:] for r in setup_res} | ||||
|             tables_to_port_info_map = { | ||||
|                 r[0]: r[1:] for r in setup_res if r[0] not in IGNORED_TABLES | ||||
|             } | ||||
| 
 | ||||
|             # Step 5. Do the copying. | ||||
|             # | ||||
|  | ||||
| @ -25,6 +25,7 @@ from synapse.storage.database import ( | ||||
|     LoggingDatabaseConnection, | ||||
|     LoggingTransaction, | ||||
| ) | ||||
| from synapse.storage.engines import PostgresEngine | ||||
| from synapse.util import Clock | ||||
| from synapse.util.stringutils import random_string | ||||
| 
 | ||||
| @ -68,12 +69,20 @@ class LockStore(SQLBaseStore): | ||||
|         self._reactor = hs.get_reactor() | ||||
|         self._instance_name = hs.get_instance_id() | ||||
| 
 | ||||
|         # A map from `(lock_name, lock_key)` to the token of any locks that we | ||||
|         # think we currently hold. | ||||
|         self._live_tokens: WeakValueDictionary[ | ||||
|         # A map from `(lock_name, lock_key)` to lock that we think we | ||||
|         # currently hold. | ||||
|         self._live_lock_tokens: WeakValueDictionary[ | ||||
|             Tuple[str, str], Lock | ||||
|         ] = WeakValueDictionary() | ||||
| 
 | ||||
|         # A map from `(lock_name, lock_key, token)` to read/write lock that we | ||||
|         # think we currently hold. For a given lock_name/lock_key, there can be | ||||
|         # multiple read locks at a time but only one write lock (no mixing read | ||||
|         # and write locks at the same time). | ||||
|         self._live_read_write_lock_tokens: WeakValueDictionary[ | ||||
|             Tuple[str, str, str], Lock | ||||
|         ] = WeakValueDictionary() | ||||
| 
 | ||||
|         # When we shut down we want to remove the locks. Technically this can | ||||
|         # lead to a race, as we may drop the lock while we are still processing. | ||||
|         # However, a) it should be a small window, b) the lock is best effort | ||||
| @ -91,11 +100,13 @@ class LockStore(SQLBaseStore): | ||||
|         """Called when the server is shutting down""" | ||||
|         logger.info("Dropping held locks due to shutdown") | ||||
| 
 | ||||
|         # We need to take a copy of the tokens dict as dropping the locks will | ||||
|         # cause the dictionary to change. | ||||
|         locks = dict(self._live_tokens) | ||||
|         # We need to take a copy of the locks as dropping the locks will cause | ||||
|         # the dictionary to change. | ||||
|         locks = list(self._live_lock_tokens.values()) + list( | ||||
|             self._live_read_write_lock_tokens.values() | ||||
|         ) | ||||
| 
 | ||||
|         for lock in locks.values(): | ||||
|         for lock in locks: | ||||
|             await lock.release() | ||||
| 
 | ||||
|         logger.info("Dropped locks due to shutdown") | ||||
| @ -122,7 +133,7 @@ class LockStore(SQLBaseStore): | ||||
|         """ | ||||
| 
 | ||||
|         # Check if this process has taken out a lock and if it's still valid. | ||||
|         lock = self._live_tokens.get((lock_name, lock_key)) | ||||
|         lock = self._live_lock_tokens.get((lock_name, lock_key)) | ||||
|         if lock and await lock.is_still_valid(): | ||||
|             return None | ||||
| 
 | ||||
| @ -176,61 +187,111 @@ class LockStore(SQLBaseStore): | ||||
|             self._reactor, | ||||
|             self._clock, | ||||
|             self, | ||||
|             read_write=False, | ||||
|             lock_name=lock_name, | ||||
|             lock_key=lock_key, | ||||
|             token=token, | ||||
|         ) | ||||
| 
 | ||||
|         self._live_tokens[(lock_name, lock_key)] = lock | ||||
|         self._live_lock_tokens[(lock_name, lock_key)] = lock | ||||
| 
 | ||||
|         return lock | ||||
| 
 | ||||
|     async def _is_lock_still_valid( | ||||
|         self, lock_name: str, lock_key: str, token: str | ||||
|     ) -> bool: | ||||
|         """Checks whether this instance still holds the lock.""" | ||||
|         last_renewed_ts = await self.db_pool.simple_select_one_onecol( | ||||
|             table="worker_locks", | ||||
|             keyvalues={ | ||||
|                 "lock_name": lock_name, | ||||
|                 "lock_key": lock_key, | ||||
|                 "token": token, | ||||
|             }, | ||||
|             retcol="last_renewed_ts", | ||||
|             allow_none=True, | ||||
|             desc="is_lock_still_valid", | ||||
|         ) | ||||
|         return ( | ||||
|             last_renewed_ts is not None | ||||
|             and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts | ||||
|     async def try_acquire_read_write_lock( | ||||
|         self, | ||||
|         lock_name: str, | ||||
|         lock_key: str, | ||||
|         write: bool, | ||||
|     ) -> Optional["Lock"]: | ||||
|         """Try to acquire a lock for the given name/key. Will return an async | ||||
|         context manager if the lock is successfully acquired, which *must* be | ||||
|         used (otherwise the lock will leak). | ||||
|         """ | ||||
| 
 | ||||
|         now = self._clock.time_msec() | ||||
|         token = random_string(6) | ||||
| 
 | ||||
|         def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None: | ||||
|             # We attempt to acquire the lock by inserting into | ||||
|             # `worker_read_write_locks` and seeing if that fails any | ||||
|             # constraints. If it doesn't then we have acquired the lock, | ||||
|             # otherwise we haven't. | ||||
|             # | ||||
|             # Before that though we clear the table of any stale locks. | ||||
| 
 | ||||
|             delete_sql = """ | ||||
|                 DELETE FROM worker_read_write_locks | ||||
|                     WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?; | ||||
|             """ | ||||
| 
 | ||||
|             insert_sql = """ | ||||
|                 INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts) | ||||
|                 VALUES (?, ?, ?, ?, ?, ?) | ||||
|             """ | ||||
| 
 | ||||
|             if isinstance(self.database_engine, PostgresEngine): | ||||
|                 # For Postgres we can send these queries at the same time. | ||||
|                 txn.execute( | ||||
|                     delete_sql + ";" + insert_sql, | ||||
|                     ( | ||||
|                         # DELETE args | ||||
|                         now - _LOCK_TIMEOUT_MS, | ||||
|                         lock_name, | ||||
|                         lock_key, | ||||
|                         # UPSERT args | ||||
|                         lock_name, | ||||
|                         lock_key, | ||||
|                         write, | ||||
|                         self._instance_name, | ||||
|                         token, | ||||
|                         now, | ||||
|                     ), | ||||
|                 ) | ||||
|             else: | ||||
|                 # For SQLite these need to be two queries. | ||||
|                 txn.execute( | ||||
|                     delete_sql, | ||||
|                     ( | ||||
|                         now - _LOCK_TIMEOUT_MS, | ||||
|                         lock_name, | ||||
|                         lock_key, | ||||
|                     ), | ||||
|                 ) | ||||
|                 txn.execute( | ||||
|                     insert_sql, | ||||
|                     ( | ||||
|                         lock_name, | ||||
|                         lock_key, | ||||
|                         write, | ||||
|                         self._instance_name, | ||||
|                         token, | ||||
|                         now, | ||||
|                     ), | ||||
|                 ) | ||||
| 
 | ||||
|             return | ||||
| 
 | ||||
|         try: | ||||
|             await self.db_pool.runInteraction( | ||||
|                 "try_acquire_read_write_lock", | ||||
|                 _try_acquire_read_write_lock_txn, | ||||
|             ) | ||||
|         except self.database_engine.module.IntegrityError: | ||||
|             return None | ||||
| 
 | ||||
|         lock = Lock( | ||||
|             self._reactor, | ||||
|             self._clock, | ||||
|             self, | ||||
|             read_write=True, | ||||
|             lock_name=lock_name, | ||||
|             lock_key=lock_key, | ||||
|             token=token, | ||||
|         ) | ||||
| 
 | ||||
|     async def _renew_lock(self, lock_name: str, lock_key: str, token: str) -> None: | ||||
|         """Attempt to renew the lock if we still hold it.""" | ||||
|         await self.db_pool.simple_update( | ||||
|             table="worker_locks", | ||||
|             keyvalues={ | ||||
|                 "lock_name": lock_name, | ||||
|                 "lock_key": lock_key, | ||||
|                 "token": token, | ||||
|             }, | ||||
|             updatevalues={"last_renewed_ts": self._clock.time_msec()}, | ||||
|             desc="renew_lock", | ||||
|         ) | ||||
|         self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock | ||||
| 
 | ||||
|     async def _drop_lock(self, lock_name: str, lock_key: str, token: str) -> None: | ||||
|         """Attempt to drop the lock, if we still hold it""" | ||||
|         await self.db_pool.simple_delete( | ||||
|             table="worker_locks", | ||||
|             keyvalues={ | ||||
|                 "lock_name": lock_name, | ||||
|                 "lock_key": lock_key, | ||||
|                 "token": token, | ||||
|             }, | ||||
|             desc="drop_lock", | ||||
|         ) | ||||
| 
 | ||||
|         self._live_tokens.pop((lock_name, lock_key), None) | ||||
|         return lock | ||||
| 
 | ||||
| 
 | ||||
| class Lock: | ||||
| @ -259,6 +320,7 @@ class Lock: | ||||
|         reactor: IReactorCore, | ||||
|         clock: Clock, | ||||
|         store: LockStore, | ||||
|         read_write: bool, | ||||
|         lock_name: str, | ||||
|         lock_key: str, | ||||
|         token: str, | ||||
| @ -266,13 +328,23 @@ class Lock: | ||||
|         self._reactor = reactor | ||||
|         self._clock = clock | ||||
|         self._store = store | ||||
|         self._read_write = read_write | ||||
|         self._lock_name = lock_name | ||||
|         self._lock_key = lock_key | ||||
| 
 | ||||
|         self._token = token | ||||
| 
 | ||||
|         self._table = "worker_read_write_locks" if read_write else "worker_locks" | ||||
| 
 | ||||
|         self._looping_call = clock.looping_call( | ||||
|             self._renew, _RENEWAL_INTERVAL_MS, store, lock_name, lock_key, token | ||||
|             self._renew, | ||||
|             _RENEWAL_INTERVAL_MS, | ||||
|             store, | ||||
|             clock, | ||||
|             read_write, | ||||
|             lock_name, | ||||
|             lock_key, | ||||
|             token, | ||||
|         ) | ||||
| 
 | ||||
|         self._dropped = False | ||||
| @ -281,6 +353,8 @@ class Lock: | ||||
|     @wrap_as_background_process("Lock._renew") | ||||
|     async def _renew( | ||||
|         store: LockStore, | ||||
|         clock: Clock, | ||||
|         read_write: bool, | ||||
|         lock_name: str, | ||||
|         lock_key: str, | ||||
|         token: str, | ||||
| @ -291,12 +365,34 @@ class Lock: | ||||
|         don't end up with a reference to `self` in the reactor, which would stop | ||||
|         this from being cleaned up if we dropped the context manager. | ||||
|         """ | ||||
|         await store._renew_lock(lock_name, lock_key, token) | ||||
|         table = "worker_read_write_locks" if read_write else "worker_locks" | ||||
|         await store.db_pool.simple_update( | ||||
|             table=table, | ||||
|             keyvalues={ | ||||
|                 "lock_name": lock_name, | ||||
|                 "lock_key": lock_key, | ||||
|                 "token": token, | ||||
|             }, | ||||
|             updatevalues={"last_renewed_ts": clock.time_msec()}, | ||||
|             desc="renew_lock", | ||||
|         ) | ||||
| 
 | ||||
|     async def is_still_valid(self) -> bool: | ||||
|         """Check if the lock is still held by us""" | ||||
|         return await self._store._is_lock_still_valid( | ||||
|             self._lock_name, self._lock_key, self._token | ||||
|         last_renewed_ts = await self._store.db_pool.simple_select_one_onecol( | ||||
|             table=self._table, | ||||
|             keyvalues={ | ||||
|                 "lock_name": self._lock_name, | ||||
|                 "lock_key": self._lock_key, | ||||
|                 "token": self._token, | ||||
|             }, | ||||
|             retcol="last_renewed_ts", | ||||
|             allow_none=True, | ||||
|             desc="is_lock_still_valid", | ||||
|         ) | ||||
|         return ( | ||||
|             last_renewed_ts is not None | ||||
|             and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts | ||||
|         ) | ||||
| 
 | ||||
|     async def __aenter__(self) -> None: | ||||
| @ -325,7 +421,23 @@ class Lock: | ||||
|         if self._looping_call.running: | ||||
|             self._looping_call.stop() | ||||
| 
 | ||||
|         await self._store._drop_lock(self._lock_name, self._lock_key, self._token) | ||||
|         await self._store.db_pool.simple_delete( | ||||
|             table=self._table, | ||||
|             keyvalues={ | ||||
|                 "lock_name": self._lock_name, | ||||
|                 "lock_key": self._lock_key, | ||||
|                 "token": self._token, | ||||
|             }, | ||||
|             desc="drop_lock", | ||||
|         ) | ||||
| 
 | ||||
|         if self._read_write: | ||||
|             self._store._live_read_write_lock_tokens.pop( | ||||
|                 (self._lock_name, self._lock_key, self._token), None | ||||
|             ) | ||||
|         else: | ||||
|             self._store._live_lock_tokens.pop((self._lock_name, self._lock_key), None) | ||||
| 
 | ||||
|         self._dropped = True | ||||
| 
 | ||||
|     def __del__(self) -> None: | ||||
|  | ||||
| @ -0,0 +1,152 @@ | ||||
| /* Copyright 2023 The Matrix.org Foundation C.I.C | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *    http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| 
 | ||||
| -- We implement read/write style locks by using two tables with mutual foreign | ||||
| -- key constraints. Note that this implementation is vulnerable to starving | ||||
| -- writers if read locks repeatedly get acquired. | ||||
| -- | ||||
| -- The first table (`worker_read_write_locks_mode`) indicates that a given lock | ||||
| -- has either been acquired in read mode *or* write mode, but not both. This is | ||||
| -- enforced by the unique constraint. Each instance of a lock being acquired is | ||||
| -- associated with a random `token`. | ||||
| -- | ||||
| -- The second table (`worker_read_write_locks`) tracks who has currently | ||||
| -- acquired a given lock. For a given lock_name/lock_key, there can be multiple | ||||
| -- read locks at a time but only one write lock (no mixing read and write locks | ||||
| -- at the same time). | ||||
| -- | ||||
| -- The foreign key from the second to first table enforces that for any given | ||||
| -- lock the second table cannot have a mix of rows with read or write. | ||||
| -- | ||||
| -- The foreign key from the first to second table enforces that we don't have a | ||||
| -- row for a lock in the first table if not in the second table. | ||||
| -- | ||||
| -- | ||||
| -- Furthermore, we add some triggers to automatically keep the first table up to | ||||
| -- date when inserting/deleting from the second table. This reduces the number | ||||
| -- of round trips needed to acquire and release locks, as those operations | ||||
| -- simply become an INSERT or DELETE. These triggers are added in a separate | ||||
| -- delta due to database specific syntax. | ||||
| 
 | ||||
| 
 | ||||
| -- A table to track whether a lock is currently acquired, and if so whether its | ||||
| -- in read or write mode. | ||||
| CREATE TABLE worker_read_write_locks_mode ( | ||||
|     lock_name TEXT NOT NULL, | ||||
|     lock_key TEXT NOT NULL, | ||||
|     -- Whether this lock is in read (false) or write (true) mode | ||||
|     write_lock BOOLEAN NOT NULL, | ||||
|     -- A token that has currently acquired the lock. We need this so that we can | ||||
|     -- add a foreign constraint from this table to `worker_read_write_locks`. | ||||
|     token TEXT NOT NULL | ||||
| ); | ||||
| 
 | ||||
| -- Ensure that we can only have one row per lock | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key); | ||||
| -- We need this (redundant) constraint so that we can have a foreign key | ||||
| -- constraint against this table. | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock); | ||||
| 
 | ||||
| 
 | ||||
| -- A table to track who has currently acquired a given lock. | ||||
| CREATE TABLE worker_read_write_locks ( | ||||
|     lock_name TEXT NOT NULL, | ||||
|     lock_key TEXT NOT NULL, | ||||
|     -- We write the instance name to ease manual debugging, we don't ever read | ||||
|     -- from it. | ||||
|     -- Note: instance names aren't guarenteed to be unique. | ||||
|     instance_name TEXT NOT NULL, | ||||
|     -- Whether the process has taken out a "read" or a "write" lock. | ||||
|     write_lock BOOLEAN NOT NULL, | ||||
|     -- A random string generated each time an instance takes out a lock. Used by | ||||
|     -- the instance to tell whether the lock is still held by it (e.g. in the | ||||
|     -- case where the process stalls for a long time the lock may time out and | ||||
|     -- be taken out by another instance, at which point the original instance | ||||
|     -- can tell it no longer holds the lock as the tokens no longer match). | ||||
|     token TEXT NOT NULL, | ||||
|     last_renewed_ts BIGINT NOT NULL, | ||||
| 
 | ||||
|     -- This constraint ensures that a given lock has only been acquired in read | ||||
|     -- xor write mode, but not both. | ||||
|     FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock) | ||||
| ); | ||||
| 
 | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token); | ||||
| -- Ensures that only one instance can acquire a lock in write mode at a time. | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock; | ||||
| 
 | ||||
| 
 | ||||
| -- Add a foreign key constraint to ensure that if a lock is in | ||||
| -- `worker_read_write_locks_mode` then there must be a corresponding row in | ||||
| -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in | ||||
| -- `worker_read_write_locks_mode` when the lock is not currently acquired). | ||||
| -- | ||||
| -- We only add to PostgreSQL as SQLite does not support adding constraints | ||||
| -- after table creation, and so doesn't support "circular" foreign key | ||||
| -- constraints. | ||||
| ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign | ||||
|     FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED; | ||||
| 
 | ||||
| 
 | ||||
| -- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try | ||||
| -- and acquire a lock, i.e. insert into `worker_read_write_locks`, | ||||
| CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$ | ||||
| BEGIN | ||||
|     INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token) | ||||
|         VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token) | ||||
|         ON CONFLICT (lock_name, lock_key) | ||||
|         DO NOTHING; | ||||
|     RETURN NEW; | ||||
| END | ||||
| $$ | ||||
| LANGUAGE plpgsql; | ||||
| 
 | ||||
| CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks | ||||
|     FOR EACH ROW | ||||
|     EXECUTE PROCEDURE upsert_read_write_lock_parent(); | ||||
| 
 | ||||
| 
 | ||||
| -- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock | ||||
| -- is released (i.e. a row deleted from `worker_read_write_locks`). Either we | ||||
| -- update the `worker_read_write_locks_mode.token` to match another instance | ||||
| -- that has currently acquired the lock, or we delete the row if nobody has | ||||
| -- currently acquired a lock. | ||||
| CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$ | ||||
| DECLARE | ||||
|     new_token TEXT; | ||||
| BEGIN | ||||
|     SELECT token INTO new_token FROM worker_read_write_locks | ||||
|         WHERE | ||||
|             lock_name = OLD.lock_name | ||||
|             AND lock_key = OLD.lock_key; | ||||
| 
 | ||||
|     IF NOT FOUND THEN | ||||
|         DELETE FROM worker_read_write_locks_mode | ||||
|             WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key; | ||||
|     ELSE | ||||
|         UPDATE worker_read_write_locks_mode | ||||
|             SET token = new_token | ||||
|             WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key; | ||||
|     END IF; | ||||
| 
 | ||||
|     RETURN NEW; | ||||
| END | ||||
| $$ | ||||
| LANGUAGE plpgsql; | ||||
| 
 | ||||
| CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks | ||||
|     FOR EACH ROW | ||||
|     EXECUTE PROCEDURE delete_read_write_lock_parent(); | ||||
| @ -0,0 +1,119 @@ | ||||
| /* Copyright 2023 The Matrix.org Foundation C.I.C | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *    http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| 
 | ||||
| -- c.f. the postgres version for context. The tables and constraints are the | ||||
| -- same, however they need to be defined slightly differently to work around how | ||||
| -- each database handles circular foreign key references. | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| -- A table to track whether a lock is currently acquired, and if so whether its | ||||
| -- in read or write mode. | ||||
| CREATE TABLE worker_read_write_locks_mode ( | ||||
|     lock_name TEXT NOT NULL, | ||||
|     lock_key TEXT NOT NULL, | ||||
|     -- Whether this lock is in read (false) or write (true) mode | ||||
|     write_lock BOOLEAN NOT NULL, | ||||
|     -- A token that has currently acquired the lock. We need this so that we can | ||||
|     -- add a foreign constraint from this table to `worker_read_write_locks`. | ||||
|     token TEXT NOT NULL, | ||||
|     -- Add a foreign key constraint to ensure that if a lock is in | ||||
|     -- `worker_read_write_locks_mode` then there must be a corresponding row in | ||||
|     -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in | ||||
|     -- `worker_read_write_locks_mode` when the lock is not currently acquired). | ||||
|     FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED | ||||
| ); | ||||
| 
 | ||||
| -- Ensure that we can only have one row per lock | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key); | ||||
| -- We need this (redundant) constraint so that we can have a foreign key | ||||
| -- constraint against this table. | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock); | ||||
| 
 | ||||
| 
 | ||||
| -- A table to track who has currently acquired a given lock. | ||||
| CREATE TABLE worker_read_write_locks ( | ||||
|     lock_name TEXT NOT NULL, | ||||
|     lock_key TEXT NOT NULL, | ||||
|     -- We write the instance name to ease manual debugging, we don't ever read | ||||
|     -- from it. | ||||
|     -- Note: instance names aren't guarenteed to be unique. | ||||
|     instance_name TEXT NOT NULL, | ||||
|     -- Whether the process has taken out a "read" or a "write" lock. | ||||
|     write_lock BOOLEAN NOT NULL, | ||||
|     -- A random string generated each time an instance takes out a lock. Used by | ||||
|     -- the instance to tell whether the lock is still held by it (e.g. in the | ||||
|     -- case where the process stalls for a long time the lock may time out and | ||||
|     -- be taken out by another instance, at which point the original instance | ||||
|     -- can tell it no longer holds the lock as the tokens no longer match). | ||||
|     token TEXT NOT NULL, | ||||
|     last_renewed_ts BIGINT NOT NULL, | ||||
| 
 | ||||
|     -- This constraint ensures that a given lock has only been acquired in read | ||||
|     -- xor write mode, but not both. | ||||
|     FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock) | ||||
| ); | ||||
| 
 | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token); | ||||
| -- Ensures that only one instance can acquire a lock in write mode at a time. | ||||
| CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock; | ||||
| 
 | ||||
| 
 | ||||
| -- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try | ||||
| -- and acquire a lock, i.e. insert into `worker_read_write_locks`, | ||||
| CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger | ||||
| BEFORE INSERT ON worker_read_write_locks | ||||
| FOR EACH ROW | ||||
| BEGIN | ||||
|     -- First ensure that `worker_read_write_locks_mode` doesn't have stale | ||||
|     -- entries in it, as on SQLite we don't have the foreign key constraint to | ||||
|     -- enforce this. | ||||
|     DELETE FROM worker_read_write_locks_mode | ||||
|         WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key | ||||
|         AND NOT EXISTS ( | ||||
|             SELECT 1 FROM worker_read_write_locks | ||||
|             WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key | ||||
|         ); | ||||
| 
 | ||||
|     INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token) | ||||
|         VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token) | ||||
|         ON CONFLICT (lock_name, lock_key) | ||||
|         DO NOTHING; | ||||
| END; | ||||
| 
 | ||||
| -- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock | ||||
| -- is released (i.e. a row deleted from `worker_read_write_locks`). Either we | ||||
| -- update the `worker_read_write_locks_mode.token` to match another instance | ||||
| -- that has currently acquired the lock, or we delete the row if nobody has | ||||
| -- currently acquired a lock. | ||||
| CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger | ||||
| AFTER DELETE ON worker_read_write_locks | ||||
| FOR EACH ROW | ||||
| BEGIN | ||||
|     DELETE FROM worker_read_write_locks_mode | ||||
|         WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key | ||||
|         AND NOT EXISTS ( | ||||
|             SELECT 1 FROM worker_read_write_locks | ||||
|             WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key | ||||
|         ); | ||||
| 
 | ||||
|     UPDATE worker_read_write_locks_mode | ||||
|         SET token = ( | ||||
|             SELECT token FROM worker_read_write_locks | ||||
|             WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key | ||||
|         ) | ||||
|         WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key; | ||||
| END; | ||||
| @ -166,4 +166,285 @@ class LockTestCase(unittest.HomeserverTestCase): | ||||
|         # Now call the shutdown code | ||||
|         self.get_success(self.store._on_shutdown()) | ||||
| 
 | ||||
|         self.assertEqual(self.store._live_tokens, {}) | ||||
|         self.assertEqual(self.store._live_lock_tokens, {}) | ||||
| 
 | ||||
| 
 | ||||
| class ReadWriteLockTestCase(unittest.HomeserverTestCase): | ||||
|     """Test the read/write lock implementation.""" | ||||
| 
 | ||||
|     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: | ||||
|         self.store = hs.get_datastores().main | ||||
| 
 | ||||
|     def test_acquire_write_contention(self) -> None: | ||||
|         """Test that we can only acquire one write lock at a time""" | ||||
|         # Track the number of tasks holding the lock. | ||||
|         # Should be at most 1. | ||||
|         in_lock = 0 | ||||
|         max_in_lock = 0 | ||||
| 
 | ||||
|         release_lock: "Deferred[None]" = Deferred() | ||||
| 
 | ||||
|         async def task() -> None: | ||||
|             nonlocal in_lock | ||||
|             nonlocal max_in_lock | ||||
| 
 | ||||
|             lock = await self.store.try_acquire_read_write_lock( | ||||
|                 "name", "key", write=True | ||||
|             ) | ||||
|             if not lock: | ||||
|                 return | ||||
| 
 | ||||
|             async with lock: | ||||
|                 in_lock += 1 | ||||
|                 max_in_lock = max(max_in_lock, in_lock) | ||||
| 
 | ||||
|                 # Block to allow other tasks to attempt to take the lock. | ||||
|                 await release_lock | ||||
| 
 | ||||
|                 in_lock -= 1 | ||||
| 
 | ||||
|         # Start 3 tasks. | ||||
|         task1 = defer.ensureDeferred(task()) | ||||
|         task2 = defer.ensureDeferred(task()) | ||||
|         task3 = defer.ensureDeferred(task()) | ||||
| 
 | ||||
|         # Give the reactor a kick so that the database transaction returns. | ||||
|         self.pump() | ||||
| 
 | ||||
|         release_lock.callback(None) | ||||
| 
 | ||||
|         # Run the tasks to completion. | ||||
|         # To work around `Linearizer`s using a different reactor to sleep when | ||||
|         # contended (#12841), we call `runUntilCurrent` on | ||||
|         # `twisted.internet.reactor`, which is a different reactor to that used | ||||
|         # by the homeserver. | ||||
|         assert isinstance(reactor, ReactorBase) | ||||
|         self.get_success(task1) | ||||
|         reactor.runUntilCurrent() | ||||
|         self.get_success(task2) | ||||
|         reactor.runUntilCurrent() | ||||
|         self.get_success(task3) | ||||
| 
 | ||||
|         # At most one task should have held the lock at a time. | ||||
|         self.assertEqual(max_in_lock, 1) | ||||
| 
 | ||||
|     def test_acquire_multiple_reads(self) -> None: | ||||
|         """Test that we can acquire multiple read locks at a time""" | ||||
|         # Track the number of tasks holding the lock. | ||||
|         in_lock = 0 | ||||
|         max_in_lock = 0 | ||||
| 
 | ||||
|         release_lock: "Deferred[None]" = Deferred() | ||||
| 
 | ||||
|         async def task() -> None: | ||||
|             nonlocal in_lock | ||||
|             nonlocal max_in_lock | ||||
| 
 | ||||
|             lock = await self.store.try_acquire_read_write_lock( | ||||
|                 "name", "key", write=False | ||||
|             ) | ||||
|             if not lock: | ||||
|                 return | ||||
| 
 | ||||
|             async with lock: | ||||
|                 in_lock += 1 | ||||
|                 max_in_lock = max(max_in_lock, in_lock) | ||||
| 
 | ||||
|                 # Block to allow other tasks to attempt to take the lock. | ||||
|                 await release_lock | ||||
| 
 | ||||
|                 in_lock -= 1 | ||||
| 
 | ||||
|         # Start 3 tasks. | ||||
|         task1 = defer.ensureDeferred(task()) | ||||
|         task2 = defer.ensureDeferred(task()) | ||||
|         task3 = defer.ensureDeferred(task()) | ||||
| 
 | ||||
|         # Give the reactor a kick so that the database transaction returns. | ||||
|         self.pump() | ||||
| 
 | ||||
|         release_lock.callback(None) | ||||
| 
 | ||||
|         # Run the tasks to completion. | ||||
|         # To work around `Linearizer`s using a different reactor to sleep when | ||||
|         # contended (#12841), we call `runUntilCurrent` on | ||||
|         # `twisted.internet.reactor`, which is a different reactor to that used | ||||
|         # by the homeserver. | ||||
|         assert isinstance(reactor, ReactorBase) | ||||
|         self.get_success(task1) | ||||
|         reactor.runUntilCurrent() | ||||
|         self.get_success(task2) | ||||
|         reactor.runUntilCurrent() | ||||
|         self.get_success(task3) | ||||
| 
 | ||||
|         # At most one task should have held the lock at a time. | ||||
|         self.assertEqual(max_in_lock, 3) | ||||
| 
 | ||||
|     def test_write_lock_acquired(self) -> None: | ||||
|         """Test that we can take out a write lock and that while we hold it | ||||
|         nobody else can take it out. | ||||
|         """ | ||||
|         # First to acquire this lock, so it should complete | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         assert lock is not None | ||||
| 
 | ||||
|         # Enter the context manager | ||||
|         self.get_success(lock.__aenter__()) | ||||
| 
 | ||||
|         # Attempting to acquire the lock again fails, as both read and write. | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNone(lock2) | ||||
| 
 | ||||
|         lock3 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=False) | ||||
|         ) | ||||
|         self.assertIsNone(lock3) | ||||
| 
 | ||||
|         # Calling `is_still_valid` reports true. | ||||
|         self.assertTrue(self.get_success(lock.is_still_valid())) | ||||
| 
 | ||||
|         # Drop the lock | ||||
|         self.get_success(lock.__aexit__(None, None, None)) | ||||
| 
 | ||||
|         # We can now acquire the lock again. | ||||
|         lock4 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         assert lock4 is not None | ||||
|         self.get_success(lock4.__aenter__()) | ||||
|         self.get_success(lock4.__aexit__(None, None, None)) | ||||
| 
 | ||||
|     def test_read_lock_acquired(self) -> None: | ||||
|         """Test that we can take out a read lock and that while we hold it | ||||
|         only other reads can use it. | ||||
|         """ | ||||
|         # First to acquire this lock, so it should complete | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=False) | ||||
|         ) | ||||
|         assert lock is not None | ||||
| 
 | ||||
|         # Enter the context manager | ||||
|         self.get_success(lock.__aenter__()) | ||||
| 
 | ||||
|         # Attempting to acquire the write lock fails | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNone(lock2) | ||||
| 
 | ||||
|         # Attempting to acquire a read lock succeeds | ||||
|         lock3 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=False) | ||||
|         ) | ||||
|         assert lock3 is not None | ||||
|         self.get_success(lock3.__aenter__()) | ||||
| 
 | ||||
|         # Calling `is_still_valid` reports true. | ||||
|         self.assertTrue(self.get_success(lock.is_still_valid())) | ||||
| 
 | ||||
|         # Drop the first lock | ||||
|         self.get_success(lock.__aexit__(None, None, None)) | ||||
| 
 | ||||
|         # Attempting to acquire the write lock still fails, as lock3 is still | ||||
|         # active. | ||||
|         lock4 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNone(lock4) | ||||
| 
 | ||||
|         # Drop the still open third lock | ||||
|         self.get_success(lock3.__aexit__(None, None, None)) | ||||
| 
 | ||||
|         # We can now acquire the lock again. | ||||
|         lock5 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         assert lock5 is not None | ||||
|         self.get_success(lock5.__aenter__()) | ||||
|         self.get_success(lock5.__aexit__(None, None, None)) | ||||
| 
 | ||||
|     def test_maintain_lock(self) -> None: | ||||
|         """Test that we don't time out locks while they're still active (lock is | ||||
|         renewed in the background if the process is still alive)""" | ||||
| 
 | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         assert lock is not None | ||||
| 
 | ||||
|         self.get_success(lock.__aenter__()) | ||||
| 
 | ||||
|         # Wait for ages with the lock, we should not be able to get the lock. | ||||
|         self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000) | ||||
|         self.pump() | ||||
| 
 | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNone(lock2) | ||||
| 
 | ||||
|         self.get_success(lock.__aexit__(None, None, None)) | ||||
| 
 | ||||
|     def test_timeout_lock(self) -> None: | ||||
|         """Test that we time out locks if they're not updated for ages""" | ||||
| 
 | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         assert lock is not None | ||||
| 
 | ||||
|         self.get_success(lock.__aenter__()) | ||||
| 
 | ||||
|         # We simulate the process getting stuck by cancelling the looping call | ||||
|         # that keeps the lock active. | ||||
|         lock._looping_call.stop() | ||||
| 
 | ||||
|         # Wait for the lock to timeout. | ||||
|         self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000) | ||||
| 
 | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNotNone(lock2) | ||||
| 
 | ||||
|         self.assertFalse(self.get_success(lock.is_still_valid())) | ||||
| 
 | ||||
|     def test_drop(self) -> None: | ||||
|         """Test that dropping the context manager means we stop renewing the lock""" | ||||
| 
 | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNotNone(lock) | ||||
| 
 | ||||
|         del lock | ||||
| 
 | ||||
|         # Wait for the lock to timeout. | ||||
|         self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000) | ||||
| 
 | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNotNone(lock2) | ||||
| 
 | ||||
|     def test_shutdown(self) -> None: | ||||
|         """Test that shutting down Synapse releases the locks""" | ||||
|         # Acquire two locks | ||||
|         lock = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key", write=True) | ||||
|         ) | ||||
|         self.assertIsNotNone(lock) | ||||
|         lock2 = self.get_success( | ||||
|             self.store.try_acquire_read_write_lock("name", "key2", write=True) | ||||
|         ) | ||||
|         self.assertIsNotNone(lock2) | ||||
| 
 | ||||
|         # Now call the shutdown code | ||||
|         self.get_success(self.store._on_shutdown()) | ||||
| 
 | ||||
|         self.assertEqual(self.store._live_read_write_lock_tokens, {}) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user