From 102677638002b3ef6ae956947333ddcde80680a7 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 2 Oct 2023 15:22:36 +0100 Subject: [PATCH] mypy plugin to check `@cached` return types (#14911) Co-authored-by: David Robertson Co-authored-by: Patrick Cloke Co-authored-by: Erik Johnston Assert that the return type of callables wrapped in @cached and @cachedList are cachable (aka immutable). --- changelog.d/14911.misc | 1 + scripts-dev/mypy_synapse_plugin.py | 287 +++++++++++++++--- synapse/handlers/room_list.py | 4 +- .../databases/main/event_push_actions.py | 7 +- synapse/storage/databases/main/relations.py | 12 +- synapse/storage/databases/main/roommember.py | 5 +- synapse/storage/roommember.py | 1 + synapse/util/caches/descriptors.py | 64 +++- 8 files changed, 323 insertions(+), 58 deletions(-) create mode 100644 changelog.d/14911.misc diff --git a/changelog.d/14911.misc b/changelog.d/14911.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/14911.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py index a0b3854f1b..6592a4a6b7 100644 --- a/scripts-dev/mypy_synapse_plugin.py +++ b/scripts-dev/mypy_synapse_plugin.py @@ -16,13 +16,24 @@ can crop up, e.g the cache descriptors. """ -from typing import Callable, Optional, Type +from typing import Callable, Optional, Tuple, Type, Union +import mypy.types from mypy.erasetype import remove_instance_last_known_values -from mypy.nodes import ARG_NAMED_OPT -from mypy.plugin import MethodSigContext, Plugin +from mypy.errorcodes import ErrorCode +from mypy.nodes import ARG_NAMED_OPT, TempNode, Var +from mypy.plugin import FunctionSigContext, MethodSigContext, Plugin from mypy.typeops import bind_self -from mypy.types import CallableType, Instance, NoneType, UnionType +from mypy.types import ( + AnyType, + CallableType, + Instance, + NoneType, + TupleType, + TypeAliasType, + UninhabitedType, + UnionType, +) class SynapsePlugin(Plugin): @@ -36,9 +47,37 @@ class SynapsePlugin(Plugin): ) ): return cached_function_method_signature + + if fullname in ( + "synapse.util.caches.descriptors._CachedFunctionDescriptor.__call__", + "synapse.util.caches.descriptors._CachedListFunctionDescriptor.__call__", + ): + return check_is_cacheable_wrapper + return None +def _get_true_return_type(signature: CallableType) -> mypy.types.Type: + """ + Get the "final" return type of a callable which might return an Awaitable/Deferred. + """ + if isinstance(signature.ret_type, Instance): + # If a coroutine, unwrap the coroutine's return type. + if signature.ret_type.type.fullname == "typing.Coroutine": + return signature.ret_type.args[2] + + # If an awaitable, unwrap the awaitable's final value. + elif signature.ret_type.type.fullname == "typing.Awaitable": + return signature.ret_type.args[0] + + # If a Deferred, unwrap the Deferred's final value. + elif signature.ret_type.type.fullname == "twisted.internet.defer.Deferred": + return signature.ret_type.args[0] + + # Otherwise, return the raw value of the function. + return signature.ret_type + + def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: """Fixes the `CachedFunction.__call__` signature to be correct. @@ -47,16 +86,17 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: 1. the `self` argument needs to be marked as "bound"; 2. any `cache_context` argument should be removed; 3. an optional keyword argument `on_invalidated` should be added. + 4. Wrap the return type to always be a Deferred. """ - # First we mark this as a bound function signature. - signature = bind_self(ctx.default_signature) + # 1. Mark this as a bound function signature. + signature: CallableType = bind_self(ctx.default_signature) - # Secondly, we remove any "cache_context" args. + # 2. Remove any "cache_context" args. # # Note: We should be only doing this if `cache_context=True` is set, but if # it isn't then the code will raise an exception when its called anyway, so - # its not the end of the world. + # it's not the end of the world. context_arg_index = None for idx, name in enumerate(signature.arg_names): if name == "cache_context": @@ -72,7 +112,7 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: arg_names.pop(context_arg_index) arg_kinds.pop(context_arg_index) - # Third, we add an optional "on_invalidate" argument. + # 3. Add an optional "on_invalidate" argument. # # This is a either # - a callable which accepts no input and returns nothing, or @@ -94,35 +134,16 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: arg_names.append("on_invalidate") arg_kinds.append(ARG_NAMED_OPT) # Arg is an optional kwarg. - # Finally we ensure the return type is a Deferred. - if ( - isinstance(signature.ret_type, Instance) - and signature.ret_type.type.fullname == "twisted.internet.defer.Deferred" - ): - # If it is already a Deferred, nothing to do. - ret_type = signature.ret_type - else: - ret_arg = None - if isinstance(signature.ret_type, Instance): - # If a coroutine, wrap the coroutine's return type in a Deferred. - if signature.ret_type.type.fullname == "typing.Coroutine": - ret_arg = signature.ret_type.args[2] + # 4. Ensure the return type is a Deferred. + ret_arg = _get_true_return_type(signature) - # If an awaitable, wrap the awaitable's final value in a Deferred. - elif signature.ret_type.type.fullname == "typing.Awaitable": - ret_arg = signature.ret_type.args[0] - - # Otherwise, wrap the return value in a Deferred. - if ret_arg is None: - ret_arg = signature.ret_type - - # This should be able to use ctx.api.named_generic_type, but that doesn't seem - # to find the correct symbol for anything more than 1 module deep. - # - # modules is not part of CheckerPluginInterface. The following is a combination - # of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo. - sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined] - ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)]) + # This should be able to use ctx.api.named_generic_type, but that doesn't seem + # to find the correct symbol for anything more than 1 module deep. + # + # modules is not part of CheckerPluginInterface. The following is a combination + # of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo. + sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined] + ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)]) signature = signature.copy_modified( arg_types=arg_types, @@ -134,6 +155,198 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: return signature +def check_is_cacheable_wrapper(ctx: MethodSigContext) -> CallableType: + """Asserts that the signature of a method returns a value which can be cached. + + Makes no changes to the provided method signature. + """ + # The true signature, this isn't being modified so this is what will be returned. + signature: CallableType = ctx.default_signature + + if not isinstance(ctx.args[0][0], TempNode): + ctx.api.note("Cached function is not a TempNode?!", ctx.context) # type: ignore[attr-defined] + return signature + + orig_sig = ctx.args[0][0].type + if not isinstance(orig_sig, CallableType): + ctx.api.fail("Cached 'function' is not a callable", ctx.context) + return signature + + check_is_cacheable(orig_sig, ctx) + + return signature + + +def check_is_cacheable( + signature: CallableType, + ctx: Union[MethodSigContext, FunctionSigContext], +) -> None: + """ + Check if a callable returns a type which can be cached. + + Args: + signature: The callable to check. + ctx: The signature context, used for error reporting. + """ + # Unwrap the true return type from the cached function. + return_type = _get_true_return_type(signature) + + verbose = ctx.api.options.verbosity >= 1 + # TODO Technically a cachedList only needs immutable values, but forcing them + # to return Mapping instead of Dict is fine. + ok, note = is_cacheable(return_type, signature, verbose) + + if ok: + message = f"function {signature.name} is @cached, returning {return_type}" + else: + message = f"function {signature.name} is @cached, but has mutable return value {return_type}" + + if note: + message += f" ({note})" + message = message.replace("builtins.", "").replace("typing.", "") + + if ok and note: + ctx.api.note(message, ctx.context) # type: ignore[attr-defined] + elif not ok: + ctx.api.fail(message, ctx.context, code=AT_CACHED_MUTABLE_RETURN) + + +# Immutable simple values. +IMMUTABLE_VALUE_TYPES = { + "builtins.bool", + "builtins.int", + "builtins.float", + "builtins.str", + "builtins.bytes", +} + +# Types defined in Synapse which are known to be immutable. +IMMUTABLE_CUSTOM_TYPES = { + "synapse.synapse_rust.acl.ServerAclEvaluator", + "synapse.synapse_rust.push.FilteredPushRules", + # This is technically not immutable, but close enough. + "signedjson.types.VerifyKey", +} + +# Immutable containers only if the values are also immutable. +IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS = { + "builtins.frozenset", + "builtins.tuple", + "typing.AbstractSet", + "typing.Sequence", + "immutabledict.immutabledict", +} + +MUTABLE_CONTAINER_TYPES = { + "builtins.set", + "builtins.list", + "builtins.dict", +} + +AT_CACHED_MUTABLE_RETURN = ErrorCode( + "synapse-@cached-mutable", + "@cached() should have an immutable return type", + "General", +) + + +def is_cacheable( + rt: mypy.types.Type, signature: CallableType, verbose: bool +) -> Tuple[bool, Optional[str]]: + """ + Check if a particular type is cachable. + + A type is cachable if it is immutable; for complex types this recurses to + check each type parameter. + + Returns: a 2-tuple (cacheable, message). + - cachable: False means the type is definitely not cacheable; + true means anything else. + - Optional message. + """ + + # This should probably be done via a TypeVisitor. Apologies to the reader! + if isinstance(rt, AnyType): + return True, ("may be mutable" if verbose else None) + + elif isinstance(rt, Instance): + if ( + rt.type.fullname in IMMUTABLE_VALUE_TYPES + or rt.type.fullname in IMMUTABLE_CUSTOM_TYPES + ): + # "Simple" types are generally immutable. + return True, None + + elif rt.type.fullname == "typing.Mapping": + # Generally mapping keys are immutable, but they only *have* to be + # hashable, which doesn't imply immutability. E.g. Mapping[K, V] + # is cachable iff K and V are cachable. + return is_cacheable(rt.args[0], signature, verbose) and is_cacheable( + rt.args[1], signature, verbose + ) + + elif rt.type.fullname in IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS: + # E.g. Collection[T] is cachable iff T is cachable. + return is_cacheable(rt.args[0], signature, verbose) + + elif rt.type.fullname in MUTABLE_CONTAINER_TYPES: + # Mutable containers are mutable regardless of their underlying type. + return False, None + + elif "attrs" in rt.type.metadata: + # attrs classes are only cachable iff it is frozen (immutable itself) + # and all attributes are cachable. + frozen = rt.type.metadata["attrs"]["frozen"] + if frozen: + for attribute in rt.type.metadata["attrs"]["attributes"]: + attribute_name = attribute["name"] + symbol_node = rt.type.names[attribute_name].node + assert isinstance(symbol_node, Var) + assert symbol_node.type is not None + ok, note = is_cacheable(symbol_node.type, signature, verbose) + if not ok: + return False, f"non-frozen attrs property: {attribute_name}" + # All attributes were frozen. + return True, None + else: + return False, "non-frozen attrs class" + + else: + # Ensure we fail for unknown types, these generally means that the + # above code is not complete. + return ( + False, + f"Don't know how to handle {rt.type.fullname} return type instance", + ) + + elif isinstance(rt, NoneType): + # None is cachable. + return True, None + + elif isinstance(rt, (TupleType, UnionType)): + # Tuples and unions are cachable iff all their items are cachable. + for item in rt.items: + ok, note = is_cacheable(item, signature, verbose) + if not ok: + return False, note + # This discards notes but that's probably fine + return True, None + + elif isinstance(rt, TypeAliasType): + # For a type alias, check if the underlying real type is cachable. + return is_cacheable(mypy.types.get_proper_type(rt), signature, verbose) + + elif isinstance(rt, UninhabitedType) and rt.is_noreturn: + # There is no return value, just consider it cachable. This is only used + # in tests. + return True, None + + else: + # Ensure we fail for unknown types, these generally means that the + # above code is not complete. + return False, f"Don't know how to handle {type(rt).__qualname__} return type" + + def plugin(version: str) -> Type[SynapsePlugin]: # This is the entry point of the plugin, and lets us deal with the fact # that the mypy plugin interface is *not* stable by looking at the version diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index bb0bdb8e6f..36e2db8975 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -33,7 +33,7 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) -from synapse.types import JsonDict, ThirdPartyInstanceID +from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.caches.response_cache import ResponseCache @@ -256,7 +256,7 @@ class RoomListHandler: cache_context: _CacheContext, with_alias: bool = True, allow_private: bool = False, - ) -> Optional[JsonDict]: + ) -> Optional[JsonMapping]: """Returns the entry for a room Args: diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index ba99e63d26..39556481ff 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -182,6 +182,7 @@ class UserPushAction(EmailPushAction): profile_tag: str +# TODO This is used as a cached value and is mutable. @attr.s(slots=True, auto_attribs=True) class NotifCounts: """ @@ -193,7 +194,7 @@ class NotifCounts: highlight_count: int = 0 -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class RoomNotifCounts: """ The per-user, per-room count of notifications. Used by sync and push. @@ -201,7 +202,7 @@ class RoomNotifCounts: main_timeline: NotifCounts # Map of thread ID to the notification counts. - threads: Dict[str, NotifCounts] + threads: Mapping[str, NotifCounts] @staticmethod def empty() -> "RoomNotifCounts": @@ -483,7 +484,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return room_to_count - @cached(tree=True, max_entries=5000, iterable=True) + @cached(tree=True, max_entries=5000, iterable=True) # type: ignore[synapse-@cached-mutable] async def get_unread_event_push_actions_by_room_for_user( self, room_id: str, diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index b67f780c10..9246b418f5 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -458,7 +458,7 @@ class RelationsWorkerStore(SQLBaseStore): ) return result is not None - @cached() + @cached() # type: ignore[synapse-@cached-mutable] async def get_references_for_event(self, event_id: str) -> List[JsonDict]: raise NotImplementedError() @@ -512,11 +512,12 @@ class RelationsWorkerStore(SQLBaseStore): "_get_references_for_events_txn", _get_references_for_events_txn ) - @cached() + @cached() # type: ignore[synapse-@cached-mutable] def get_applicable_edit(self, event_id: str) -> Optional[EventBase]: raise NotImplementedError() - @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") + # TODO: This returns a mutable object, which is generally bad. + @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") # type: ignore[synapse-@cached-mutable] async def get_applicable_edits( self, event_ids: Collection[str] ) -> Mapping[str, Optional[EventBase]]: @@ -598,11 +599,12 @@ class RelationsWorkerStore(SQLBaseStore): for original_event_id in event_ids } - @cached() + @cached() # type: ignore[synapse-@cached-mutable] def get_thread_summary(self, event_id: str) -> Optional[Tuple[int, EventBase]]: raise NotImplementedError() - @cachedList(cached_method_name="get_thread_summary", list_name="event_ids") + # TODO: This returns a mutable object, which is generally bad. + @cachedList(cached_method_name="get_thread_summary", list_name="event_ids") # type: ignore[synapse-@cached-mutable] async def get_thread_summaries( self, event_ids: Collection[str] ) -> Mapping[str, Optional[Tuple[int, EventBase]]]: diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 3755773faa..e93573f315 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -275,7 +275,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): _get_users_in_room_with_profiles, ) - @cached(max_entries=100000) + @cached(max_entries=100000) # type: ignore[synapse-@cached-mutable] async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]: """Get the details of a room roughly suitable for use by the room summary extension to /sync. Useful when lazy loading room members. @@ -1071,7 +1071,8 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): ) return {row["event_id"]: row["membership"] for row in rows} - @cached(max_entries=10000) + # TODO This returns a mutable object, which is generally confusing when using a cache. + @cached(max_entries=10000) # type: ignore[synapse-@cached-mutable] def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache": return _JoinedHostsCache() diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 2500381b7b..cbfb32014c 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -45,6 +45,7 @@ class ProfileInfo: display_name: Optional[str] +# TODO This is used as a cached value and is mutable. @attr.s(slots=True, frozen=True, weakref_slot=False, auto_attribs=True) class MemberSummary: # A truncated list of (user_id, event_id) tuples for users of a given diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 8514a75a1c..ce736fdf75 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -36,6 +36,8 @@ from typing import ( ) from weakref import WeakValueDictionary +import attr + from twisted.internet import defer from twisted.python.failure import Failure @@ -466,6 +468,35 @@ class _CacheContext: ) +@attr.s(auto_attribs=True, slots=True, frozen=True) +class _CachedFunctionDescriptor: + """Helper for `@cached`, we name it so that we can hook into it with mypy + plugin.""" + + max_entries: int + num_args: Optional[int] + uncached_args: Optional[Collection[str]] + tree: bool + cache_context: bool + iterable: bool + prune_unread_entries: bool + name: Optional[str] + + def __call__(self, orig: F) -> CachedFunction[F]: + d = DeferredCacheDescriptor( + orig, + max_entries=self.max_entries, + num_args=self.num_args, + uncached_args=self.uncached_args, + tree=self.tree, + cache_context=self.cache_context, + iterable=self.iterable, + prune_unread_entries=self.prune_unread_entries, + name=self.name, + ) + return cast(CachedFunction[F], d) + + def cached( *, max_entries: int = 1000, @@ -476,9 +507,8 @@ def cached( iterable: bool = False, prune_unread_entries: bool = True, name: Optional[str] = None, -) -> Callable[[F], CachedFunction[F]]: - func = lambda orig: DeferredCacheDescriptor( - orig, +) -> _CachedFunctionDescriptor: + return _CachedFunctionDescriptor( max_entries=max_entries, num_args=num_args, uncached_args=uncached_args, @@ -489,7 +519,26 @@ def cached( name=name, ) - return cast(Callable[[F], CachedFunction[F]], func) + +@attr.s(auto_attribs=True, slots=True, frozen=True) +class _CachedListFunctionDescriptor: + """Helper for `@cachedList`, we name it so that we can hook into it with mypy + plugin.""" + + cached_method_name: str + list_name: str + num_args: Optional[int] = None + name: Optional[str] = None + + def __call__(self, orig: F) -> CachedFunction[F]: + d = DeferredCacheListDescriptor( + orig, + cached_method_name=self.cached_method_name, + list_name=self.list_name, + num_args=self.num_args, + name=self.name, + ) + return cast(CachedFunction[F], d) def cachedList( @@ -498,7 +547,7 @@ def cachedList( list_name: str, num_args: Optional[int] = None, name: Optional[str] = None, -) -> Callable[[F], CachedFunction[F]]: +) -> _CachedListFunctionDescriptor: """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. Used to do batch lookups for an already created cache. One of the arguments @@ -527,16 +576,13 @@ def cachedList( def batch_do_something(self, first_arg, second_args): ... """ - func = lambda orig: DeferredCacheListDescriptor( - orig, + return _CachedListFunctionDescriptor( cached_method_name=cached_method_name, list_name=list_name, num_args=num_args, name=name, ) - return cast(Callable[[F], CachedFunction[F]], func) - def _get_cache_key_builder( param_names: Sequence[str],