From b378d98c8f93412bc51f0d4b9c4aa2d1701cf523 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 11 May 2021 11:04:03 +0100 Subject: [PATCH] Add debug logging for issue #9533 (#9959) Hopefully this will help us track down where to-device messages are getting lost/delayed. --- changelog.d/9959.misc | 1 + .../federation/sender/per_destination_queue.py | 9 +++++++++ synapse/logging/__init__.py | 7 ++++++- synapse/notifier.py | 8 ++++++++ synapse/replication/tcp/client.py | 1 - synapse/storage/databases/main/deviceinbox.py | 18 ++++++++++++++++++ 6 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 changelog.d/9959.misc diff --git a/changelog.d/9959.misc b/changelog.d/9959.misc new file mode 100644 index 0000000000..7231f29d79 --- /dev/null +++ b/changelog.d/9959.misc @@ -0,0 +1 @@ +Add debug logging for lost/delayed to-device messages. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 3b053ebcfb..3a2efd56ee 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -28,6 +28,7 @@ from synapse.api.presence import UserPresenceState from synapse.events import EventBase from synapse.federation.units import Edu from synapse.handlers.presence import format_user_presence_state +from synapse.logging import issue9533_logger from synapse.logging.opentracing import SynapseTags, set_tag from synapse.metrics import sent_transactions_counter from synapse.metrics.background_process_metrics import run_as_background_process @@ -574,6 +575,14 @@ class PerDestinationQueue: for content in contents ] + if edus: + issue9533_logger.debug( + "Sending %i to-device messages to %s, up to stream id %i", + len(edus), + self._destination, + stream_id, + ) + return (edus, stream_id) def _start_catching_up(self) -> None: diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py index e00969f8b1..b50a4f95eb 100644 --- a/synapse/logging/__init__.py +++ b/synapse/logging/__init__.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -# These are imported to allow for nicer logging configuration files. +import logging + from synapse.logging._remote import RemoteHandler from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter +# These are imported to allow for nicer logging configuration files. __all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"] + +# Debug logger for https://github.com/matrix-org/synapse/issues/9533 etc +issue9533_logger = logging.getLogger("synapse.9533_debug") diff --git a/synapse/notifier.py b/synapse/notifier.py index b9531007e2..24b4e6649f 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -38,6 +38,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.api.errors import AuthError from synapse.events import EventBase from synapse.handlers.presence import format_user_presence_state +from synapse.logging import issue9533_logger from synapse.logging.context import PreserveLoggingContext from synapse.logging.opentracing import log_kv, start_active_span from synapse.logging.utils import log_function @@ -426,6 +427,13 @@ class Notifier: for room in rooms: user_streams |= self.room_to_user_streams.get(room, set()) + if stream_key == "to_device_key": + issue9533_logger.debug( + "to-device messages stream id %s, awaking streams for %s", + new_token, + users, + ) + time_now_ms = self.clock.time_msec() for user_stream in user_streams: try: diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 4f3c6a18b6..62d7809175 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -51,7 +51,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) - # How long we allow callers to wait for replication updates before timing out. _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 30 diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 7c9d1f744e..50e7ddd735 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -15,6 +15,7 @@ import logging from typing import List, Optional, Tuple +from synapse.logging import issue9533_logger from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.replication.tcp.streams import ToDeviceStream from synapse.storage._base import SQLBaseStore, db_to_json @@ -404,6 +405,13 @@ class DeviceInboxWorkerStore(SQLBaseStore): ], ) + if remote_messages_by_destination: + issue9533_logger.debug( + "Queued outgoing to-device messages with stream_id %i for %s", + stream_id, + list(remote_messages_by_destination.keys()), + ) + async with self._device_inbox_id_gen.get_next() as stream_id: now_ms = self.clock.time_msec() await self.db_pool.runInteraction( @@ -533,6 +541,16 @@ class DeviceInboxWorkerStore(SQLBaseStore): ], ) + issue9533_logger.debug( + "Stored to-device messages with stream_id %i for %s", + stream_id, + [ + (user_id, device_id) + for (user_id, messages_by_device) in local_by_user_then_device.items() + for device_id in messages_by_device.keys() + ], + ) + class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"