From 736199b7638175c439fff10a1f8a2d7da96838e5 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 19 May 2023 16:13:44 +0000 Subject: [PATCH] Remove old R30 because R30v2 supercedes it (#10428) R30v2 has been out since 2021-07-19 (https://github.com/matrix-org/synapse/pull/10332) and we started collecting stats on 2021-08-16. Since it's been over a year now (almost 2 years), this is enough grace period for us to now rip it out. --- changelog.d/10428.removal | 1 + .../reporting_homeserver_usage_statistics.md | 5 - synapse/app/phone_stats_home.py | 4 - synapse/storage/databases/main/metrics.py | 83 ---------- tests/app/test_phone_stats_home.py | 154 ------------------ 5 files changed, 1 insertion(+), 246 deletions(-) create mode 100644 changelog.d/10428.removal diff --git a/changelog.d/10428.removal b/changelog.d/10428.removal new file mode 100644 index 0000000000..c056e89585 --- /dev/null +++ b/changelog.d/10428.removal @@ -0,0 +1 @@ +Remove the old version of the R30 (30-day retained users) phone-home metric. diff --git a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md index 3a7ed7c806..60b758e33b 100644 --- a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md +++ b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md @@ -42,11 +42,6 @@ The following statistics are sent to the configured reporting endpoint: | `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. | | `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. | | `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. | -| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. | -| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. | -| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. | -| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. | -| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. | | `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. | | `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. | | `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. | diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 897dd3edac..09988670da 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -127,10 +127,6 @@ async def phone_stats_home( daily_sent_messages = await store.count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages - r30_results = await store.count_r30_users() - for name, count in r30_results.items(): - stats["r30_users_" + name] = count - r30v2_results = await store.count_r30v2_users() for name, count in r30v2_results.items(): stats["r30v2_users_" + name] = count diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index 14294a0bb8..595e22982e 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -248,89 +248,6 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): (count,) = cast(Tuple[int], txn.fetchone()) return count - async def count_r30_users(self) -> Dict[str, int]: - """ - Counts the number of 30 day retained users, defined as:- - * Users who have created their accounts more than 30 days ago - * Where last seen at most 30 days ago - * Where account creation and last_seen are > 30 days apart - - Returns: - A mapping of counts globally as well as broken out by platform. - """ - - def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]: - thirty_days_in_secs = 86400 * 30 - now = int(self._clock.time()) - thirty_days_ago_in_secs = now - thirty_days_in_secs - - sql = """ - SELECT platform, COUNT(*) FROM ( - SELECT - users.name, platform, users.creation_ts * 1000, - MAX(uip.last_seen) - FROM users - INNER JOIN ( - SELECT - user_id, - last_seen, - CASE - WHEN user_agent LIKE '%%Android%%' THEN 'android' - WHEN user_agent LIKE '%%iOS%%' THEN 'ios' - WHEN user_agent LIKE '%%Electron%%' THEN 'electron' - WHEN user_agent LIKE '%%Mozilla%%' THEN 'web' - WHEN user_agent LIKE '%%Gecko%%' THEN 'web' - ELSE 'unknown' - END - AS platform - FROM user_ips - ) uip - ON users.name = uip.user_id - AND users.appservice_id is NULL - AND users.creation_ts < ? - AND uip.last_seen/1000 > ? - AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 - GROUP BY users.name, platform, users.creation_ts - ) u GROUP BY platform - """ - - results = {} - txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - - for row in txn: - if row[0] == "unknown": - pass - results[row[0]] = row[1] - - sql = """ - SELECT COUNT(*) FROM ( - SELECT users.name, users.creation_ts * 1000, - MAX(uip.last_seen) - FROM users - INNER JOIN ( - SELECT - user_id, - last_seen - FROM user_ips - ) uip - ON users.name = uip.user_id - AND appservice_id is NULL - AND users.creation_ts < ? - AND uip.last_seen/1000 > ? - AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 - GROUP BY users.name, users.creation_ts - ) u - """ - - txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - - (count,) = cast(Tuple[int], txn.fetchone()) - results["all"] = count - - return results - - return await self.db_pool.runInteraction("count_r30_users", _count_r30_users) - async def count_r30v2_users(self) -> Dict[str, int]: """ Counts the number of 30 day retained users, defined as users that: diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py index a860eedbcf..9305b758d7 100644 --- a/tests/app/test_phone_stats_home.py +++ b/tests/app/test_phone_stats_home.py @@ -4,7 +4,6 @@ from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.util import Clock -from tests import unittest from tests.server import ThreadedMemoryReactorClock from tests.unittest import HomeserverTestCase @@ -12,154 +11,6 @@ FIVE_MINUTES_IN_SECONDS = 300 ONE_DAY_IN_SECONDS = 86400 -class PhoneHomeTestCase(HomeserverTestCase): - servlets = [ - synapse.rest.admin.register_servlets_for_client_rest_resource, - room.register_servlets, - login.register_servlets, - ] - - # Override the retention time for the user_ips table because otherwise it - # gets pruned too aggressively for our R30 test. - @unittest.override_config({"user_ips_max_age": "365d"}) - def test_r30_minimum_usage(self) -> None: - """ - Tests the minimum amount of interaction necessary for the R30 metric - to consider a user 'retained'. - """ - - # Register a user, log it in, create a room and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the R30 results do not count that user. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Advance 30 days (+ 1 second, because strict inequality causes issues if we are - # bang on 30 days later). - self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1) - - # (Make sure the user isn't somehow counted by this point.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Send a message (this counts as activity) - self.helper.send(room_id, "message2", tok=access_token) - - # We have to wait some time for _update_client_ips_batch to get - # called and update the user_ips table. - self.reactor.advance(2 * 60 * 60) - - # *Now* the user is counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance 29 days. The user has now not posted for 29 days. - self.reactor.advance(29 * ONE_DAY_IN_SECONDS) - - # The user is still counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance another day. The user has now not posted for 30 days. - self.reactor.advance(ONE_DAY_IN_SECONDS) - - # The user is now no longer counted in R30. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - def test_r30_minimum_usage_using_default_config(self) -> None: - """ - Tests the minimum amount of interaction necessary for the R30 metric - to consider a user 'retained'. - - N.B. This test does not override the `user_ips_max_age` config setting, - which defaults to 28 days. - """ - - # Register a user, log it in, create a room and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the R30 results do not count that user. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Advance 30 days (+ 1 second, because strict inequality causes issues if we are - # bang on 30 days later). - self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1) - - # (Make sure the user isn't somehow counted by this point.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Send a message (this counts as activity) - self.helper.send(room_id, "message2", tok=access_token) - - # We have to wait some time for _update_client_ips_batch to get - # called and update the user_ips table. - self.reactor.advance(2 * 60 * 60) - - # *Now* the user is counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance 27 days. The user has now not posted for 27 days. - self.reactor.advance(27 * ONE_DAY_IN_SECONDS) - - # The user is still counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance another day. The user has now not posted for 28 days. - self.reactor.advance(ONE_DAY_IN_SECONDS) - - # The user is now no longer counted in R30. - # (This is because the user_ips table has been pruned, which by default - # only preserves the last 28 days of entries.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - def test_r30_user_must_be_retained_for_at_least_a_month(self) -> None: - """ - Tests that a newly-registered user must be retained for a whole month - before appearing in the R30 statistic, even if they post every day - during that time! - """ - # Register a user and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the user does not contribute to R30 yet. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - for _ in range(30): - # This loop posts a message every day for 30 days - self.reactor.advance(ONE_DAY_IN_SECONDS) - self.helper.send(room_id, "I'm still here", tok=access_token) - - # Notice that the user *still* does not contribute to R30! - r30_results = self.get_success( - self.hs.get_datastores().main.count_r30_users() - ) - self.assertEqual(r30_results, {"all": 0}) - - self.reactor.advance(ONE_DAY_IN_SECONDS) - self.helper.send(room_id, "Still here!", tok=access_token) - - # *Now* the user appears in R30. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - class PhoneHomeR30V2TestCase(HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -363,11 +214,6 @@ class PhoneHomeR30V2TestCase(HomeserverTestCase): r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0} ) - # Check that this is a situation where old R30 differs: - # old R30 DOES count this as 'retained'. - r30_results = self.get_success(store.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "ios": 1}) - # Now we want to check that the user will still be able to appear in # R30v2 as long as the user performs some other activity between # 30 and 60 days later.