Remove old R30 because R30v2 supercedes it (#10428)

R30v2 has been out since 2021-07-19 (https://github.com/matrix-org/synapse/pull/10332)
and we started collecting stats on 2021-08-16. Since it's been over a year now
(almost 2 years), this is enough grace period for us to now rip it out.
This commit is contained in:
reivilibre 2023-05-19 16:13:44 +00:00 committed by GitHub
parent 1e89976b26
commit 736199b763
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1 additions and 246 deletions

View File

@ -0,0 +1 @@
Remove the old version of the R30 (30-day retained users) phone-home metric.

View File

@ -42,11 +42,6 @@ The following statistics are sent to the configured reporting endpoint:
| `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. |
| `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. |
| `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. |
| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. |
| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. |
| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. |
| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. |
| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. |
| `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. |
| `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. |
| `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. |

View File

@ -127,10 +127,6 @@ async def phone_stats_home(
daily_sent_messages = await store.count_daily_sent_messages()
stats["daily_sent_messages"] = daily_sent_messages
r30_results = await store.count_r30_users()
for name, count in r30_results.items():
stats["r30_users_" + name] = count
r30v2_results = await store.count_r30v2_users()
for name, count in r30v2_results.items():
stats["r30v2_users_" + name] = count

View File

@ -248,89 +248,6 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
(count,) = cast(Tuple[int], txn.fetchone())
return count
async def count_r30_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as:-
* Users who have created their accounts more than 30 days ago
* Where last seen at most 30 days ago
* Where account creation and last_seen are > 30 days apart
Returns:
A mapping of counts globally as well as broken out by platform.
"""
def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]:
thirty_days_in_secs = 86400 * 30
now = int(self._clock.time())
thirty_days_ago_in_secs = now - thirty_days_in_secs
sql = """
SELECT platform, COUNT(*) FROM (
SELECT
users.name, platform, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen,
CASE
WHEN user_agent LIKE '%%Android%%' THEN 'android'
WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
ELSE 'unknown'
END
AS platform
FROM user_ips
) uip
ON users.name = uip.user_id
AND users.appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, platform, users.creation_ts
) u GROUP BY platform
"""
results = {}
txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
for row in txn:
if row[0] == "unknown":
pass
results[row[0]] = row[1]
sql = """
SELECT COUNT(*) FROM (
SELECT users.name, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen
FROM user_ips
) uip
ON users.name = uip.user_id
AND appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, users.creation_ts
) u
"""
txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
(count,) = cast(Tuple[int], txn.fetchone())
results["all"] = count
return results
return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
async def count_r30v2_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as users that:

View File

@ -4,7 +4,6 @@ from synapse.rest.client import login, room
from synapse.server import HomeServer
from synapse.util import Clock
from tests import unittest
from tests.server import ThreadedMemoryReactorClock
from tests.unittest import HomeserverTestCase
@ -12,154 +11,6 @@ FIVE_MINUTES_IN_SECONDS = 300
ONE_DAY_IN_SECONDS = 86400
class PhoneHomeTestCase(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
]
# Override the retention time for the user_ips table because otherwise it
# gets pruned too aggressively for our R30 test.
@unittest.override_config({"user_ips_max_age": "365d"})
def test_r30_minimum_usage(self) -> None:
"""
Tests the minimum amount of interaction necessary for the R30 metric
to consider a user 'retained'.
"""
# Register a user, log it in, create a room and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)
# Check the R30 results do not count that user.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
# Advance 30 days (+ 1 second, because strict inequality causes issues if we are
# bang on 30 days later).
self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
# (Make sure the user isn't somehow counted by this point.)
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
# Send a message (this counts as activity)
self.helper.send(room_id, "message2", tok=access_token)
# We have to wait some time for _update_client_ips_batch to get
# called and update the user_ips table.
self.reactor.advance(2 * 60 * 60)
# *Now* the user is counted.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})
# Advance 29 days. The user has now not posted for 29 days.
self.reactor.advance(29 * ONE_DAY_IN_SECONDS)
# The user is still counted.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})
# Advance another day. The user has now not posted for 30 days.
self.reactor.advance(ONE_DAY_IN_SECONDS)
# The user is now no longer counted in R30.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
def test_r30_minimum_usage_using_default_config(self) -> None:
"""
Tests the minimum amount of interaction necessary for the R30 metric
to consider a user 'retained'.
N.B. This test does not override the `user_ips_max_age` config setting,
which defaults to 28 days.
"""
# Register a user, log it in, create a room and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)
# Check the R30 results do not count that user.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
# Advance 30 days (+ 1 second, because strict inequality causes issues if we are
# bang on 30 days later).
self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
# (Make sure the user isn't somehow counted by this point.)
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
# Send a message (this counts as activity)
self.helper.send(room_id, "message2", tok=access_token)
# We have to wait some time for _update_client_ips_batch to get
# called and update the user_ips table.
self.reactor.advance(2 * 60 * 60)
# *Now* the user is counted.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})
# Advance 27 days. The user has now not posted for 27 days.
self.reactor.advance(27 * ONE_DAY_IN_SECONDS)
# The user is still counted.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})
# Advance another day. The user has now not posted for 28 days.
self.reactor.advance(ONE_DAY_IN_SECONDS)
# The user is now no longer counted in R30.
# (This is because the user_ips table has been pruned, which by default
# only preserves the last 28 days of entries.)
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
def test_r30_user_must_be_retained_for_at_least_a_month(self) -> None:
"""
Tests that a newly-registered user must be retained for a whole month
before appearing in the R30 statistic, even if they post every day
during that time!
"""
# Register a user and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)
# Check the user does not contribute to R30 yet.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 0})
for _ in range(30):
# This loop posts a message every day for 30 days
self.reactor.advance(ONE_DAY_IN_SECONDS)
self.helper.send(room_id, "I'm still here", tok=access_token)
# Notice that the user *still* does not contribute to R30!
r30_results = self.get_success(
self.hs.get_datastores().main.count_r30_users()
)
self.assertEqual(r30_results, {"all": 0})
self.reactor.advance(ONE_DAY_IN_SECONDS)
self.helper.send(room_id, "Still here!", tok=access_token)
# *Now* the user appears in R30.
r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})
class PhoneHomeR30V2TestCase(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
@ -363,11 +214,6 @@ class PhoneHomeR30V2TestCase(HomeserverTestCase):
r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
)
# Check that this is a situation where old R30 differs:
# old R30 DOES count this as 'retained'.
r30_results = self.get_success(store.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "ios": 1})
# Now we want to check that the user will still be able to appear in
# R30v2 as long as the user performs some other activity between
# 30 and 60 days later.