Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Remove old R30 because R30v2 supersedes it (#10428)
Browse files Browse the repository at this point in the history
R30v2 has been out since 2021-07-19 (#10332)
and we started collecting stats on 2021-08-16. Since it's been over a year now
(almost 2 years), this is enough grace period for us to now rip it out.
  • Loading branch information
reivilibre authored May 19, 2023
1 parent 1e89976 commit 736199b
Show file tree
Hide file tree
Showing 5 changed files with 1 addition and 246 deletions.
1 change: 1 addition & 0 deletions changelog.d/10428.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove the old version of the R30 (30-day retained users) phone-home metric.
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ The following statistics are sent to the configured reporting endpoint:
| `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. |
| `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. |
| `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. |
| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. |
| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. |
| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. |
| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. |
| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. |
| `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. |
| `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. |
| `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. |
Expand Down
4 changes: 0 additions & 4 deletions synapse/app/phone_stats_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,6 @@ async def phone_stats_home(
daily_sent_messages = await store.count_daily_sent_messages()
stats["daily_sent_messages"] = daily_sent_messages

r30_results = await store.count_r30_users()
for name, count in r30_results.items():
stats["r30_users_" + name] = count

r30v2_results = await store.count_r30v2_users()
for name, count in r30v2_results.items():
stats["r30v2_users_" + name] = count
Expand Down
83 changes: 0 additions & 83 deletions synapse/storage/databases/main/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,89 +248,6 @@ def _count_users(self, txn: LoggingTransaction, time_from: int) -> int:
(count,) = cast(Tuple[int], txn.fetchone())
return count

async def count_r30_users(self) -> Dict[str, int]:
    """
    Counts the number of 30 day retained users, defined as:-
     * Users who have created their accounts more than 30 days ago
     * Where last seen at most 30 days ago
     * Where account creation and last_seen are > 30 days apart

    Returns:
        A mapping of counts globally (under the key "all") as well as
        broken out by platform. A platform key is only present when the
        per-platform query returned a row for it. User agents that match
        none of the known platforms are reported under "unknown".
    """

    def _count_r30_users(txn: "LoggingTransaction") -> Dict[str, int]:
        thirty_days_in_secs = 86400 * 30
        now = int(self._clock.time())
        thirty_days_ago_in_secs = now - thirty_days_in_secs

        # Both queries bind the same cutoff twice: once as the upper bound
        # on `users.creation_ts` and once as the lower bound on
        # `uip.last_seen / 1000`.
        cutoff_args = (thirty_days_ago_in_secs, thirty_days_ago_in_secs)

        # Per-platform counts. The platform is derived from the user agent
        # by the CASE expression; the first matching WHEN wins.
        sql = """
            SELECT platform, COUNT(*) FROM (
                SELECT
                    users.name, platform, users.creation_ts * 1000,
                    MAX(uip.last_seen)
                FROM users
                INNER JOIN (
                    SELECT
                        user_id,
                        last_seen,
                        CASE
                            WHEN user_agent LIKE '%%Android%%' THEN 'android'
                            WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
                            WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
                            WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
                            WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
                            ELSE 'unknown'
                        END
                        AS platform
                    FROM user_ips
                ) uip
                ON users.name = uip.user_id
                AND users.appservice_id is NULL
                AND users.creation_ts < ?
                AND uip.last_seen/1000 > ?
                AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                GROUP BY users.name, platform, users.creation_ts
            ) u GROUP BY platform
        """

        txn.execute(sql, cutoff_args)

        # Every returned platform is recorded, *including* "unknown". (The
        # previous `if row[0] == "unknown": pass` was a no-op, so "unknown"
        # has always been part of the result.)
        results: Dict[str, int] = {
            platform: platform_count for platform, platform_count in txn
        }

        # Global count: the same join and filters, without the platform
        # breakdown, so each retained user is counted once.
        sql = """
            SELECT COUNT(*) FROM (
                SELECT users.name, users.creation_ts * 1000,
                    MAX(uip.last_seen)
                FROM users
                INNER JOIN (
                    SELECT
                        user_id,
                        last_seen
                    FROM user_ips
                ) uip
                ON users.name = uip.user_id
                AND appservice_id is NULL
                AND users.creation_ts < ?
                AND uip.last_seen/1000 > ?
                AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                GROUP BY users.name, users.creation_ts
            ) u
        """

        txn.execute(sql, cutoff_args)

        (count,) = cast(Tuple[int], txn.fetchone())
        results["all"] = count

        return results

    return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)

async def count_r30v2_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as users that:
Expand Down
154 changes: 0 additions & 154 deletions tests/app/test_phone_stats_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,162 +4,13 @@
from synapse.server import HomeServer
from synapse.util import Clock

from tests import unittest
from tests.server import ThreadedMemoryReactorClock
from tests.unittest import HomeserverTestCase

FIVE_MINUTES_IN_SECONDS = 300
ONE_DAY_IN_SECONDS = 86400


class PhoneHomeTestCase(HomeserverTestCase):
    """Tests for the original R30 (30 day retained users) phone-home metric."""

    servlets = [
        synapse.rest.admin.register_servlets_for_client_rest_resource,
        room.register_servlets,
        login.register_servlets,
    ]

    def _register_active_user(self) -> tuple:
        """
        Registers user "u1", logs it in, creates a room and sends a message.

        Returns:
            A `(room_id, access_token)` pair, for sending further messages
            as that user.
        """
        user_id = self.register_user("u1", "secret!")
        access_token = self.login("u1", "secret!")
        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
        self.helper.send(room_id, "message", tok=access_token)
        return room_id, access_token

    def _assert_r30(self, expected: dict) -> None:
        """Asserts that `count_r30_users` currently returns exactly `expected`."""
        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
        self.assertEqual(r30_results, expected)

    def _check_minimum_usage(self, idle_days_still_counted: int) -> None:
        """
        Runs the shared 'minimum usage' scenario: a user posts once on
        registration and once more just over 30 days later, then goes idle.

        Args:
            idle_days_still_counted: the number of idle days after which the
                user must still be counted; one day later the user must no
                longer be counted.
        """
        # Register a user, log it in, create a room and send a message
        room_id, access_token = self._register_active_user()

        # Check the R30 results do not count that user.
        self._assert_r30({"all": 0})

        # Advance 30 days (+ 1 second, because strict inequality causes issues if we are
        # bang on 30 days later).
        self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)

        # (Make sure the user isn't somehow counted by this point.)
        self._assert_r30({"all": 0})

        # Send a message (this counts as activity)
        self.helper.send(room_id, "message2", tok=access_token)

        # We have to wait some time for _update_client_ips_batch to get
        # called and update the user_ips table.
        self.reactor.advance(2 * 60 * 60)

        # *Now* the user is counted.
        self._assert_r30({"all": 1, "unknown": 1})

        # Advance by the given number of days without any further activity.
        self.reactor.advance(idle_days_still_counted * ONE_DAY_IN_SECONDS)

        # The user is still counted.
        self._assert_r30({"all": 1, "unknown": 1})

        # Advance another day.
        self.reactor.advance(ONE_DAY_IN_SECONDS)

        # The user is now no longer counted in R30.
        self._assert_r30({"all": 0})

    # Override the retention time for the user_ips table because otherwise it
    # gets pruned too aggressively for our R30 test.
    @unittest.override_config({"user_ips_max_age": "365d"})
    def test_r30_minimum_usage(self) -> None:
        """
        Tests the minimum amount of interaction necessary for the R30 metric
        to consider a user 'retained'.
        """
        # The user is still counted after 29 idle days, and drops out once
        # they have not posted for 30 days.
        self._check_minimum_usage(idle_days_still_counted=29)

    def test_r30_minimum_usage_using_default_config(self) -> None:
        """
        Tests the minimum amount of interaction necessary for the R30 metric
        to consider a user 'retained'.
        N.B. This test does not override the `user_ips_max_age` config setting,
        which defaults to 28 days.
        """
        # The user is still counted after 27 idle days, and drops out once
        # they have not posted for 28 days.
        # (This is because the user_ips table has been pruned, which by default
        # only preserves the last 28 days of entries.)
        self._check_minimum_usage(idle_days_still_counted=27)

    def test_r30_user_must_be_retained_for_at_least_a_month(self) -> None:
        """
        Tests that a newly-registered user must be retained for a whole month
        before appearing in the R30 statistic, even if they post every day
        during that time!
        """
        # Register a user and send a message
        room_id, access_token = self._register_active_user()

        # Check the user does not contribute to R30 yet.
        self._assert_r30({"all": 0})

        for _ in range(30):
            # This loop posts a message every day for 30 days
            self.reactor.advance(ONE_DAY_IN_SECONDS)
            self.helper.send(room_id, "I'm still here", tok=access_token)

            # Notice that the user *still* does not contribute to R30!
            self._assert_r30({"all": 0})

        self.reactor.advance(ONE_DAY_IN_SECONDS)
        self.helper.send(room_id, "Still here!", tok=access_token)

        # *Now* the user appears in R30.
        self._assert_r30({"all": 1, "unknown": 1})


class PhoneHomeR30V2TestCase(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
Expand Down Expand Up @@ -363,11 +214,6 @@ def test_r30v2_returning_dormant_users_not_counted(self) -> None:
r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
)

# Check that this is a situation where old R30 differs:
# old R30 DOES count this as 'retained'.
r30_results = self.get_success(store.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "ios": 1})

# Now we want to check that the user will still be able to appear in
# R30v2 as long as the user performs some other activity between
# 30 and 60 days later.
Expand Down

0 comments on commit 736199b

Please sign in to comment.