Merge branch 'develop' of github.com:matrix-org/synapse into erikj/split_out_fed_txn

This commit is contained in:
Erik Johnston 2016-11-23 11:31:53 +00:00
commit b69f76c106
10 changed files with 396 additions and 25 deletions

View File

@ -1,3 +1,31 @@
Changes in synapse v0.18.4 (2016-11-22)
=======================================
Bug fixes:
* Add workaround for buggy clients that fail to register (PR #1632)
Changes in synapse v0.18.4-rc1 (2016-11-14)
===========================================
Changes:
* Various database efficiency improvements (PR #1188, #1192)
* Update default config to blacklist more internal IPs, thanks to Euan Kemp (PR
#1198)
* Allow specifying duration in minutes in config, thanks to Daniel Dent (PR
#1625)
Bug fixes:
* Fix media repo to set CORS headers on responses (PR #1190)
* Fix registration to not error on non-ascii passwords (PR #1191)
* Fix create event code to limit the number of prev_events (PR #1615)
* Fix bug in transaction ID deduplication (PR #1624)
Changes in synapse v0.18.3 (2016-11-08)
=======================================

View File

@ -16,4 +16,4 @@
""" This is a reference implementation of a Matrix home server.
"""
__version__ = "0.18.4"

View File

@ -39,6 +39,7 @@ class Codes(object):
CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED"
CAPTCHA_INVALID = "M_CAPTCHA_INVALID"
MISSING_PARAM = "M_MISSING_PARAM"
INVALID_PARAM = "M_INVALID_PARAM"
TOO_LARGE = "M_TOO_LARGE"
EXCLUSIVE = "M_EXCLUSIVE"
THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED"

View File

@ -71,6 +71,21 @@ class Filtering(object):
if key in user_filter_json["room"]:
self._check_definition(user_filter_json["room"][key])
if "event_fields" in user_filter_json:
if type(user_filter_json["event_fields"]) != list:
raise SynapseError(400, "event_fields must be a list of strings")
for field in user_filter_json["event_fields"]:
if not isinstance(field, basestring):
raise SynapseError(400, "Event field must be a string")
# Don't allow '\\' in event field filters. This makes matching
# events a lot easier as we can then use a negative lookbehind
# assertion to split '\.' If we allowed \\ then it would
# incorrectly split '\\.' See synapse.events.utils.serialize_event
if r'\\' in field:
raise SynapseError(
400, r'The escape character \ cannot itself be escaped'
)
def _check_definition_room_lists(self, definition):
"""Check that "rooms" and "not_rooms" are lists of room ids if they
are present
@ -152,6 +167,7 @@ class FilterCollection(object):
self.include_leave = filter_json.get("room", {}).get(
"include_leave", False
)
self.event_fields = filter_json.get("event_fields", [])
def __repr__(self):
return "<FilterCollection %s>" % (json.dumps(self._filter_json),)
@ -186,6 +202,26 @@ class FilterCollection(object):
def filter_room_account_data(self, events):
return self._room_account_data.filter(self._room_filter.filter(events))
def blocks_all_presence(self):
return (
self._presence_filter.filters_all_types() or
self._presence_filter.filters_all_senders()
)
def blocks_all_room_ephemeral(self):
return (
self._room_ephemeral_filter.filters_all_types() or
self._room_ephemeral_filter.filters_all_senders() or
self._room_ephemeral_filter.filters_all_rooms()
)
def blocks_all_room_timeline(self):
return (
self._room_timeline_filter.filters_all_types() or
self._room_timeline_filter.filters_all_senders() or
self._room_timeline_filter.filters_all_rooms()
)
class Filter(object):
def __init__(self, filter_json):
@ -202,6 +238,15 @@ class Filter(object):
self.contains_url = self.filter_json.get("contains_url", None)
def filters_all_types(self):
return "*" in self.not_types
def filters_all_senders(self):
return "*" in self.not_senders
def filters_all_rooms(self):
return "*" in self.not_rooms
def check(self, event):
"""Checks whether the filter matches the given event.

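To make the new filter options concrete, here is a rough sketch of a filter definition that exercises them; the dict shape follows the validation and blocks_all_* checks above, and the variable name is purely illustrative.

# Illustrative only: a client filter using the new "event_fields" option.
# Literal dots inside a field name are escaped with a backslash; a bare
# '\\' is rejected by the validation above.
filter_json = {
    "room": {
        "timeline": {"limit": 10},
        # "*" in not_types makes filters_all_types() true, so
        # blocks_all_room_ephemeral() lets the sync handler skip this data.
        "ephemeral": {"not_types": ["*"]},
    },
    "event_fields": [
        "type",
        "content.body",
        r"content.key\.with\.dots",  # selects the literal key "key.with.dots"
    ],
}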
View File

@ -16,6 +16,17 @@
from synapse.api.constants import EventTypes
from . import EventBase
from frozendict import frozendict
import re
# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
# (?<!stuff) matches if the current position in the string is not preceded
# by a match for 'stuff'.
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
SPLIT_FIELD_REGEX = re.compile(r'(?<!\\)\.')
def prune_event(event):
""" Returns a pruned version of the given event, which removes all keys we
@ -97,6 +108,83 @@ def prune_event(event):
)
def _copy_field(src, dst, field):
"""Copy the field in 'src' to 'dst'.
For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"]
then dst={"foo":{"bar":5}}.
Args:
src(dict): The dict to read from.
dst(dict): The dict to modify.
field(list<str>): List of keys to drill down to in 'src'.
"""
if len(field) == 0: # this should be impossible
return
if len(field) == 1: # common case e.g. 'origin_server_ts'
if field[0] in src:
dst[field[0]] = src[field[0]]
return
# Else is a nested field e.g. 'content.body'
# Pop the last field as that's the key to move across and we need the
# parent dict in order to access the data. Drill down to the right dict.
key_to_move = field.pop(-1)
sub_dict = src
for sub_field in field: # e.g. sub_field => "content"
if sub_field in sub_dict and type(sub_dict[sub_field]) in [dict, frozendict]:
sub_dict = sub_dict[sub_field]
else:
return
if key_to_move not in sub_dict:
return
# Insert the key into the output dictionary, creating nested objects
# as required. We couldn't do this any earlier or else we'd need to delete
# the empty objects if the key didn't exist.
sub_out_dict = dst
for sub_field in field:
sub_out_dict = sub_out_dict.setdefault(sub_field, {})
sub_out_dict[key_to_move] = sub_dict[key_to_move]
def only_fields(dictionary, fields):
"""Return a new dict with only the fields in 'dictionary' which are present
in 'fields'.
If there are no event fields specified then all fields are included.
The entries may include '.' characters to indicate sub-fields.
So ['content.body'] will include the 'body' field of the 'content' object.
A literal '.' character in a field name may be escaped using a '\'.
Args:
dictionary(dict): The dictionary to read from.
fields(list<str>): A list of fields to copy over. Only shallow refs are
taken.
Returns:
dict: A new dictionary with only the given fields. If fields was empty,
the same dictionary is returned.
"""
if len(fields) == 0:
return dictionary
# for each field, convert it:
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
# for each element of the output array of arrays:
# remove escaping so we can use the right key names.
split_fields[:] = [
[f.replace(r'\.', r'.') for f in field_array] for field_array in split_fields
]
output = {}
for field_array in split_fields:
_copy_field(dictionary, output, field_array)
return output
def format_event_raw(d):
return d
@ -137,7 +225,7 @@ def format_event_for_client_v2_without_room_id(d):
def serialize_event(e, time_now_ms, as_client_event=True,
event_format=format_event_for_client_v1,
token_id=None, only_event_fields=None):
# FIXME(erikj): To handle the case of presence events and the like
if not isinstance(e, EventBase):
return e
@ -164,6 +252,12 @@ def serialize_event(e, time_now_ms, as_client_event=True,
d["unsigned"]["transaction_id"] = txn_id
if as_client_event:
d = event_format(d)
else:
if only_event_fields:
if (not isinstance(only_event_fields, list) or
not all(isinstance(f, basestring) for f in only_event_fields)):
raise TypeError("only_event_fields must be a list of strings")
d = only_fields(d, only_event_fields)
return d
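A minimal sketch of the new field filtering in isolation, calling only_fields directly; the expected outputs mirror the unit tests added at the bottom of this commit.

from synapse.events.utils import only_fields

src = {
    "sender": "@alice:localhost",
    "content": {
        "body": "A message",
        "key.with.dots": {},
    },
}

# Nested fields are addressed with '.'; a literal dot is escaped with '\'.
print(only_fields(src, ["content.body"]))
# => {'content': {'body': 'A message'}}

print(only_fields(src, [r"content.key\.with\.dots"]))
# => {'content': {'key.with.dots': {}}}

# An empty field list means "include everything": the same dict is returned.
print(only_fields(src, []) is src)
# => True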

View File

@ -277,6 +277,7 @@ class SyncHandler(object):
"""
with Measure(self.clock, "load_filtered_recents"):
timeline_limit = sync_config.filter_collection.timeline_limit()
block_all_timeline = sync_config.filter_collection.blocks_all_room_timeline()
if recents is None or newly_joined_room or timeline_limit < len(recents):
limited = True
@ -293,7 +294,7 @@ class SyncHandler(object):
else:
recents = []
if not limited or block_all_timeline:
defer.returnValue(TimelineBatch(
events=recents,
prev_batch=now_token,
@ -531,6 +532,11 @@ class SyncHandler(object):
)
newly_joined_rooms, newly_joined_users = res
block_all_presence_data = (
since_token is None and
sync_config.filter_collection.blocks_all_presence()
)
if not block_all_presence_data:
yield self._generate_sync_entry_for_presence(
sync_result_builder, newly_joined_rooms, newly_joined_users
)
@ -709,7 +715,14 @@ class SyncHandler(object):
`(newly_joined_rooms, newly_joined_users)`
"""
user_id = sync_result_builder.sync_config.user.to_string()
block_all_room_ephemeral = (
sync_result_builder.since_token is None and
sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
)
if block_all_room_ephemeral:
ephemeral_by_room = {}
else:
now_token, ephemeral_by_room = yield self.ephemeral_by_room(
sync_result_builder.sync_config,
now_token=sync_result_builder.now_token,
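For illustration, a filter like the sketch below (field names as defined in filtering.py above) now short-circuits these code paths: presence and per-room ephemeral data are skipped entirely on an initial sync, and the timeline load is skipped whenever the timeline filter excludes every event type.

# Illustrative sync filter that triggers the new blocks_all_* short-circuits.
filter_json = {
    "presence": {"not_types": ["*"]},        # blocks_all_presence()
    "room": {
        "ephemeral": {"not_types": ["*"]},   # blocks_all_room_ephemeral()
        "timeline": {"not_types": ["*"]},    # blocks_all_room_timeline()
    },
}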

View File

@ -169,6 +169,17 @@ class RegisterRestServlet(RestServlet):
guest_access_token = body.get("guest_access_token", None)
if (
'initial_device_display_name' in body and
'password' not in body
):
# ignore 'initial_device_display_name' if sent without
# a password to work around a client bug where it sent
# the 'initial_device_display_name' param alone, wiping out
# the original registration params
logger.warn("Ignoring initial_device_display_name without password")
del body['initial_device_display_name']
session_id = self.auth_handler.get_session_id(body)
registered_user_id = None
if session_id:
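As a hypothetical illustration of the client bug described in the comment (the exact body below is an assumption, not taken from the commit): a later call in the registration flow carrying only the device display name, with no password, used to overwrite the stored registration parameters; the servlet now drops the key and logs a warning instead.

# Hypothetical follow-up /register body from a buggy client: no "password",
# only the device display name. Previously this wiped the original
# registration params for the session; now the key is simply ignored.
body = {
    "auth": {"session": "<session id>", "type": "m.login.dummy"},
    "initial_device_display_name": "My phone",
}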

View File

@ -162,7 +162,7 @@ class SyncRestServlet(RestServlet):
time_now = self.clock.time_msec()
joined = self.encode_joined(
sync_result.joined, time_now, requester.access_token_id, filter.event_fields
)
invited = self.encode_invited(
@ -170,7 +170,7 @@ class SyncRestServlet(RestServlet):
)
archived = self.encode_archived(
sync_result.archived, time_now, requester.access_token_id, filter.event_fields
)
response_content = {
@ -197,7 +197,7 @@ class SyncRestServlet(RestServlet):
formatted.append(event)
return {"events": formatted}
def encode_joined(self, rooms, time_now, token_id, event_fields):
"""
Encode the joined rooms in a sync result
@ -208,7 +208,8 @@ class SyncRestServlet(RestServlet):
calculations
token_id(int): ID of the user's auth token - used for namespacing
of transaction IDs
event_fields(list<str>): List of event fields to include. If empty,
all fields will be returned.
Returns:
dict[str, dict[str, object]]: the joined rooms list, in our
response format
@ -216,7 +217,7 @@ class SyncRestServlet(RestServlet):
joined = {}
for room in rooms:
joined[room.room_id] = self.encode_room(
room, time_now, token_id, only_fields=event_fields
)
return joined
@ -253,7 +254,7 @@ class SyncRestServlet(RestServlet):
return invited
def encode_archived(self, rooms, time_now, token_id, event_fields):
"""
Encode the archived rooms in a sync result
@ -264,7 +265,8 @@ class SyncRestServlet(RestServlet):
calculations
token_id(int): ID of the user's auth token - used for namespacing
of transaction IDs
event_fields(list<str>): List of event fields to include. If empty,
all fields will be returned.
Returns:
dict[str, dict[str, object]]: The invited rooms list, in our
response format
@ -272,13 +274,13 @@ class SyncRestServlet(RestServlet):
joined = {}
for room in rooms:
joined[room.room_id] = self.encode_room(
room, time_now, token_id, joined=False, only_fields=event_fields
)
return joined
@staticmethod
def encode_room(room, time_now, token_id, joined=True, only_fields=None):
"""
Args:
room (JoinedSyncResult|ArchivedSyncResult): sync result for a
@ -289,7 +291,7 @@ class SyncRestServlet(RestServlet):
of transaction IDs
joined (bool): True if the user is joined to this room - will mean
we handle ephemeral events
only_fields(list<str>): Optional. The list of event fields to include.
Returns:
dict[str, object]: the room, encoded in our response format
"""
@ -298,6 +300,7 @@ class SyncRestServlet(RestServlet):
return serialize_event(
event, time_now, token_id=token_id,
event_format=format_event_for_client_v2_without_room_id,
only_event_fields=only_fields,
)
state_dict = room.state
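A rough end-to-end sketch of how a client uses this; the endpoint paths follow the Matrix client-server r0 API and are assumptions about the surrounding flow rather than part of this commit.

# 1. Upload a filter containing "event_fields"
#    (POST /_matrix/client/r0/user/<user_id>/filter) and keep the returned
#    filter_id.
sync_filter = {
    "room": {"timeline": {"limit": 20}},
    "event_fields": ["type", "sender", "content.body"],
}

# 2. Sync with it (GET /_matrix/client/r0/sync?filter=<filter_id>).
#    encode_joined/encode_archived now pass filter.event_fields down to
#    serialize_event, so events in the response carry only the requested
#    fields.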

View File

@ -16,6 +16,7 @@
from twisted.internet import defer
from ._base import SQLBaseStore
from synapse.api.errors import SynapseError, Codes
from synapse.util.caches.descriptors import cachedInlineCallbacks
import simplejson as json
@ -24,6 +25,13 @@ import simplejson as json
class FilteringStore(SQLBaseStore):
@cachedInlineCallbacks(num_args=2)
def get_user_filter(self, user_localpart, filter_id):
# filter_id is BIGINT UNSIGNED, so if it isn't a number, fail
# with a coherent error message rather than 500 M_UNKNOWN.
try:
int(filter_id)
except ValueError:
raise SynapseError(400, "Invalid filter ID", Codes.INVALID_PARAM)
def_json = yield self._simple_select_one_onecol(
table="user_filters",
keyvalues={
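The client-visible effect, sketched below with an assumed request path: a non-numeric filter ID now produces a 400 with M_INVALID_PARAM instead of a 500 M_UNKNOWN.

# e.g. GET /_matrix/client/r0/sync?filter=not-a-number
# before: 500 {"errcode": "M_UNKNOWN", ...}
# after:  400 {"errcode": "M_INVALID_PARAM", "error": "Invalid filter ID"}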

View File

@ -17,7 +17,11 @@
from .. import unittest
from synapse.events import FrozenEvent
from synapse.events.utils import prune_event, serialize_event
def MockEvent(**kwargs):
return FrozenEvent(kwargs)
class PruneEventTestCase(unittest.TestCase):
@ -114,3 +118,167 @@ class PruneEventTestCase(unittest.TestCase):
'unsigned': {},
}
)
class SerializeEventTestCase(unittest.TestCase):
def serialize(self, ev, fields):
return serialize_event(ev, 1479807801915, only_event_fields=fields)
def test_event_fields_works_with_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar"
),
["room_id"]
),
{
"room_id": "!foo:bar",
}
)
def test_event_fields_works_with_nested_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"body": "A message",
},
),
["content.body"]
),
{
"content": {
"body": "A message",
}
}
)
def test_event_fields_works_with_dot_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"key.with.dots": {},
},
),
["content.key\.with\.dots"]
),
{
"content": {
"key.with.dots": {},
}
}
)
def test_event_fields_works_with_nested_dot_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"not_me": 1,
"nested.dot.key": {
"leaf.key": 42,
"not_me_either": 1,
},
},
),
["content.nested\.dot\.key.leaf\.key"]
),
{
"content": {
"nested.dot.key": {
"leaf.key": 42,
},
}
}
)
def test_event_fields_nops_with_unknown_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"foo": "bar",
},
),
["content.foo", "content.notexists"]
),
{
"content": {
"foo": "bar",
}
}
)
def test_event_fields_nops_with_non_dict_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"foo": ["I", "am", "an", "array"],
},
),
["content.foo.am"]
),
{}
)
def test_event_fields_nops_with_array_keys(self):
self.assertEquals(
self.serialize(
MockEvent(
sender="@alice:localhost",
room_id="!foo:bar",
content={
"foo": ["I", "am", "an", "array"],
},
),
["content.foo.1"]
),
{}
)
def test_event_fields_all_fields_if_empty(self):
self.assertEquals(
self.serialize(
MockEvent(
room_id="!foo:bar",
content={
"foo": "bar",
},
),
[]
),
{
"room_id": "!foo:bar",
"content": {
"foo": "bar",
},
"unsigned": {}
}
)
def test_event_fields_fail_if_fields_not_str(self):
with self.assertRaises(TypeError):
self.serialize(
MockEvent(
room_id="!foo:bar",
content={
"foo": "bar",
},
),
["room_id", 4]
)