Skip to content

Commit

Permalink
Performance improvements and refactor of Ratelimiter (matrix-org#7595)
Browse files Browse the repository at this point in the history
While working on matrix-org#5665 I found myself digging into the `Ratelimiter` class and seeing that it was both:

* Rather undocumented, and
* causing a *lot* of config checks

This PR attempts to refactor and comment the `Ratelimiter` class, as well as encourage config file accesses to only be done at instantiation. 

Best to be reviewed commit-by-commit.
  • Loading branch information
anoadragon453 authored and phil-flex committed Jun 16, 2020
1 parent 021558d commit be735d7
Show file tree
Hide file tree
Showing 19 changed files with 322 additions and 230 deletions.
1 change: 1 addition & 0 deletions changelog.d/7595.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor `Ratelimiter` to limit the amount of expensive config value accesses.
153 changes: 118 additions & 35 deletions synapse/api/ratelimiting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,75 +17,157 @@
from typing import Any, Optional, Tuple

from synapse.api.errors import LimitExceededError
from synapse.util import Clock


class Ratelimiter(object):
"""
Ratelimit message sending by user.
Ratelimit actions marked by arbitrary keys.
Args:
clock: A homeserver clock, for retrieving the current time
rate_hz: The long term number of actions that can be performed in a second.
burst_count: How many actions that can be performed before being limited.
"""

def __init__(self):
self.message_counts = (
OrderedDict()
) # type: OrderedDict[Any, Tuple[float, int, Optional[float]]]
def __init__(self, clock: Clock, rate_hz: float, burst_count: int):
self.clock = clock
self.rate_hz = rate_hz
self.burst_count = burst_count

# A ordered dictionary keeping track of actions, when they were last
# performed and how often. Each entry is a mapping from a key of arbitrary type
# to a tuple representing:
# * How many times an action has occurred since a point in time
# * The point in time
# * The rate_hz of this particular entry. This can vary per request
self.actions = OrderedDict() # type: OrderedDict[Any, Tuple[float, int, float]]

def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True):
def can_do_action(
self,
key: Any,
rate_hz: Optional[float] = None,
burst_count: Optional[int] = None,
update: bool = True,
_time_now_s: Optional[int] = None,
) -> Tuple[bool, float]:
"""Can the entity (e.g. user or IP address) perform the action?
Args:
key: The key we should use when rate limiting. Can be a user ID
(when sending events), an IP address, etc.
time_now_s: The time now.
rate_hz: The long term number of messages a user can send in a
second.
burst_count: How many messages the user can send before being
limited.
update (bool): Whether to update the message rates or not. This is
useful to check if a message would be allowed to be sent before
its ready to be actually sent.
rate_hz: The long term number of actions that can be performed in a second.
Overrides the value set during instantiation if set.
burst_count: How many actions that can be performed before being limited.
Overrides the value set during instantiation if set.
update: Whether to count this check as performing the action
_time_now_s: The current time. Optional, defaults to the current time according
to self.clock. Only used by tests.
Returns:
A pair of a bool indicating if they can send a message now and a
time in seconds of when they can next send a message.
A tuple containing:
* A bool indicating if they can perform the action now
* The reactor timestamp for when the action can be performed next.
-1 if rate_hz is less than or equal to zero
"""
self.prune_message_counts(time_now_s)
message_count, time_start, _ignored = self.message_counts.get(
key, (0.0, time_now_s, None)
)
# Override default values if set
time_now_s = _time_now_s if _time_now_s is not None else self.clock.time()
rate_hz = rate_hz if rate_hz is not None else self.rate_hz
burst_count = burst_count if burst_count is not None else self.burst_count

# Remove any expired entries
self._prune_message_counts(time_now_s)

# Check if there is an existing count entry for this key
action_count, time_start, _ = self.actions.get(key, (0.0, time_now_s, 0.0))

# Check whether performing another action is allowed
time_delta = time_now_s - time_start
sent_count = message_count - time_delta * rate_hz
if sent_count < 0:
performed_count = action_count - time_delta * rate_hz
if performed_count < 0:
# Allow, reset back to count 1
allowed = True
time_start = time_now_s
message_count = 1.0
elif sent_count > burst_count - 1.0:
action_count = 1.0
elif performed_count > burst_count - 1.0:
# Deny, we have exceeded our burst count
allowed = False
else:
# We haven't reached our limit yet
allowed = True
message_count += 1
action_count += 1.0

if update:
self.message_counts[key] = (message_count, time_start, rate_hz)
self.actions[key] = (action_count, time_start, rate_hz)

if rate_hz > 0:
time_allowed = time_start + (message_count - burst_count + 1) / rate_hz
# Find out when the count of existing actions expires
time_allowed = time_start + (action_count - burst_count + 1) / rate_hz

# Don't give back a time in the past
if time_allowed < time_now_s:
time_allowed = time_now_s

else:
# XXX: Why is this -1? This seems to only be used in
# self.ratelimit. I guess so that clients get a time in the past and don't
# feel afraid to try again immediately
time_allowed = -1

return allowed, time_allowed

def prune_message_counts(self, time_now_s):
for key in list(self.message_counts.keys()):
message_count, time_start, rate_hz = self.message_counts[key]
def _prune_message_counts(self, time_now_s: int):
"""Remove message count entries that have not exceeded their defined
rate_hz limit
Args:
time_now_s: The current time
"""
# We create a copy of the key list here as the dictionary is modified during
# the loop
for key in list(self.actions.keys()):
action_count, time_start, rate_hz = self.actions[key]

# Rate limit = "seconds since we started limiting this action" * rate_hz
# If this limit has not been exceeded, wipe our record of this action
time_delta = time_now_s - time_start
if message_count - time_delta * rate_hz > 0:
break
if action_count - time_delta * rate_hz > 0:
continue
else:
del self.message_counts[key]
del self.actions[key]

def ratelimit(
self,
key: Any,
rate_hz: Optional[float] = None,
burst_count: Optional[int] = None,
update: bool = True,
_time_now_s: Optional[int] = None,
):
"""Checks if an action can be performed. If not, raises a LimitExceededError
Args:
key: An arbitrary key used to classify an action
rate_hz: The long term number of actions that can be performed in a second.
Overrides the value set during instantiation if set.
burst_count: How many actions that can be performed before being limited.
Overrides the value set during instantiation if set.
update: Whether to count this check as performing the action
_time_now_s: The current time. Optional, defaults to the current time according
to self.clock. Only used by tests.
Raises:
LimitExceededError: If an action could not be performed, along with the time in
milliseconds until the action can be performed again
"""
time_now_s = _time_now_s if _time_now_s is not None else self.clock.time()

def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True):
allowed, time_allowed = self.can_do_action(
key, time_now_s, rate_hz, burst_count, update
key,
rate_hz=rate_hz,
burst_count=burst_count,
update=update,
_time_now_s=time_now_s,
)

if not allowed:
Expand Down
8 changes: 7 additions & 1 deletion synapse/config/ratelimiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict

from ._base import Config


class RateLimitConfig(object):
def __init__(self, config, defaults={"per_second": 0.17, "burst_count": 3.0}):
def __init__(
self,
config: Dict[str, float],
defaults={"per_second": 0.17, "burst_count": 3.0},
):
self.per_second = config.get("per_second", defaults["per_second"])
self.burst_count = config.get("burst_count", defaults["burst_count"])

Expand Down
60 changes: 29 additions & 31 deletions synapse/handlers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import synapse.types
from synapse.api.constants import EventTypes, Membership
from synapse.api.errors import LimitExceededError
from synapse.api.ratelimiting import Ratelimiter
from synapse.types import UserID

logger = logging.getLogger(__name__)
Expand All @@ -44,11 +44,26 @@ def __init__(self, hs):
self.notifier = hs.get_notifier()
self.state_handler = hs.get_state_handler()
self.distributor = hs.get_distributor()
self.ratelimiter = hs.get_ratelimiter()
self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter()
self.clock = hs.get_clock()
self.hs = hs

# The rate_hz and burst_count are overridden on a per-user basis
self.request_ratelimiter = Ratelimiter(
clock=self.clock, rate_hz=0, burst_count=0
)
self._rc_message = self.hs.config.rc_message

# Check whether ratelimiting room admin message redaction is enabled
# by the presence of rate limits in the config
if self.hs.config.rc_admin_redaction:
self.admin_redaction_ratelimiter = Ratelimiter(
clock=self.clock,
rate_hz=self.hs.config.rc_admin_redaction.per_second,
burst_count=self.hs.config.rc_admin_redaction.burst_count,
)
else:
self.admin_redaction_ratelimiter = None

self.server_name = hs.hostname

self.event_builder_factory = hs.get_event_builder_factory()
Expand All @@ -70,7 +85,6 @@ def ratelimit(self, requester, update=True, is_admin_redaction=False):
Raises:
LimitExceededError if the request should be ratelimited
"""
time_now = self.clock.time()
user_id = requester.user.to_string()

# The AS user itself is never rate limited.
Expand All @@ -83,48 +97,32 @@ def ratelimit(self, requester, update=True, is_admin_redaction=False):
if requester.app_service and not requester.app_service.is_rate_limited():
return

messages_per_second = self._rc_message.per_second
burst_count = self._rc_message.burst_count

# Check if there is a per user override in the DB.
override = yield self.store.get_ratelimit_for_user(user_id)
if override:
# If overriden with a null Hz then ratelimiting has been entirely
# If overridden with a null Hz then ratelimiting has been entirely
# disabled for the user
if not override.messages_per_second:
return

messages_per_second = override.messages_per_second
burst_count = override.burst_count

if is_admin_redaction and self.admin_redaction_ratelimiter:
# If we have separate config for admin redactions, use a separate
# ratelimiter as to not have user_ids clash
self.admin_redaction_ratelimiter.ratelimit(user_id, update=update)
else:
# We default to different values if this is an admin redaction and
# the config is set
if is_admin_redaction and self.hs.config.rc_admin_redaction:
messages_per_second = self.hs.config.rc_admin_redaction.per_second
burst_count = self.hs.config.rc_admin_redaction.burst_count
else:
messages_per_second = self.hs.config.rc_message.per_second
burst_count = self.hs.config.rc_message.burst_count

if is_admin_redaction and self.hs.config.rc_admin_redaction:
# If we have separate config for admin redactions we use a separate
# ratelimiter
allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
else:
allowed, time_allowed = self.ratelimiter.can_do_action(
# Override rate and burst count per-user
self.request_ratelimiter.ratelimit(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
if not allowed:
raise LimitExceededError(
retry_after_ms=int(1000 * (time_allowed - time_now))
)

async def maybe_kick_guest_users(self, event, context=None):
# Technically this function invalidates current_state by changing it.
Expand Down
24 changes: 8 additions & 16 deletions synapse/handlers/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def __init__(self, hs):

# Ratelimiter for failed auth during UIA. Uses same ratelimit config
# as per `rc_login.failed_attempts`.
self._failed_uia_attempts_ratelimiter = Ratelimiter()
self._failed_uia_attempts_ratelimiter = Ratelimiter(
clock=self.clock,
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
)

self._clock = self.hs.get_clock()

Expand Down Expand Up @@ -196,13 +200,7 @@ async def validate_user_via_ui_auth(
user_id = requester.user.to_string()

# Check if we should be ratelimited due to too many previous failed attempts
self._failed_uia_attempts_ratelimiter.ratelimit(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=False,
)
self._failed_uia_attempts_ratelimiter.ratelimit(user_id, update=False)

# build a list of supported flows
flows = [[login_type] for login_type in self._supported_ui_auth_types]
Expand All @@ -212,14 +210,8 @@ async def validate_user_via_ui_auth(
flows, request, request_body, clientip, description
)
except LoginError:
# Update the ratelimite to say we failed (`can_do_action` doesn't raise).
self._failed_uia_attempts_ratelimiter.can_do_action(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=True,
)
# Update the ratelimiter to say we failed (`can_do_action` doesn't raise).
self._failed_uia_attempts_ratelimiter.can_do_action(user_id)
raise

# find the completed login type
Expand Down
1 change: 0 additions & 1 deletion synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,6 @@ def __init__(self, hs):
self.profile_handler = hs.get_profile_handler()
self.event_builder_factory = hs.get_event_builder_factory()
self.server_name = hs.hostname
self.ratelimiter = hs.get_ratelimiter()
self.notifier = hs.get_notifier()
self.config = hs.config
self.require_membership_for_aliases = hs.config.require_membership_for_aliases
Expand Down
9 changes: 1 addition & 8 deletions synapse/handlers/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,7 @@ def check_registration_ratelimit(self, address):
if not address:
return

time_now = self.clock.time()

self.ratelimiter.ratelimit(
address,
time_now_s=time_now,
rate_hz=self.hs.config.rc_registration.per_second,
burst_count=self.hs.config.rc_registration.burst_count,
)
self.ratelimiter.ratelimit(address)

def register_with_store(
self,
Expand Down
Loading

0 comments on commit be735d7

Please sign in to comment.