matrix-org · H-Shay · May 13, 2022 · May 3, 2022 · May 5, 2022 · May 5, 2022
@@ -0,0 +1 @@
+Add a config option to allow for auto-tuning of caches.
@@ -784,6 +784,24 @@ caches:
  #
  #cache_entry_ttl: 30m
 
+ # This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`,
+ # `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain
+ # a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize
+ # this option, and all three of the options must be specified for this feature to work.
+ #cache_autotuning:
+ # This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
+ # They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
+ # the flag below, or until the `min_cache_ttl` is hit.
+ #max_cache_memory_usage: 1024M
+
+ # This flag sets a rough target for the desired memory usage of the caches.
+ #target_cache_memory_usage: 758M
+
+ # `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
+ # caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
+ # from being emptied while Synapse is evicting due to memory.
+ #min_cache_ttl: 5m
+
  # Controls how long the results of a /sync request are cached for after
  # a successful response is returned. A higher duration can help clients with
  # intermittent connections, at the cost of higher memory usage.

@@ -1119,16 +1119,29 @@ Caching can be configured through the following sub-options:
  with intermittent connections, at the cost of higher memory usage.
  By default, this is zero, which means that sync responses are not cached
  at all.
-
+* `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and
+ `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory 
+ usage and cache entry availability. You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu)
+ to utilize this option, and all three of the options must be specified for this feature to work.
+ * `max_cache_memory_usage` sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
+ They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
+ the flag below, or until the `min_cache_ttl` is hit.
+ * `target_cache_memory_usage` sets a rough target for the desired memory usage of the caches.
+ * `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
+ caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
+ from being emptied while Synapse is evicting due to memory.
 
 Example configuration:
 ```yaml
 caches:
  global_factor: 1.0
  per_cache_factors:
  get_users_who_share_room_with_user: 2.0
- expire_caches: false
  sync_response_cache_duration: 2m
+ cache_autotuning:
+ max_cache_memory_usage: 1024M
+ target_cache_memory_usage: 758M
+ min_cache_ttl: 5m
 ```
 
 ### Reloading cache factors

@@ -176,6 +176,24 @@ def generate_config_section(self, **kwargs: Any) -> str:
  #
  #cache_entry_ttl: 30m
 
+ # This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`,
+ # `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain
+ # a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize
+ # this option, and all three of the options must be specified for this feature to work.
+ #cache_autotuning:
+ # This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
+ # They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
+ # the flag below, or until the `min_cache_ttl` is hit.
+ #max_cache_memory_usage: 1024M
+
+ # This flag sets a rough target for the desired memory usage of the caches.
+ #target_cache_memory_usage: 758M
+
+ # `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
+ # caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
+ # from being emptied while Synapse is evicting due to memory.
+ #min_cache_ttl: 5m
+
  # Controls how long the results of a /sync request are cached for after
  # a successful response is returned. A higher duration can help clients with
  # intermittent connections, at the cost of higher memory usage.
@@ -263,6 +281,21 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
  )
  self.expiry_time_msec = self.parse_duration(expiry_time)
 
+ self.cache_autotuning = cache_config.get("cache_autotuning")
+ if self.cache_autotuning:
+ max_memory_usage = self.cache_autotuning.get("max_cache_memory_usage")
+ self.cache_autotuning["max_cache_memory_usage"] = self.parse_size(
+ max_memory_usage
+ )
+
+ target_mem_size = self.cache_autotuning.get("target_cache_memory_usage")
+ self.cache_autotuning["target_cache_memory_usage"] = self.parse_size(
+ target_mem_size
+ )
+
+ min_cache_ttl = self.cache_autotuning.get("min_cache_ttl")
+ self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl)
+
  self.sync_response_cache_duration = self.parse_duration(
  cache_config.get("sync_response_cache_duration", 0)
  )

@@ -18,6 +18,7 @@
 import re
 from typing import Iterable, Optional, overload
 
+import attr
 from prometheus_client import REGISTRY, Metric
 from typing_extensions import Literal
 
@@ -27,52 +28,24 @@
 logger = logging.getLogger(__name__)
 
 
-def _setup_jemalloc_stats() -> None:
- """Checks to see if jemalloc is loaded, and hooks up a collector to record
- statistics exposed by jemalloc.
- """
-
- # Try to find the loaded jemalloc shared library, if any. We need to
- # introspect into what is loaded, rather than loading whatever is on the
- # path, as if we load a *different* jemalloc version things will seg fault.
-
- # We look in `/proc/self/maps`, which only exists on linux.
- if not os.path.exists("/proc/self/maps"):
- logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
- return
-
- # We're looking for a path at the end of the line that includes
- # "libjemalloc".
- regex = re.compile(r"/\S+/libjemalloc.*$")
-
- jemalloc_path = None
- with open("/proc/self/maps") as f:
- for line in f:
- match = regex.search(line.strip())
- if match:
- jemalloc_path = match.group()
-
- if not jemalloc_path:
- # No loaded jemalloc was found.
- logger.debug("jemalloc not found")
- return
-
- logger.debug("Found jemalloc at %s", jemalloc_path)
-
- jemalloc = ctypes.CDLL(jemalloc_path)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class JemallocStats:
+ jemalloc: ctypes.CDLL
 
  @overload
  def _mallctl(
- name: str, read: Literal[True] = True, write: Optional[int] = None
+ self, name: str, read: Literal[True] = True, write: Optional[int] = None
  ) -> int:
  ...
 
  @overload
- def _mallctl(name: str, read: Literal[False], write: Optional[int] = None) -> None:
+ def _mallctl(
+ self, name: str, read: Literal[False], write: Optional[int] = None
+ ) -> None:
  ...
 
  def _mallctl(
- name: str, read: bool = True, write: Optional[int] = None
+ self, name: str, read: bool = True, write: Optional[int] = None
  ) -> Optional[int]:
  """Wrapper around `mallctl` for reading and writing integers to
  jemalloc.
@@ -120,7 +93,7 @@ def _mallctl(
  # Where oldp/oldlenp is a buffer where the old value will be written to
  # (if not null), and newp/newlen is the buffer with the new value to set
  # (if not null). Note that they're all references *except* newlen.
- result = jemalloc.mallctl(
+ result = self.jemalloc.mallctl(
  name.encode("ascii"),
  input_var_ref,
  input_len_ref,
@@ -136,21 +109,80 @@ def _mallctl(
 
  return input_var.value
 
- def _jemalloc_refresh_stats() -> None:
+ def refresh_stats(self) -> None:
  """Request that jemalloc updates its internal statistics. This needs to
  be called before querying for stats, otherwise it will return stale
  values.
  """
  try:
- _mallctl("epoch", read=False, write=1)
+ self._mallctl("epoch", read=False, write=1)
  except Exception as e:
  logger.warning("Failed to reload jemalloc stats: %s", e)
 
+ def get_stat(self, name: str) -> int:
+ """Request the stat of the given name at the time of the last
+ `refresh_stats` call. This may throw if we fail to read
+ the stat.
+ """
+ return self._mallctl(f"stats.{name}")
+
+
+_JEMALLOC_STATS: Optional[JemallocStats] = None
+
+
+def get_jemalloc_stats() -> Optional[JemallocStats]:
+ """Returns an interface to jemalloc, if it is being used.
+
+ Note that this will always return None until `setup_jemalloc_stats` has been
+ called.
+ """
+ return _JEMALLOC_STATS
+
+
+def _setup_jemalloc_stats() -> None:
+ """Checks to see if jemalloc is loaded, and hooks up a collector to record
+ statistics exposed by jemalloc.
+ """
+
+ global _JEMALLOC_STATS
+
+ # Try to find the loaded jemalloc shared library, if any. We need to
+ # introspect into what is loaded, rather than loading whatever is on the
+ # path, as if we load a *different* jemalloc version things will seg fault.
+
+ # We look in `/proc/self/maps`, which only exists on linux.
+ if not os.path.exists("/proc/self/maps"):
+ logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
+ return
+
+ # We're looking for a path at the end of the line that includes
+ # "libjemalloc".
+ regex = re.compile(r"/\S+/libjemalloc.*$")
+
+ jemalloc_path = None
+ with open("/proc/self/maps") as f:
+ for line in f:
+ match = regex.search(line.strip())
+ if match:
+ jemalloc_path = match.group()
+
+ if not jemalloc_path:
+ # No loaded jemalloc was found.
+ logger.debug("jemalloc not found")
+ return
+
+ logger.debug("Found jemalloc at %s", jemalloc_path)
+
+ jemalloc_dll = ctypes.CDLL(jemalloc_path)
+
+ stats = JemallocStats(jemalloc_dll)
+ _JEMALLOC_STATS = stats
+
  class JemallocCollector(Collector):
  """Metrics for internal jemalloc stats."""
 
  def collect(self) -> Iterable[Metric]:
- _jemalloc_refresh_stats()
+ stats.refresh_stats()
 
  g = GaugeMetricFamily(
  "jemalloc_stats_app_memory_bytes",
@@ -184,7 +216,7 @@ def collect(self) -> Iterable[Metric]:
  "metadata",
  ):
  try:
- value = _mallctl(f"stats.{t}")
+ value = stats.get_stat(t)
  except Exception as e:
  # There was an error fetching the value, skip.
  logger.warning("Failed to read jemalloc stats.%s: %s", t, e)