Make the metrics less racy (#4061)
This commit is contained in:
parent
6a4d01ee94
commit
b69216f768
|
@ -0,0 +1 @@
|
||||||
|
Fix some metrics being racy and causing exceptions when polled by Prometheus.
|
|
@ -39,7 +39,8 @@ outgoing_responses_counter = Counter(
|
||||||
)
|
)
|
||||||
|
|
||||||
response_timer = Histogram(
|
response_timer = Histogram(
|
||||||
"synapse_http_server_response_time_seconds", "sec",
|
"synapse_http_server_response_time_seconds",
|
||||||
|
"sec",
|
||||||
["method", "servlet", "tag", "code"],
|
["method", "servlet", "tag", "code"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -79,15 +80,11 @@ response_size = Counter(
|
||||||
# than when the response was written.
|
# than when the response was written.
|
||||||
|
|
||||||
in_flight_requests_ru_utime = Counter(
|
in_flight_requests_ru_utime = Counter(
|
||||||
"synapse_http_server_in_flight_requests_ru_utime_seconds",
|
"synapse_http_server_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"]
|
||||||
"",
|
|
||||||
["method", "servlet"],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
in_flight_requests_ru_stime = Counter(
|
in_flight_requests_ru_stime = Counter(
|
||||||
"synapse_http_server_in_flight_requests_ru_stime_seconds",
|
"synapse_http_server_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"]
|
||||||
"",
|
|
||||||
["method", "servlet"],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
in_flight_requests_db_txn_count = Counter(
|
in_flight_requests_db_txn_count = Counter(
|
||||||
|
@ -134,7 +131,7 @@ def _get_in_flight_counts():
|
||||||
# type
|
# type
|
||||||
counts = {}
|
counts = {}
|
||||||
for rm in reqs:
|
for rm in reqs:
|
||||||
key = (rm.method, rm.name,)
|
key = (rm.method, rm.name)
|
||||||
counts[key] = counts.get(key, 0) + 1
|
counts[key] = counts.get(key, 0) + 1
|
||||||
|
|
||||||
return counts
|
return counts
|
||||||
|
@ -175,7 +172,8 @@ class RequestMetrics(object):
|
||||||
if context != self.start_context:
|
if context != self.start_context:
|
||||||
logger.warn(
|
logger.warn(
|
||||||
"Context have unexpectedly changed %r, %r",
|
"Context have unexpectedly changed %r, %r",
|
||||||
context, self.start_context
|
context,
|
||||||
|
self.start_context,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -192,10 +190,10 @@ class RequestMetrics(object):
|
||||||
resource_usage = context.get_resource_usage()
|
resource_usage = context.get_resource_usage()
|
||||||
|
|
||||||
response_ru_utime.labels(self.method, self.name, tag).inc(
|
response_ru_utime.labels(self.method, self.name, tag).inc(
|
||||||
resource_usage.ru_utime,
|
resource_usage.ru_utime
|
||||||
)
|
)
|
||||||
response_ru_stime.labels(self.method, self.name, tag).inc(
|
response_ru_stime.labels(self.method, self.name, tag).inc(
|
||||||
resource_usage.ru_stime,
|
resource_usage.ru_stime
|
||||||
)
|
)
|
||||||
response_db_txn_count.labels(self.method, self.name, tag).inc(
|
response_db_txn_count.labels(self.method, self.name, tag).inc(
|
||||||
resource_usage.db_txn_count
|
resource_usage.db_txn_count
|
||||||
|
@ -222,8 +220,15 @@ class RequestMetrics(object):
|
||||||
diff = new_stats - self._request_stats
|
diff = new_stats - self._request_stats
|
||||||
self._request_stats = new_stats
|
self._request_stats = new_stats
|
||||||
|
|
||||||
in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
|
# max() is used since rapid use of ru_stime/ru_utime can end up with the
|
||||||
in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
|
# count going backwards due to NTP, time smearing, fine-grained
|
||||||
|
# correction, or floating points. Who knows, really?
|
||||||
|
in_flight_requests_ru_utime.labels(self.method, self.name).inc(
|
||||||
|
max(diff.ru_utime, 0)
|
||||||
|
)
|
||||||
|
in_flight_requests_ru_stime.labels(self.method, self.name).inc(
|
||||||
|
max(diff.ru_stime, 0)
|
||||||
|
)
|
||||||
|
|
||||||
in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
|
in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
|
||||||
diff.db_txn_count
|
diff.db_txn_count
|
||||||
|
|
|
@ -186,9 +186,9 @@ class Notifier(object):
|
||||||
def count_listeners():
|
def count_listeners():
|
||||||
all_user_streams = set()
|
all_user_streams = set()
|
||||||
|
|
||||||
for x in self.room_to_user_streams.values():
|
for x in list(self.room_to_user_streams.values()):
|
||||||
all_user_streams |= x
|
all_user_streams |= x
|
||||||
for x in self.user_to_user_stream.values():
|
for x in list(self.user_to_user_stream.values()):
|
||||||
all_user_streams.add(x)
|
all_user_streams.add(x)
|
||||||
|
|
||||||
return sum(stream.count_listeners() for stream in all_user_streams)
|
return sum(stream.count_listeners() for stream in all_user_streams)
|
||||||
|
@ -196,7 +196,7 @@ class Notifier(object):
|
||||||
|
|
||||||
LaterGauge(
|
LaterGauge(
|
||||||
"synapse_notifier_rooms", "", [],
|
"synapse_notifier_rooms", "", [],
|
||||||
lambda: count(bool, self.room_to_user_streams.values()),
|
lambda: count(bool, list(self.room_to_user_streams.values())),
|
||||||
)
|
)
|
||||||
LaterGauge(
|
LaterGauge(
|
||||||
"synapse_notifier_users", "", [],
|
"synapse_notifier_users", "", [],
|
||||||
|
|
Loading…
Reference in New Issue