Clean up exception handling in the startup code (#9059)
Factor out the exception handling in the startup code to a utility function, and fix the some logging and exit code stuff.
This commit is contained in:
parent
4e04435bda
commit
671138f658
|
@ -0,0 +1 @@
|
||||||
|
Fix incorrect exit code when there is an error at startup.
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright 2017 New Vector Ltd
|
# Copyright 2017 New Vector Ltd
|
||||||
|
# Copyright 2019-2021 The Matrix.org Foundation C.I.C
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -19,7 +20,7 @@ import signal
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Iterable
|
from typing import Awaitable, Callable, Iterable
|
||||||
|
|
||||||
from typing_extensions import NoReturn
|
from typing_extensions import NoReturn
|
||||||
|
|
||||||
|
@ -143,6 +144,45 @@ def quit_with_error(error_string: str) -> NoReturn:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def register_start(cb: Callable[..., Awaitable], *args, **kwargs) -> None:
|
||||||
|
"""Register a callback with the reactor, to be called once it is running
|
||||||
|
|
||||||
|
This can be used to initialise parts of the system which require an asynchronous
|
||||||
|
setup.
|
||||||
|
|
||||||
|
Any exception raised by the callback will be printed and logged, and the process
|
||||||
|
will exit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def wrapper():
|
||||||
|
try:
|
||||||
|
await cb(*args, **kwargs)
|
||||||
|
except Exception:
|
||||||
|
# previously, we used Failure().printTraceback() here, in the hope that
|
||||||
|
# would give better tracebacks than traceback.print_exc(). However, that
|
||||||
|
# doesn't handle chained exceptions (with a __cause__ or __context__) well,
|
||||||
|
# and I *think* the need for Failure() is reduced now that we mostly use
|
||||||
|
# async/await.
|
||||||
|
|
||||||
|
# Write the exception to both the logs *and* the unredirected stderr,
|
||||||
|
# because people tend to get confused if it only goes to one or the other.
|
||||||
|
#
|
||||||
|
# One problem with this is that if people are using a logging config that
|
||||||
|
# logs to the console (as is common eg under docker), they will get two
|
||||||
|
# copies of the exception. We could maybe try to detect that, but it's
|
||||||
|
# probably a cost we can bear.
|
||||||
|
logger.fatal("Error during startup", exc_info=True)
|
||||||
|
print("Error during startup:", file=sys.__stderr__)
|
||||||
|
traceback.print_exc(file=sys.__stderr__)
|
||||||
|
|
||||||
|
# it's no use calling sys.exit here, since that just raises a SystemExit
|
||||||
|
# exception which is then caught by the reactor, and everything carries
|
||||||
|
# on as normal.
|
||||||
|
os._exit(1)
|
||||||
|
|
||||||
|
reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper()))
|
||||||
|
|
||||||
|
|
||||||
def listen_metrics(bind_addresses, port):
|
def listen_metrics(bind_addresses, port):
|
||||||
"""
|
"""
|
||||||
Start Prometheus metrics server.
|
Start Prometheus metrics server.
|
||||||
|
@ -227,7 +267,7 @@ def refresh_certificate(hs):
|
||||||
logger.info("Context factories updated.")
|
logger.info("Context factories updated.")
|
||||||
|
|
||||||
|
|
||||||
def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
async def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
||||||
"""
|
"""
|
||||||
Start a Synapse server or worker.
|
Start a Synapse server or worker.
|
||||||
|
|
||||||
|
@ -241,10 +281,8 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
||||||
hs: homeserver instance
|
hs: homeserver instance
|
||||||
listeners: Listener configuration ('listeners' in homeserver.yaml)
|
listeners: Listener configuration ('listeners' in homeserver.yaml)
|
||||||
"""
|
"""
|
||||||
try:
|
|
||||||
# Set up the SIGHUP machinery.
|
# Set up the SIGHUP machinery.
|
||||||
if hasattr(signal, "SIGHUP"):
|
if hasattr(signal, "SIGHUP"):
|
||||||
|
|
||||||
reactor = hs.get_reactor()
|
reactor = hs.get_reactor()
|
||||||
|
|
||||||
@wrap_as_background_process("sighup")
|
@wrap_as_background_process("sighup")
|
||||||
|
@ -304,12 +342,6 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
||||||
if sys.version_info >= (3, 7):
|
if sys.version_info >= (3, 7):
|
||||||
gc.collect()
|
gc.collect()
|
||||||
gc.freeze()
|
gc.freeze()
|
||||||
except Exception:
|
|
||||||
traceback.print_exc(file=sys.stderr)
|
|
||||||
reactor = hs.get_reactor()
|
|
||||||
if reactor.running:
|
|
||||||
reactor.stop()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_sentry(hs):
|
def setup_sentry(hs):
|
||||||
|
|
|
@ -21,7 +21,7 @@ from typing import Dict, Iterable, Optional, Set
|
||||||
|
|
||||||
from typing_extensions import ContextManager
|
from typing_extensions import ContextManager
|
||||||
|
|
||||||
from twisted.internet import address, reactor
|
from twisted.internet import address
|
||||||
|
|
||||||
import synapse
|
import synapse
|
||||||
import synapse.events
|
import synapse.events
|
||||||
|
@ -34,6 +34,7 @@ from synapse.api.urls import (
|
||||||
SERVER_KEY_V2_PREFIX,
|
SERVER_KEY_V2_PREFIX,
|
||||||
)
|
)
|
||||||
from synapse.app import _base
|
from synapse.app import _base
|
||||||
|
from synapse.app._base import register_start
|
||||||
from synapse.config._base import ConfigError
|
from synapse.config._base import ConfigError
|
||||||
from synapse.config.homeserver import HomeServerConfig
|
from synapse.config.homeserver import HomeServerConfig
|
||||||
from synapse.config.logger import setup_logging
|
from synapse.config.logger import setup_logging
|
||||||
|
@ -960,9 +961,7 @@ def start(config_options):
|
||||||
# streams. Will no-op if no streams can be written to by this worker.
|
# streams. Will no-op if no streams can be written to by this worker.
|
||||||
hs.get_replication_streamer()
|
hs.get_replication_streamer()
|
||||||
|
|
||||||
reactor.addSystemEventTrigger(
|
register_start(_base.start, hs, config.worker_listeners)
|
||||||
"before", "startup", _base.start, hs, config.worker_listeners
|
|
||||||
)
|
|
||||||
|
|
||||||
_base.start_worker_reactor("synapse-generic-worker", config)
|
_base.start_worker_reactor("synapse-generic-worker", config)
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,7 @@ import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Iterable, Iterator
|
from typing import Iterable, Iterator
|
||||||
|
|
||||||
from twisted.internet import defer, reactor
|
from twisted.internet import reactor
|
||||||
from twisted.python.failure import Failure
|
|
||||||
from twisted.web.resource import EncodingResourceWrapper, IResource
|
from twisted.web.resource import EncodingResourceWrapper, IResource
|
||||||
from twisted.web.server import GzipEncoderFactory
|
from twisted.web.server import GzipEncoderFactory
|
||||||
from twisted.web.static import File
|
from twisted.web.static import File
|
||||||
|
@ -38,7 +37,7 @@ from synapse.api.urls import (
|
||||||
WEB_CLIENT_PREFIX,
|
WEB_CLIENT_PREFIX,
|
||||||
)
|
)
|
||||||
from synapse.app import _base
|
from synapse.app import _base
|
||||||
from synapse.app._base import listen_ssl, listen_tcp, quit_with_error
|
from synapse.app._base import listen_ssl, listen_tcp, quit_with_error, register_start
|
||||||
from synapse.config._base import ConfigError
|
from synapse.config._base import ConfigError
|
||||||
from synapse.config.emailconfig import ThreepidBehaviour
|
from synapse.config.emailconfig import ThreepidBehaviour
|
||||||
from synapse.config.homeserver import HomeServerConfig
|
from synapse.config.homeserver import HomeServerConfig
|
||||||
|
@ -414,7 +413,6 @@ def setup(config_options):
|
||||||
_base.refresh_certificate(hs)
|
_base.refresh_certificate(hs)
|
||||||
|
|
||||||
async def start():
|
async def start():
|
||||||
try:
|
|
||||||
# Run the ACME provisioning code, if it's enabled.
|
# Run the ACME provisioning code, if it's enabled.
|
||||||
if hs.config.acme_enabled:
|
if hs.config.acme_enabled:
|
||||||
acme = hs.get_acme_handler()
|
acme = hs.get_acme_handler()
|
||||||
|
@ -433,21 +431,11 @@ def setup(config_options):
|
||||||
await oidc.load_metadata()
|
await oidc.load_metadata()
|
||||||
await oidc.load_jwks()
|
await oidc.load_jwks()
|
||||||
|
|
||||||
_base.start(hs, config.listeners)
|
await _base.start(hs, config.listeners)
|
||||||
|
|
||||||
hs.get_datastore().db_pool.updates.start_doing_background_updates()
|
hs.get_datastore().db_pool.updates.start_doing_background_updates()
|
||||||
except Exception:
|
|
||||||
# Print the exception and bail out.
|
|
||||||
print("Error during startup:", file=sys.stderr)
|
|
||||||
|
|
||||||
# this gives better tracebacks than traceback.print_exc()
|
register_start(start)
|
||||||
Failure().printTraceback(file=sys.stderr)
|
|
||||||
|
|
||||||
if reactor.running:
|
|
||||||
reactor.stop()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
reactor.callWhenRunning(lambda: defer.ensureDeferred(start()))
|
|
||||||
|
|
||||||
return hs
|
return hs
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue