Refactor SimpleHttpClient to pull out reusable methods (#15427)

Pulls out some methods to `BaseHttpClient` to eventually be
reused in other contexts.
This commit is contained in:
Jason Little 2023-04-14 15:46:04 -05:00 committed by GitHub
parent 8a47d6e3a6
commit c9326140dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 77 additions and 56 deletions

1
changelog.d/15427.misc Normal file
View File

@ -0,0 +1 @@
Refactor `SimpleHttpClient` to pull out a base class.

View File

@ -312,35 +312,27 @@ class BlacklistingAgentWrapper(Agent):
) )
class SimpleHttpClient: class BaseHttpClient:
""" """
A simple, no-frills HTTP client with methods that wrap up common ways of A simple, no-frills HTTP client with methods that wrap up common ways of
using HTTP in Matrix using HTTP in Matrix. Does not come with a default Agent, subclasses will need to
define their own.
Args:
hs: The HomeServer instance to pass in
treq_args: Extra keyword arguments to be given to treq.request.
""" """
agent: IAgent
def __init__( def __init__(
self, self,
hs: "HomeServer", hs: "HomeServer",
treq_args: Optional[Dict[str, Any]] = None, treq_args: Optional[Dict[str, Any]] = None,
ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None,
use_proxy: bool = False,
): ):
"""
Args:
hs
treq_args: Extra keyword arguments to be given to treq.request.
ip_blacklist: The IP addresses that are blacklisted that
we may not request.
ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.
"""
self.hs = hs self.hs = hs
self.reactor = hs.get_reactor()
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist
self._extra_treq_args = treq_args or {} self._extra_treq_args = treq_args or {}
self.clock = hs.get_clock() self.clock = hs.get_clock()
@ -356,44 +348,6 @@ class SimpleHttpClient:
# reactor. # reactor.
self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor())) self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))
if self._ip_blacklist:
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
)
else:
self.reactor = hs.get_reactor()
# the pusher makes lots of concurrent SSL connections to sygnal, and
# tends to do so in batches, so we need to allow the pool to keep
# lots of idle connections around.
pool = HTTPConnectionPool(self.reactor)
# XXX: The justification for using the cache factor here is that larger instances
# will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60
self.agent: IAgent = ProxyAgent(
self.reactor,
hs.get_reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
use_proxy=use_proxy,
)
if self._ip_blacklist:
# If we have an IP blacklist, we then install the blacklisting Agent
# which prevents direct access to IP addresses, that are not caught
# by the DNS resolution.
self.agent = BlacklistingAgentWrapper(
self.agent,
ip_blacklist=self._ip_blacklist,
ip_whitelist=self._ip_whitelist,
)
async def request( async def request(
self, self,
method: str, method: str,
@ -799,6 +753,72 @@ class SimpleHttpClient:
) )
class SimpleHttpClient(BaseHttpClient):
"""
An HTTP client capable of crossing a proxy and respecting a block/allow list.
This also configures a larger / longer lasting HTTP connection pool.
Args:
hs: The HomeServer instance to pass in
treq_args: Extra keyword arguments to be given to treq.request.
ip_blacklist: The IP addresses that are blacklisted that
we may not request.
ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.
"""
def __init__(
self,
hs: "HomeServer",
treq_args: Optional[Dict[str, Any]] = None,
ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None,
use_proxy: bool = False,
):
super().__init__(hs, treq_args=treq_args)
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist
if self._ip_blacklist:
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
self.reactor, self._ip_whitelist, self._ip_blacklist
)
# the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
# do so in batches, so we need to allow the pool to keep lots of idle
# connections around.
pool = HTTPConnectionPool(self.reactor)
# XXX: The justification for using the cache factor here is that larger
# instances will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60
self.agent: IAgent = ProxyAgent(
self.reactor,
hs.get_reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
use_proxy=use_proxy,
)
if self._ip_blacklist:
# If we have an IP blacklist, we then install the blacklisting Agent
# which prevents direct access to IP addresses, that are not caught
# by the DNS resolution.
self.agent = BlacklistingAgentWrapper(
self.agent,
ip_blacklist=self._ip_blacklist,
ip_whitelist=self._ip_whitelist,
)
def _timeout_to_request_timed_out_error(f: Failure) -> Failure: def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
# The TCP connection has its own timeout (set by the 'connectTimeout' param # The TCP connection has its own timeout (set by the 'connectTimeout' param