mirror of
				https://github.com/matrix-org/synapse.git
				synced 2025-10-26 01:38:12 +00:00 
			
		
		
		
	Allow for the configuration of max request retries and min/max retry delays in the matrix federation client (#12504)
Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Erik Johnston <erik@matrix.org>
This commit is contained in:
		
							parent
							
								
									f6321e386c
								
							
						
					
					
						commit
						d84e66144d
					
				
							
								
								
									
										1
									
								
								changelog.d/12504.misc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changelog.d/12504.misc
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| Allow for the configuration of max request retries and min/max retry delays in the matrix federation client. | ||||
| @ -1196,6 +1196,32 @@ Example configuration: | ||||
| allow_device_name_lookup_over_federation: true | ||||
| ``` | ||||
| --- | ||||
| ### `federation` | ||||
| 
 | ||||
| The federation section defines some sub-options related to federation. | ||||
| 
 | ||||
| The following options are related to configuring timeout and retry logic for one request, | ||||
| independently of the others. | ||||
| Short retry algorithm is used when something or someone will wait for the request to have an | ||||
| answer, while long retry is used for requests that happen in the background, | ||||
| like sending a federation transaction. | ||||
| 
 | ||||
| * `client_timeout`: timeout for the federation requests in seconds. Default to 60s. | ||||
| * `max_short_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 2s. | ||||
| * `max_long_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 60s. | ||||
| * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts. | ||||
| * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts. | ||||
| 
 | ||||
| Example configuration: | ||||
| ```yaml | ||||
| federation: | ||||
|   client_timeout: 180 | ||||
|   max_short_retry_delay: 7 | ||||
|   max_long_retry_delay: 100 | ||||
|   max_short_retries: 5 | ||||
|   max_long_retries: 20 | ||||
| ``` | ||||
| --- | ||||
| ## Caching | ||||
| 
 | ||||
| Options related to caching. | ||||
|  | ||||
| @ -22,6 +22,8 @@ class FederationConfig(Config): | ||||
|     section = "federation" | ||||
| 
 | ||||
|     def read_config(self, config: JsonDict, **kwargs: Any) -> None: | ||||
|         federation_config = config.setdefault("federation", {}) | ||||
| 
 | ||||
|         # FIXME: federation_domain_whitelist needs sytests | ||||
|         self.federation_domain_whitelist: Optional[dict] = None | ||||
|         federation_domain_whitelist = config.get("federation_domain_whitelist", None) | ||||
| @ -49,5 +51,13 @@ class FederationConfig(Config): | ||||
|             "allow_device_name_lookup_over_federation", False | ||||
|         ) | ||||
| 
 | ||||
|         # Allow for the configuration of timeout, max request retries | ||||
|         # and min/max retry delays in the matrix federation client. | ||||
|         self.client_timeout = federation_config.get("client_timeout", 60) | ||||
|         self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60) | ||||
|         self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2) | ||||
|         self.max_long_retries = federation_config.get("max_long_retries", 10) | ||||
|         self.max_short_retries = federation_config.get("max_short_retries", 3) | ||||
| 
 | ||||
| 
 | ||||
| _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}} | ||||
|  | ||||
| @ -95,8 +95,6 @@ incoming_responses_counter = Counter( | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| MAX_LONG_RETRIES = 10 | ||||
| MAX_SHORT_RETRIES = 3 | ||||
| MAXINT = sys.maxsize | ||||
| 
 | ||||
| 
 | ||||
| @ -406,7 +404,12 @@ class MatrixFederationHttpClient: | ||||
|         self.clock = hs.get_clock() | ||||
|         self._store = hs.get_datastores().main | ||||
|         self.version_string_bytes = hs.version_string.encode("ascii") | ||||
|         self.default_timeout = 60 | ||||
|         self.default_timeout = hs.config.federation.client_timeout | ||||
| 
 | ||||
|         self.max_long_retry_delay = hs.config.federation.max_long_retry_delay | ||||
|         self.max_short_retry_delay = hs.config.federation.max_short_retry_delay | ||||
|         self.max_long_retries = hs.config.federation.max_long_retries | ||||
|         self.max_short_retries = hs.config.federation.max_short_retries | ||||
| 
 | ||||
|         self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor)) | ||||
| 
 | ||||
| @ -583,9 +586,9 @@ class MatrixFederationHttpClient: | ||||
|             # XXX: Would be much nicer to retry only at the transaction-layer | ||||
|             # (once we have reliable transactions in place) | ||||
|             if long_retries: | ||||
|                 retries_left = MAX_LONG_RETRIES | ||||
|                 retries_left = self.max_long_retries | ||||
|             else: | ||||
|                 retries_left = MAX_SHORT_RETRIES | ||||
|                 retries_left = self.max_short_retries | ||||
| 
 | ||||
|             url_bytes = request.uri | ||||
|             url_str = url_bytes.decode("ascii") | ||||
| @ -730,12 +733,12 @@ class MatrixFederationHttpClient: | ||||
| 
 | ||||
|                     if retries_left and not timeout: | ||||
|                         if long_retries: | ||||
|                             delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) | ||||
|                             delay = min(delay, 60) | ||||
|                             delay = 4 ** (self.max_long_retries + 1 - retries_left) | ||||
|                             delay = min(delay, self.max_long_retry_delay) | ||||
|                             delay *= random.uniform(0.8, 1.4) | ||||
|                         else: | ||||
|                             delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) | ||||
|                             delay = min(delay, 2) | ||||
|                             delay = 0.5 * 2 ** (self.max_short_retries - retries_left) | ||||
|                             delay = min(delay, self.max_short_retry_delay) | ||||
|                             delay *= random.uniform(0.8, 1.4) | ||||
| 
 | ||||
|                         logger.debug( | ||||
|  | ||||
| @ -40,7 +40,7 @@ from synapse.server import HomeServer | ||||
| from synapse.util import Clock | ||||
| 
 | ||||
| from tests.server import FakeTransport | ||||
| from tests.unittest import HomeserverTestCase | ||||
| from tests.unittest import HomeserverTestCase, override_config | ||||
| 
 | ||||
| 
 | ||||
| def check_logcontext(context: LoggingContextOrSentinel) -> None: | ||||
| @ -640,3 +640,21 @@ class FederationClientTests(HomeserverTestCase): | ||||
|             self.cl.build_auth_headers( | ||||
|                 b"", b"GET", b"https://example.com", destination_is=b"" | ||||
|             ) | ||||
| 
 | ||||
|     @override_config( | ||||
|         { | ||||
|             "federation": { | ||||
|                 "client_timeout": 180, | ||||
|                 "max_long_retry_delay": 100, | ||||
|                 "max_short_retry_delay": 7, | ||||
|                 "max_long_retries": 20, | ||||
|                 "max_short_retries": 5, | ||||
|             } | ||||
|         } | ||||
|     ) | ||||
|     def test_configurable_retry_and_delay_values(self) -> None: | ||||
|         self.assertEqual(self.cl.default_timeout, 180) | ||||
|         self.assertEqual(self.cl.max_long_retry_delay, 100) | ||||
|         self.assertEqual(self.cl.max_short_retry_delay, 7) | ||||
|         self.assertEqual(self.cl.max_long_retries, 20) | ||||
|         self.assertEqual(self.cl.max_short_retries, 5) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user