mirror of
				https://github.com/matrix-org/synapse.git
				synced 2025-11-04 06:07:23 +00:00 
			
		
		
		
	Add OpenTracing for database activity. (#10113)
This adds quite a lot of OpenTracing decoration for database activity. Specifically it adds tracing at four different levels: * emit a span for each "interaction" - ie, the top level database function that we tend to call "transaction", but isn't really, because it can end up as multiple transactions. * emit a span while we hold a database connection open * emit a span for each database transaction - actual actual transaction. * emit a span for each database query. I'm aware this might be quite a lot of overhead, but even just running it on a local Synapse it looks really interesting, and I hope the overhead can be offset just by turning down the sampling frequency and finding other ways of tracing requests of interest (eg, the `force_tracing_for_users` setting).
This commit is contained in:
		
							parent
							
								
									1d143074c5
								
							
						
					
					
						commit
						9eea4646be
					
				
							
								
								
									
										1
									
								
								changelog.d/10113.feature
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changelog.d/10113.feature
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
				
			|||||||
 | 
					Report OpenTracing spans for database activity.
 | 
				
			||||||
@ -271,6 +271,12 @@ class SynapseTags:
 | 
				
			|||||||
    # HTTP request tag (used to distinguish full vs incremental syncs, etc)
 | 
					    # HTTP request tag (used to distinguish full vs incremental syncs, etc)
 | 
				
			||||||
    REQUEST_TAG = "request_tag"
 | 
					    REQUEST_TAG = "request_tag"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Text description of a database transaction
 | 
				
			||||||
 | 
					    DB_TXN_DESC = "db.txn_desc"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Uniqueish ID of a database transaction
 | 
				
			||||||
 | 
					    DB_TXN_ID = "db.txn_id"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Block everything by default
 | 
					# Block everything by default
 | 
				
			||||||
# A regex which matches the server_names to expose traces for.
 | 
					# A regex which matches the server_names to expose traces for.
 | 
				
			||||||
 | 
				
			|||||||
@ -40,6 +40,7 @@ from twisted.enterprise import adbapi
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from synapse.api.errors import StoreError
 | 
					from synapse.api.errors import StoreError
 | 
				
			||||||
from synapse.config.database import DatabaseConnectionConfig
 | 
					from synapse.config.database import DatabaseConnectionConfig
 | 
				
			||||||
 | 
					from synapse.logging import opentracing
 | 
				
			||||||
from synapse.logging.context import (
 | 
					from synapse.logging.context import (
 | 
				
			||||||
    LoggingContext,
 | 
					    LoggingContext,
 | 
				
			||||||
    current_context,
 | 
					    current_context,
 | 
				
			||||||
@ -313,7 +314,14 @@ class LoggingTransaction:
 | 
				
			|||||||
        start = time.time()
 | 
					        start = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            return func(sql, *args)
 | 
					            with opentracing.start_active_span(
 | 
				
			||||||
 | 
					                "db.query",
 | 
				
			||||||
 | 
					                tags={
 | 
				
			||||||
 | 
					                    opentracing.tags.DATABASE_TYPE: "sql",
 | 
				
			||||||
 | 
					                    opentracing.tags.DATABASE_STATEMENT: sql,
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            ):
 | 
				
			||||||
 | 
					                return func(sql, *args)
 | 
				
			||||||
        except Exception as e:
 | 
					        except Exception as e:
 | 
				
			||||||
            sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e)
 | 
					            sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e)
 | 
				
			||||||
            raise
 | 
					            raise
 | 
				
			||||||
@ -525,9 +533,16 @@ class DatabasePool:
 | 
				
			|||||||
                    exception_callbacks=exception_callbacks,
 | 
					                    exception_callbacks=exception_callbacks,
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    r = func(cursor, *args, **kwargs)
 | 
					                    with opentracing.start_active_span(
 | 
				
			||||||
                    conn.commit()
 | 
					                        "db.txn",
 | 
				
			||||||
                    return r
 | 
					                        tags={
 | 
				
			||||||
 | 
					                            opentracing.SynapseTags.DB_TXN_DESC: desc,
 | 
				
			||||||
 | 
					                            opentracing.SynapseTags.DB_TXN_ID: name,
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                    ):
 | 
				
			||||||
 | 
					                        r = func(cursor, *args, **kwargs)
 | 
				
			||||||
 | 
					                        conn.commit()
 | 
				
			||||||
 | 
					                        return r
 | 
				
			||||||
                except self.engine.module.OperationalError as e:
 | 
					                except self.engine.module.OperationalError as e:
 | 
				
			||||||
                    # This can happen if the database disappears mid
 | 
					                    # This can happen if the database disappears mid
 | 
				
			||||||
                    # transaction.
 | 
					                    # transaction.
 | 
				
			||||||
@ -653,16 +668,17 @@ class DatabasePool:
 | 
				
			|||||||
            logger.warning("Starting db txn '%s' from sentinel context", desc)
 | 
					            logger.warning("Starting db txn '%s' from sentinel context", desc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            result = await self.runWithConnection(
 | 
					            with opentracing.start_active_span(f"db.{desc}"):
 | 
				
			||||||
                self.new_transaction,
 | 
					                result = await self.runWithConnection(
 | 
				
			||||||
                desc,
 | 
					                    self.new_transaction,
 | 
				
			||||||
                after_callbacks,
 | 
					                    desc,
 | 
				
			||||||
                exception_callbacks,
 | 
					                    after_callbacks,
 | 
				
			||||||
                func,
 | 
					                    exception_callbacks,
 | 
				
			||||||
                *args,
 | 
					                    func,
 | 
				
			||||||
                db_autocommit=db_autocommit,
 | 
					                    *args,
 | 
				
			||||||
                **kwargs,
 | 
					                    db_autocommit=db_autocommit,
 | 
				
			||||||
            )
 | 
					                    **kwargs,
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for after_callback, after_args, after_kwargs in after_callbacks:
 | 
					            for after_callback, after_args, after_kwargs in after_callbacks:
 | 
				
			||||||
                after_callback(*after_args, **after_kwargs)
 | 
					                after_callback(*after_args, **after_kwargs)
 | 
				
			||||||
@ -718,25 +734,29 @@ class DatabasePool:
 | 
				
			|||||||
            with LoggingContext(
 | 
					            with LoggingContext(
 | 
				
			||||||
                str(curr_context), parent_context=parent_context
 | 
					                str(curr_context), parent_context=parent_context
 | 
				
			||||||
            ) as context:
 | 
					            ) as context:
 | 
				
			||||||
                sched_duration_sec = monotonic_time() - start_time
 | 
					                with opentracing.start_active_span(
 | 
				
			||||||
                sql_scheduling_timer.observe(sched_duration_sec)
 | 
					                    operation_name="db.connection",
 | 
				
			||||||
                context.add_database_scheduled(sched_duration_sec)
 | 
					                ):
 | 
				
			||||||
 | 
					                    sched_duration_sec = monotonic_time() - start_time
 | 
				
			||||||
 | 
					                    sql_scheduling_timer.observe(sched_duration_sec)
 | 
				
			||||||
 | 
					                    context.add_database_scheduled(sched_duration_sec)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if self.engine.is_connection_closed(conn):
 | 
					                    if self.engine.is_connection_closed(conn):
 | 
				
			||||||
                    logger.debug("Reconnecting closed database connection")
 | 
					                        logger.debug("Reconnecting closed database connection")
 | 
				
			||||||
                    conn.reconnect()
 | 
					                        conn.reconnect()
 | 
				
			||||||
 | 
					                        opentracing.log_kv({"message": "reconnected"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                try:
 | 
					                    try:
 | 
				
			||||||
                    if db_autocommit:
 | 
					                        if db_autocommit:
 | 
				
			||||||
                        self.engine.attempt_to_set_autocommit(conn, True)
 | 
					                            self.engine.attempt_to_set_autocommit(conn, True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    db_conn = LoggingDatabaseConnection(
 | 
					                        db_conn = LoggingDatabaseConnection(
 | 
				
			||||||
                        conn, self.engine, "runWithConnection"
 | 
					                            conn, self.engine, "runWithConnection"
 | 
				
			||||||
                    )
 | 
					                        )
 | 
				
			||||||
                    return func(db_conn, *args, **kwargs)
 | 
					                        return func(db_conn, *args, **kwargs)
 | 
				
			||||||
                finally:
 | 
					                    finally:
 | 
				
			||||||
                    if db_autocommit:
 | 
					                        if db_autocommit:
 | 
				
			||||||
                        self.engine.attempt_to_set_autocommit(conn, False)
 | 
					                            self.engine.attempt_to_set_autocommit(conn, False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return await make_deferred_yieldable(
 | 
					        return await make_deferred_yieldable(
 | 
				
			||||||
            self._db_pool.runWithConnection(inner_func, *args, **kwargs)
 | 
					            self._db_pool.runWithConnection(inner_func, *args, **kwargs)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user