Fix a memory leak when creating UninitializedVariables (as SavedModel loading code does)
This is the same shared_name kernel caching issue we fixed for regular tf.Variable. Adds a test using the C++ memory checker; the Python checker complains about a weakref being allocated each time, so I'm just using the C++ checker for now. Does not test on GPUs, since that appears to still leak memory (could be an infra issue). PiperOrigin-RevId: 331798116 Change-Id: I59f7912621b7d20b6fcf19f87d3f27e60d56fec3
This commit is contained in:
parent
579a12c875
commit
84ff8673dc
tensorflow/python
@ -24,9 +24,9 @@ from tensorflow.python.profiler.traceme import traceme_wrapper
|
|||||||
from tensorflow.python.util import tf_inspect
|
from tensorflow.python.util import tf_inspect
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from tensorflow.python.platform.cpp_memory_checker import _CppMemoryChecker # pylint:disable=g-import-not-at-top
|
from tensorflow.python.platform.cpp_memory_checker import _CppMemoryChecker as CppMemoryChecker # pylint:disable=g-import-not-at-top
|
||||||
except ImportError:
|
except ImportError:
|
||||||
_CppMemoryChecker = None
|
CppMemoryChecker = None
|
||||||
|
|
||||||
|
|
||||||
def _get_test_name_best_effort():
|
def _get_test_name_best_effort():
|
||||||
@ -77,13 +77,13 @@ class MemoryChecker(object):
|
|||||||
self._trace_me = TraceMe('with MemoryChecker():')
|
self._trace_me = TraceMe('with MemoryChecker():')
|
||||||
self._trace_me.__enter__()
|
self._trace_me.__enter__()
|
||||||
self._python_memory_checker = _PythonMemoryChecker()
|
self._python_memory_checker = _PythonMemoryChecker()
|
||||||
if _CppMemoryChecker:
|
if CppMemoryChecker:
|
||||||
self._cpp_memory_checker = _CppMemoryChecker(_get_test_name_best_effort())
|
self._cpp_memory_checker = CppMemoryChecker(_get_test_name_best_effort())
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@traceme_wrapper
|
@traceme_wrapper
|
||||||
def __exit__(self, exc_type, exc_value, traceback):
|
def __exit__(self, exc_type, exc_value, traceback):
|
||||||
if _CppMemoryChecker:
|
if CppMemoryChecker:
|
||||||
self._cpp_memory_checker.stop()
|
self._cpp_memory_checker.stop()
|
||||||
self._trace_me.__exit__(exc_type, exc_value, traceback)
|
self._trace_me.__exit__(exc_type, exc_value, traceback)
|
||||||
|
|
||||||
@ -99,7 +99,7 @@ class MemoryChecker(object):
|
|||||||
code complexity and the allcoation pattern.
|
code complexity and the allcoation pattern.
|
||||||
"""
|
"""
|
||||||
self._python_memory_checker.record_snapshot()
|
self._python_memory_checker.record_snapshot()
|
||||||
if _CppMemoryChecker:
|
if CppMemoryChecker:
|
||||||
self._cpp_memory_checker.record_snapshot()
|
self._cpp_memory_checker.record_snapshot()
|
||||||
|
|
||||||
@traceme_wrapper
|
@traceme_wrapper
|
||||||
@ -111,7 +111,7 @@ class MemoryChecker(object):
|
|||||||
directory provided the infra instead.
|
directory provided the infra instead.
|
||||||
"""
|
"""
|
||||||
self._python_memory_checker.report()
|
self._python_memory_checker.report()
|
||||||
if _CppMemoryChecker:
|
if CppMemoryChecker:
|
||||||
self._cpp_memory_checker.report()
|
self._cpp_memory_checker.report()
|
||||||
|
|
||||||
@traceme_wrapper
|
@traceme_wrapper
|
||||||
@ -124,7 +124,7 @@ class MemoryChecker(object):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
self._python_memory_checker.assert_no_leak_if_all_possibly_except_one()
|
self._python_memory_checker.assert_no_leak_if_all_possibly_except_one()
|
||||||
if _CppMemoryChecker:
|
if CppMemoryChecker:
|
||||||
self._cpp_memory_checker.assert_no_leak_if_all_possibly_except_one()
|
self._cpp_memory_checker.assert_no_leak_if_all_possibly_except_one()
|
||||||
|
|
||||||
@traceme_wrapper
|
@traceme_wrapper
|
||||||
|
@ -926,6 +926,8 @@ cuda_py_test(
|
|||||||
name = "resource_variable_ops_test",
|
name = "resource_variable_ops_test",
|
||||||
size = "medium",
|
size = "medium",
|
||||||
srcs = ["resource_variable_ops_test.py"],
|
srcs = ["resource_variable_ops_test.py"],
|
||||||
|
# TODO(kkb): CppMemoryChecker is flaky without these two flags, investigate.
|
||||||
|
#
|
||||||
# TODO(b/128347673): Re-enable.
|
# TODO(b/128347673): Re-enable.
|
||||||
tags = ["no_windows"],
|
tags = ["no_windows"],
|
||||||
tfrt_enabled = True,
|
tfrt_enabled = True,
|
||||||
@ -936,6 +938,7 @@ cuda_py_test(
|
|||||||
"//tensorflow/python:errors",
|
"//tensorflow/python:errors",
|
||||||
"//tensorflow/python:framework_for_generated_wrappers",
|
"//tensorflow/python:framework_for_generated_wrappers",
|
||||||
"//tensorflow/python:framework_test_lib",
|
"//tensorflow/python:framework_test_lib",
|
||||||
|
"//tensorflow/python:memory_checker",
|
||||||
"//tensorflow/python:resource_variable_ops",
|
"//tensorflow/python:resource_variable_ops",
|
||||||
"//tensorflow/python:variables",
|
"//tensorflow/python:variables",
|
||||||
"@absl_py//absl/testing:parameterized",
|
"@absl_py//absl/testing:parameterized",
|
||||||
|
@ -34,6 +34,7 @@ from tensorflow.python.framework import constant_op
|
|||||||
from tensorflow.python.framework import cpp_shape_inference_pb2
|
from tensorflow.python.framework import cpp_shape_inference_pb2
|
||||||
from tensorflow.python.framework import dtypes
|
from tensorflow.python.framework import dtypes
|
||||||
from tensorflow.python.framework import errors
|
from tensorflow.python.framework import errors
|
||||||
|
from tensorflow.python.framework import memory_checker
|
||||||
from tensorflow.python.framework import ops
|
from tensorflow.python.framework import ops
|
||||||
from tensorflow.python.framework import tensor_shape
|
from tensorflow.python.framework import tensor_shape
|
||||||
from tensorflow.python.framework import tensor_util
|
from tensorflow.python.framework import tensor_util
|
||||||
@ -1568,6 +1569,29 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase,
|
|||||||
var.handle, indices, dtype=dtype)
|
var.handle, indices, dtype=dtype)
|
||||||
self.assertAllEqual(expected, result)
|
self.assertAllEqual(expected, result)
|
||||||
|
|
||||||
|
@test_util.run_v2_only
|
||||||
|
def testUninitializedVariableMemoryUsage(self):
|
||||||
|
if test_util.is_gpu_available():
|
||||||
|
# TODO(allenl): Investigate possible GPU-specific memory leaks
|
||||||
|
self.skipTest("Disabled when a GPU is available")
|
||||||
|
# TODO(kkb): Python memory checker complains continuous `weakref`
|
||||||
|
# allocations, investigate.
|
||||||
|
if memory_checker.CppMemoryChecker is None:
|
||||||
|
self.skipTest("Requires the C++ memory checker")
|
||||||
|
|
||||||
|
def _create_and_delete_variable():
|
||||||
|
resource_variable_ops.UninitializedVariable(
|
||||||
|
shape=[100, 100],
|
||||||
|
dtype=dtypes.float32)
|
||||||
|
|
||||||
|
_create_and_delete_variable()
|
||||||
|
checker = memory_checker.CppMemoryChecker(
|
||||||
|
"ResourceVariableOps.testUninitializedVariableMemoryUsage")
|
||||||
|
for _ in range(2):
|
||||||
|
_create_and_delete_variable()
|
||||||
|
checker.record_snapshot()
|
||||||
|
checker.report()
|
||||||
|
checker.assert_no_leak_if_all_possibly_except_one()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test.main()
|
test.main()
|
||||||
|
@ -1954,7 +1954,7 @@ class UninitializedVariable(BaseResourceVariable):
|
|||||||
unique_id = shared_name
|
unique_id = shared_name
|
||||||
else:
|
else:
|
||||||
unique_id = "%s_%d" % (handle_name, ops.uid())
|
unique_id = "%s_%d" % (handle_name, ops.uid())
|
||||||
shared_name = context.shared_name(unique_id)
|
shared_name = context.shared_name()
|
||||||
handle = _variable_handle_from_shape_and_dtype(
|
handle = _variable_handle_from_shape_and_dtype(
|
||||||
shape=shape,
|
shape=shape,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
|
Loading…
Reference in New Issue
Block a user