Fix a memory leak when creating UninitializedVariables (as SavedModel loading code does)

This is the same shared_name kernel caching issue we fixed for regular tf.Variable.

Adds a test using the C++ memory checker; the Python checker complains about a weakref being allocated each time, so I'm just using the C++ checker for now.

Does not test on GPUs, since that appears to still leak memory (could be an infra issue).

PiperOrigin-RevId: 331798116
Change-Id: I59f7912621b7d20b6fcf19f87d3f27e60d56fec3
This commit is contained in:
Allen Lavoie 2020-09-15 10:23:53 -07:00 committed by TensorFlower Gardener
parent 579a12c875
commit 84ff8673dc
4 changed files with 36 additions and 9 deletions

View File

@ -24,9 +24,9 @@ from tensorflow.python.profiler.traceme import traceme_wrapper
from tensorflow.python.util import tf_inspect
try:
from tensorflow.python.platform.cpp_memory_checker import _CppMemoryChecker # pylint:disable=g-import-not-at-top
from tensorflow.python.platform.cpp_memory_checker import _CppMemoryChecker as CppMemoryChecker # pylint:disable=g-import-not-at-top
except ImportError:
_CppMemoryChecker = None
CppMemoryChecker = None
def _get_test_name_best_effort():
@ -77,13 +77,13 @@ class MemoryChecker(object):
self._trace_me = TraceMe('with MemoryChecker():')
self._trace_me.__enter__()
self._python_memory_checker = _PythonMemoryChecker()
if _CppMemoryChecker:
self._cpp_memory_checker = _CppMemoryChecker(_get_test_name_best_effort())
if CppMemoryChecker:
self._cpp_memory_checker = CppMemoryChecker(_get_test_name_best_effort())
return self
@traceme_wrapper
def __exit__(self, exc_type, exc_value, traceback):
if _CppMemoryChecker:
if CppMemoryChecker:
self._cpp_memory_checker.stop()
self._trace_me.__exit__(exc_type, exc_value, traceback)
@ -99,7 +99,7 @@ class MemoryChecker(object):
code complexity and the allcoation pattern.
"""
self._python_memory_checker.record_snapshot()
if _CppMemoryChecker:
if CppMemoryChecker:
self._cpp_memory_checker.record_snapshot()
@traceme_wrapper
@ -111,7 +111,7 @@ class MemoryChecker(object):
directory provided the infra instead.
"""
self._python_memory_checker.report()
if _CppMemoryChecker:
if CppMemoryChecker:
self._cpp_memory_checker.report()
@traceme_wrapper
@ -124,7 +124,7 @@ class MemoryChecker(object):
"""
self._python_memory_checker.assert_no_leak_if_all_possibly_except_one()
if _CppMemoryChecker:
if CppMemoryChecker:
self._cpp_memory_checker.assert_no_leak_if_all_possibly_except_one()
@traceme_wrapper

View File

@ -926,6 +926,8 @@ cuda_py_test(
name = "resource_variable_ops_test",
size = "medium",
srcs = ["resource_variable_ops_test.py"],
# TODO(kkb): CppMemoryChecker is flaky without these two flags, investigate.
#
# TODO(b/128347673): Re-enable.
tags = ["no_windows"],
tfrt_enabled = True,
@ -936,6 +938,7 @@ cuda_py_test(
"//tensorflow/python:errors",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:memory_checker",
"//tensorflow/python:resource_variable_ops",
"//tensorflow/python:variables",
"@absl_py//absl/testing:parameterized",

View File

@ -34,6 +34,7 @@ from tensorflow.python.framework import constant_op
from tensorflow.python.framework import cpp_shape_inference_pb2
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import memory_checker
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
@ -1568,6 +1569,29 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase,
var.handle, indices, dtype=dtype)
self.assertAllEqual(expected, result)
@test_util.run_v2_only
def testUninitializedVariableMemoryUsage(self):
if test_util.is_gpu_available():
# TODO(allenl): Investigate possible GPU-specific memory leaks
self.skipTest("Disabled when a GPU is available")
# TODO(kkb): Python memory checker complains continuous `weakref`
# allocations, investigate.
if memory_checker.CppMemoryChecker is None:
self.skipTest("Requires the C++ memory checker")
def _create_and_delete_variable():
resource_variable_ops.UninitializedVariable(
shape=[100, 100],
dtype=dtypes.float32)
_create_and_delete_variable()
checker = memory_checker.CppMemoryChecker(
"ResourceVariableOps.testUninitializedVariableMemoryUsage")
for _ in range(2):
_create_and_delete_variable()
checker.record_snapshot()
checker.report()
checker.assert_no_leak_if_all_possibly_except_one()
if __name__ == "__main__":
test.main()

View File

@ -1954,7 +1954,7 @@ class UninitializedVariable(BaseResourceVariable):
unique_id = shared_name
else:
unique_id = "%s_%d" % (handle_name, ops.uid())
shared_name = context.shared_name(unique_id)
shared_name = context.shared_name()
handle = _variable_handle_from_shape_and_dtype(
shape=shape,
dtype=dtype,