Prevent test failures by manually triggering the Python garbage collector before resetting the server def.
Due to the current implementation of set_server_def, resources might be leaked and then destroyed after the device manager (and its devices) have been released. When set_server_def is called multiple times, this leads to non-deterministic segfaults when the Python GC starts to clean up the hanging resources. PiperOrigin-RevId: 259668467
This commit is contained in:
parent
150a6c06b2
commit
805b28132e
@ -25,6 +25,7 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import gc
|
||||
import os
|
||||
import time
|
||||
|
||||
@ -1114,8 +1115,7 @@ class RemoteWorkerMicroBenchmarks(test.Benchmark):
|
||||
wall_time=mean_us,
|
||||
extras={"examples_per_sec": num_iters / total_time})
|
||||
|
||||
# TODO(b/136184459): Re-enabled once crash is fixed
|
||||
def _DISABLED_benchmark_send_mirroring_off(self):
|
||||
def benchmark_send_mirroring_off(self):
|
||||
remote.connect_to_remote_host(self._cached_server_target1)
|
||||
|
||||
x = random_ops.random_uniform((2, 2)).cpu()
|
||||
@ -1130,9 +1130,12 @@ class RemoteWorkerMicroBenchmarks(test.Benchmark):
|
||||
|
||||
context.context().mirroring_policy = context.MIRRORING_NONE
|
||||
self._run(lambda: func(x))
|
||||
# NOTE(b/136184459): Force garbage collecting hanging resources before
|
||||
# subsequent calls to set_server_def, to ensure the destroy resource ops are
|
||||
# executed when their corresponding device and manager are still available.
|
||||
gc.collect()
|
||||
|
||||
# TODO(b/136184459): Re-enabled once crash is fixed
|
||||
def _DISABLED_benchmark_send_mirroring_on(self):
|
||||
def benchmark_send_mirroring_on(self):
|
||||
remote.connect_to_remote_host(self._cached_server_target1)
|
||||
|
||||
x = random_ops.random_uniform((2, 2)).cpu()
|
||||
@ -1147,9 +1150,12 @@ class RemoteWorkerMicroBenchmarks(test.Benchmark):
|
||||
|
||||
context.context().mirroring_policy = context.MIRRORING_ALL
|
||||
self._run(lambda: func(x))
|
||||
# NOTE(b/136184459): Force garbage collecting hanging resources before
|
||||
# subsequent calls to set_server_def, to ensure the destroy resource ops are
|
||||
# executed when their corresponding device and manager are still available.
|
||||
gc.collect()
|
||||
|
||||
# TODO(b/136184459): Re-enabled once crash is fixed
|
||||
def _DISABLED_benchmark_worker_mirroring_off(self):
|
||||
def benchmark_worker_mirroring_off(self):
|
||||
remote.connect_to_remote_host(
|
||||
[self._cached_server_target1, self._cached_server_target2])
|
||||
|
||||
@ -1166,9 +1172,12 @@ class RemoteWorkerMicroBenchmarks(test.Benchmark):
|
||||
|
||||
context.context().mirroring_policy = context.MIRRORING_NONE
|
||||
self._run(func)
|
||||
# NOTE(b/136184459): Force garbage collecting hanging resources before
|
||||
# subsequent calls to set_server_def, to ensure the destroy resource ops are
|
||||
# executed when their corresponding device and manager are still available.
|
||||
gc.collect()
|
||||
|
||||
# TODO(b/136184459): Re-enabled once crash is fixed
|
||||
def _DISABLED_benchmark_worker_mirroring_on(self):
|
||||
def benchmark_worker_mirroring_on(self):
|
||||
remote.connect_to_remote_host(
|
||||
[self._cached_server_target1, self._cached_server_target2])
|
||||
|
||||
@ -1185,6 +1194,10 @@ class RemoteWorkerMicroBenchmarks(test.Benchmark):
|
||||
|
||||
context.context().mirroring_policy = context.MIRRORING_ALL
|
||||
self._run(func)
|
||||
# NOTE(b/136184459): Force garbage collecting hanging resources before
|
||||
# subsequent calls to set_server_def, to ensure the destroy resource ops are
|
||||
# executed when their corresponding device and manager are still available.
|
||||
gc.collect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
Reference in New Issue
Block a user